fix discovRe feature error
This commit is contained in:
parent
49bae1c6ca
commit
3206714241
228
raw-feature-extractor/discovRe.py
Executable file
228
raw-feature-extractor/discovRe.py
Executable file
@ -0,0 +1,228 @@
|
||||
#
|
||||
# Reference Lister
|
||||
#
|
||||
# List all functions and all references to them in the current section.
|
||||
#
|
||||
# Implemented with the idautils module
|
||||
#
|
||||
import networkx as nx
|
||||
import cPickle as pickle
|
||||
import pdb
|
||||
from graph_analysis_ida import *
|
||||
from graph_property import *
|
||||
#import wingdbstub
|
||||
#wingdbstub.Ensure()
|
||||
|
||||
def get_funcs(ea):
    """Map every function name to the address ranges of its basic blocks.

    Functions are enumerated with Functions(SegStart(ea)), i.e. starting at
    the beginning of the segment that contains ``ea``.

    Returns a dict: function name -> list of (startEA, endEA) tuples, one per
    block yielded by FlowChart for that function.
    """
    block_map = {}
    for func_ea in Functions(SegStart(ea)):
        name = GetFunctionName(func_ea)
        chart = FlowChart(get_func(func_ea))
        block_map[name] = [(blk.startEA, blk.endEA) for blk in chart]
    return block_map
|
||||
|
||||
def get_funcs_for_discoverRe(ea):
    """Collect the discovRe feature list of every function from SegStart(ea) on.

    Each function name is printed as it is processed.

    Returns a dict: function name -> list returned by get_discoverRe_feature.
    """
    features = {}
    for funcea in Functions(SegStart(ea)):
        funcname = GetFunctionName(funcea)
        print(funcname)
        func = get_func(funcea)
        # get_discoverRe_feature takes (func, icfg); the previous one-argument
        # call raised TypeError on the first function.  The graph is built the
        # same way get_func_cfgs builds it.
        # NOTE(review): assumes cfg.cfg_construct returns the graph object
        # get_discoverRe_feature expects (it calls .edges() on it), and that
        # `cfg` is importable in this module -- confirm.
        icfg = cfg.cfg_construct(func)
        features[funcname] = get_discoverRe_feature(func, icfg)
    return features
|
||||
|
||||
def get_discoverRe_feature(func, icfg):
    """Build the discovRe feature list for one function.

    func -- IDA function object, handed unchanged to the get* helpers.
    icfg -- graph of the function; only icfg.edges() and retrieveGP(icfg)
            are used here.

    Returns a list of eleven entries in the order given by the numbered
    comments below.
    """
    # The unused start/end locals of the earlier version were dropped.
    features = [
        getFuncCalls(func),        # 1  call instructions
        getLogicInsts(func),       # 2  logic instructions
        getTransferInsts(func),    # 3  transfer instructions
        getLocalVariables(func),   # 4  local variables
        getBasicBlocks(func),      # 5  basic blocks
        len(icfg.edges()),         # 6  edges of icfg
        getIncommingCalls(func),   # 7  incoming calls
        getIntrs(func),            # 8  instructions
        retrieveGP(icfg),          # 9  result of retrieveGP
    ]
    strings, consts = getfunc_consts(func)
    features.append(strings)       # 10 string constants
    features.append(consts)        # 11 numeric constants
    return features
|
||||
|
||||
def get_func_names(ea):
    """Return a dict: function name -> address yielded by Functions().

    Functions are enumerated starting at SegStart(ea).
    """
    return dict(
        (GetFunctionName(func_ea), func_ea)
        for func_ea in Functions(SegStart(ea))
    )
|
||||
|
||||
def get_func_bases(ea):
    """Return a dict: address yielded by Functions() -> function name.

    This is the inverse mapping of get_func_names; functions are enumerated
    starting at SegStart(ea).
    """
    return dict(
        (func_ea, GetFunctionName(func_ea))
        for func_ea in Functions(SegStart(ea))
    )
|
||||
|
||||
def get_func_range(ea):
    """Return a dict: function name -> (startEA, endEA) of that function.

    Functions are enumerated starting at SegStart(ea).
    """
    ranges = {}
    for func_ea in Functions(SegStart(ea)):
        name = GetFunctionName(func_ea)
        fn = get_func(func_ea)
        ranges[name] = (fn.startEA, fn.endEA)
    return ranges
|
||||
|
||||
def get_func_sequences(ea):
    """Return a dict: function name -> flat list of instruction mnemonics.

    The mnemonics of all blocks reported by get_funcs(ea) are concatenated in
    the order get_funcs lists the blocks.
    """
    funcs_bodylist = {}
    for funcname, blocks in get_funcs(ea).items():
        opcodes = []
        for start, end in blocks:
            inst_addr = start
            # A block's endEA is the first address past the block, so stop
            # before it; '<=' also collected the first instruction of
            # whatever follows the block.
            while inst_addr < end:
                opcodes.append(GetMnem(inst_addr))
                inst_addr = NextHead(inst_addr)
        funcs_bodylist[funcname] = opcodes
    return funcs_bodylist
|
||||
|
||||
def get_func_cfgs(ea):
    """Build a CFG for every function that lies inside the 'LOAD' segment.

    Functions are enumerated starting at SegStart(ea); a running counter is
    printed for each function that is processed.

    Returns a dict: function name -> result of cfg.cfg_construct(func).
    Functions whose CFG construction fails are reported and left out.
    """
    func_cfglist = {}
    i = 0
    start, end = get_section('LOAD')
    for funcea in Functions(SegStart(ea)):
        # SegEnd is the address after the segment's last byte, so a function
        # starting exactly there is outside the segment.
        if not (start <= funcea < end):
            continue
        funcname = GetFunctionName(funcea)
        func = get_func(funcea)
        print(i)
        i += 1
        try:
            func_cfglist[funcname] = cfg.cfg_construct(func)
        except Exception as err:
            # Still best effort, but the failure is no longer hidden; the
            # former bare except also swallowed KeyboardInterrupt/SystemExit.
            print("get_func_cfgs: skipping %s: %s" % (funcname, err))
    return func_cfglist
|
||||
|
||||
def get_section(t):
    """Return (start, end) for the segment named ``t``.

    start is SegByBase(SegByName(t)); end is SegEnd(start).
    """
    seg_start = SegByBase(SegByName(t))
    return seg_start, SegEnd(seg_start)
|
||||
|
||||
|
||||
def get_func_cfg_sequences(func_cfglist):
    """Attach a mnemonic sequence to every block of every function.

    func_cfglist -- dict: function name -> indexable whose element 0 is an
                    iterable of (start, end) pairs.

    Returns a dict: function name -> {(start, end): get_sequences(start, end)}.
    """
    per_function = {}
    for name, entry in func_cfglist.items():
        block_ranges = entry[0]
        per_function[name] = dict(
            ((lo, hi), get_sequences(lo, hi)) for lo, hi in block_ranges
        )
    return per_function
|
||||
|
||||
|
||||
def get_sequences(start, end):
    """Return the mnemonics of the heads from ``start`` through ``end``.

    Walks with NextHead from ``start`` and stops at the first address greater
    than ``end``; a head located exactly at ``end`` is included.
    NOTE(review): if callers pass an exclusive end address, this includes one
    instruction too many -- confirm against the callers.
    """
    mnemonics = []
    addr = start
    while not addr > end:
        mnemonics.append(GetMnem(addr))
        addr = NextHead(addr)
    return mnemonics
|
||||
|
||||
def get_stack_arg(func_addr):
    """List the distinct stack-frame member names of a function.

    Prints ``func_addr``, then walks the frame returned by GetFrame from its
    first to its last member.  Names containing ' s' or ' r' are left out.

    Returns a list of names in frame order, or [] when there is no frame.
    """
    print(func_addr)
    frame = GetFrame(func_addr)
    if not frame:
        return []
    names = []
    offset = GetFirstMember(frame)
    last = GetLastMember(frame)
    while offset <= last:
        name = GetMemberName(frame, offset)
        size = GetMemberSize(frame, offset)
        # step over the member; advance by 4 when no size is reported
        offset += size if size else 4
        if not name or name in names:
            continue
        if ' s' in name or ' r' in name:
            continue
        names.append(name)
    return names
|
||||
|
||||
#pickle.dump(funcs, open('C:/Documents and Settings/Administrator/Desktop/funcs','w'))
|
||||
|
||||
def processDataSegs():
    """Cross-reference data addresses with the functions that reference them.

    Scans every segment whose type is SEG_DATA or SEG_BSS, head by head, and
    records each data reference whose source address belongs to a named
    function.

    Returns (funcdata, datafunc):
      funcdata -- function name -> list of referenced data addresses
      datafunc -- data address  -> list of names of referencing functions
    Both lists hold one entry per reference, so they may contain duplicates.
    """
    funcdata = {}
    datafunc = {}
    data_types = (idc.SEG_DATA, idc.SEG_BSS)
    for n in xrange(idaapi.get_segm_qty()):
        seg = idaapi.getnseg(n)
        ea = seg.startEA
        if idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE) not in data_types:
            continue
        cur = idc.SegStart(ea)
        end = idc.SegEnd(ea)
        # SegEnd is the address after the segment's last byte; '<=' also
        # scanned the head sitting at `end`, which is outside this segment.
        while cur < end:
            for fea in DataRefsTo(cur):
                name = GetFunctionName(fea)
                if not name:
                    # reference does not come from inside a named function
                    continue
                funcdata.setdefault(name, []).append(cur)
                datafunc.setdefault(cur, []).append(name)
            cur = NextHead(cur)
    return funcdata, datafunc
|
||||
|
||||
def obtainDataRefs(callgraph):
    """Find, for each node, the functions that reference the same data.

    callgraph -- iterable of function names (nodes).

    Uses processDataSegs() to learn which data addresses each function
    references and which functions reference each address.  Every time a
    node's existing entry is extended, the newly found names are printed.

    Returns a dict: node -> de-duplicated list of function names (the node
    itself included) that reference at least one address the node references.
    Nodes without data references are absent.
    """
    datarefs = {}
    funcdata, datafunc = processDataSegs()
    for node in callgraph:
        if node not in funcdata:
            continue
        for addr in funcdata[node]:
            sharers = list(set(datafunc[addr]))
            if node not in datarefs:
                datarefs[node] = sharers
                continue
            print(sharers)
            datarefs[node] += sharers
            datarefs[node] = list(set(datarefs[node]))
    return datarefs
|
||||
|
||||
|
@ -13,7 +13,8 @@ import cfg_constructor as cfg
|
||||
import cPickle as pickle
|
||||
import pdb
|
||||
from raw_graphs import *
|
||||
from discovRe_feature.discovRe import *
|
||||
#from discovRe_feature.discovRe import *
|
||||
from discovRe import *
|
||||
#import wingdbstub
|
||||
#wingdbstub.Ensure()
|
||||
def gt_funcNames(ea):
|
||||
|
@ -2,23 +2,146 @@ from idautils import *
|
||||
from idaapi import *
|
||||
from idc import *
|
||||
|
||||
def getSequences(start, end):
|
||||
seqs = []
|
||||
def getfunc_consts(func):
    """Gather the string and numeric constants of every block of ``func``.

    Returns (strings, consts): the getBBconsts results of each FlowChart
    block, concatenated in FlowChart order.
    """
    all_strings = []
    all_consts = []
    for node in FlowChart(func):
        found_strings, found_consts = getBBconsts((node.startEA, node.endEA))
        all_strings.extend(found_strings)
        all_consts.extend(found_consts)
    return all_strings, all_consts
|
||||
|
||||
def getConst(ea, offset):
    """Classify operand number ``offset`` of the instruction at ``ea``.

    Only operands of type idaapi.o_imm are looked at; any other operand
    yields two empty lists.  An immediate in 0..10 is recorded as a constant.
    Any other immediate that is a loaded address inside a segment is tried as
    a string: GetString(imm) first, then GetString(imm + 0x40000).  The string
    is kept only when every character c satisfies 40 <= ord(c) < 128; in all
    remaining cases the immediate itself is recorded as a constant.

    Returns (strings, consts); together they hold at most one element.
    """
    strings = []
    consts = []
    if GetOpType(ea, offset) != idaapi.o_imm:
        return strings, consts

    imm = GetOperandValue(ea, offset)
    text = None
    if not 0 <= imm <= 10 and idaapi.isLoaded(imm) and idaapi.getseg(imm):
        text = GetString(imm)
        if text is None:
            # NOTE(review): the purpose of the 0x40000 displacement is not
            # visible in this file.
            text = GetString(imm + 0x40000)

    if text is not None and all(40 <= ord(ch) < 128 for ch in text):
        strings.append(text)
    else:
        consts.append(imm)
    return strings, consts
|
||||
|
||||
def getBBconsts(bl):
|
||||
strings = []
|
||||
consts = []
|
||||
start = bl[0]
|
||||
end = bl[1]
|
||||
invoke_num = 0
|
||||
inst_addr = start
|
||||
while inst_addr < end:
|
||||
opcode = GetMnem(inst_addr)
|
||||
if opcode == 'move' or opcode == "mov":
|
||||
opnd1 = GetOpnd(inst_addr,0)
|
||||
if opnd1 == '$v0' or opnd1 == "$eax":
|
||||
opcode = (opcode, opnd1)
|
||||
seqs.append(opcode)
|
||||
if opcode in ['la','jalr','call', 'jal']:
|
||||
inst_addr = NextHead(inst_addr)
|
||||
return seqs
|
||||
continue
|
||||
strings_src, consts_src = getConst(inst_addr, 0)
|
||||
strings_dst, consts_dst = getConst(inst_addr, 1)
|
||||
strings += strings_src
|
||||
strings += strings_dst
|
||||
consts += consts_src
|
||||
consts += consts_dst
|
||||
try:
|
||||
strings_dst, consts_dst = getConst(inst_addr, 2)
|
||||
consts += consts_dst
|
||||
strings += strings_dst
|
||||
except:
|
||||
pass
|
||||
|
||||
inst_addr = NextHead(inst_addr)
|
||||
return strings, consts
|
||||
|
||||
def getFuncCalls(func):
    """Return the sum of calCalls over every FlowChart block of ``func``.

    Each block is passed to calCalls as a (startEA, endEA) tuple.
    """
    return sum(
        calCalls((node.startEA, node.endEA)) for node in FlowChart(func)
    )
|
||||
|
||||
def getLogicInsts(func):
    """Return the sum of calLogicInstructions over every block of ``func``.

    Each FlowChart block is passed on as a (startEA, endEA) tuple.
    """
    return sum(
        calLogicInstructions((node.startEA, node.endEA))
        for node in FlowChart(func)
    )
|
||||
|
||||
def getTransferInsts(func):
    """Return the sum of calTransferIns over every block of ``func``.

    Each FlowChart block is passed on as a (startEA, endEA) tuple.
    """
    return sum(
        calTransferIns((node.startEA, node.endEA)) for node in FlowChart(func)
    )
|
||||
|
||||
def getIntrs(func):
    """Return the sum of calInsts over every FlowChart block of ``func``.

    Each block is passed to calInsts as a (startEA, endEA) tuple.
    """
    return sum(
        calInsts((node.startEA, node.endEA)) for node in FlowChart(func)
    )
|
||||
|
||||
def getLocalVariables(func):
    """Return get_stackVariables() for the start address of ``func``."""
    return get_stackVariables(func.startEA)
|
||||
|
||||
def getBasicBlocks(func):
    """Return the number of blocks FlowChart yields for ``func``."""
    return sum(1 for _ in FlowChart(func))
|
||||
|
||||
def getIncommingCalls(func):
    """Return how many references CodeRefsTo(func.startEA, 0) yields."""
    return sum(1 for _ in CodeRefsTo(func.startEA, 0))
|
||||
|
||||
|
||||
def get_stackVariables(func_addr):
    """Count the distinct stack-frame members whose name contains 'var_'.

    Walks the frame returned by GetFrame(func_addr) from its first to its
    last member.  Returns 0 when there is no frame.
    """
    frame = GetFrame(func_addr)
    if not frame:
        return 0
    seen = set()
    offset = GetFirstMember(frame)
    last = GetLastMember(frame)
    while offset <= last:
        name = GetMemberName(frame, offset)
        size = GetMemberSize(frame, offset)
        # step over the member; advance by 4 when no size is reported
        offset += size if size else 4
        if name and 'var_' in name:
            seen.add(name)
    return len(seen)
|
||||
|
||||
|
||||
|
||||
def calArithmeticIns(bl):
|
||||
x86_AI = {'add':1, 'sub':1, 'div':1, 'imul':1, 'idiv':1, 'mul':1, 'shl':1, 'dec':1, 'inc':1}
|
||||
mips_AI = {'add':1, 'addu':1, 'addi':1, 'addiu':1, 'mult':1, 'multu':1, 'div':1, 'divu':1}
|
||||
arm_AI = {"ADD":1, "ADC":1, "SUB":1, "SBC":1, "RSB":1, "RSC":1, "MUL":1, "MLA":1}
|
||||
calls = {}
|
||||
calls.update(x86_AI)
|
||||
calls.update(mips_AI)
|
||||
@ -28,22 +151,20 @@ def calArithmeticIns(bl):
|
||||
inst_addr = start
|
||||
while inst_addr < end:
|
||||
opcode = GetMnem(inst_addr)
|
||||
re = [v for v in calls if opcode in v]
|
||||
if len(re) > 0:
|
||||
if opcode in calls:
|
||||
invoke_num += 1
|
||||
inst_addr = NextHead(inst_addr)
|
||||
return invoke_num
|
||||
|
||||
def calCalls(bl):
|
||||
calls = {'call':1, 'jal':1, 'jalr':1, "BL":1}
|
||||
calls = {'call':1, 'jal':1, 'jalr':1}
|
||||
start = bl[0]
|
||||
end = bl[1]
|
||||
invoke_num = 0
|
||||
inst_addr = start
|
||||
while inst_addr < end:
|
||||
opcode = GetMnem(inst_addr)
|
||||
re = [v for v in calls if opcode in v]
|
||||
if len(re) > 0:
|
||||
if opcode in calls:
|
||||
invoke_num += 1
|
||||
inst_addr = NextHead(inst_addr)
|
||||
return invoke_num
|
||||
@ -61,19 +182,16 @@ def calInsts(bl):
|
||||
def calLogicInstructions(bl):
|
||||
x86_LI = {'and':1, 'andn':1, 'andnpd':1, 'andpd':1, 'andps':1, 'andnps':1, 'test':1, 'xor':1, 'xorpd':1, 'pslld':1}
|
||||
mips_LI = {'and':1, 'andi':1, 'or':1, 'ori':1, 'xor':1, 'nor':1, 'slt':1, 'slti':1, 'sltu':1}
|
||||
arm_LI = {"AND":1, "EOR":1, "ORR":1, "ORN":1, 'BIC':1}
|
||||
calls = {}
|
||||
calls.update(x86_LI)
|
||||
calls.update(mips_LI)
|
||||
calls.update(arm_LI)
|
||||
start = bl[0]
|
||||
end = bl[1]
|
||||
invoke_num = 0
|
||||
inst_addr = start
|
||||
while inst_addr < end:
|
||||
opcode = GetMnem(inst_addr)
|
||||
re = [v for v in calls if opcode in v]
|
||||
if len(re) > 0:
|
||||
if opcode in calls:
|
||||
invoke_num += 1
|
||||
inst_addr = NextHead(inst_addr)
|
||||
return invoke_num
|
||||
@ -90,35 +208,19 @@ def calSconstants(bl):
|
||||
inst_addr = NextHead(inst_addr)
|
||||
return invoke_num
|
||||
|
||||
def getConst(ea, offset):
    """Classify operand number ``offset`` of the instruction at ``ea``.

    Only operands of type idaapi.o_imm are looked at.  When the immediate is
    a loaded address inside a segment, the result of GetString(imm) is
    recorded as a string (whatever GetString returns); otherwise the immediate
    is recorded as a constant.

    Returns (strings, consts); both are empty for non-immediate operands.
    """
    strings = []
    consts = []
    if GetOpType(ea, offset) != idaapi.o_imm:
        return strings, consts

    imm = GetOperandValue(ea, offset)
    if idaapi.isLoaded(imm) and idaapi.getseg(imm):
        strings.append(GetString(imm))
    else:
        consts.append(imm)
    return strings, consts
|
||||
|
||||
def getBBconsts(bl):
|
||||
strings = []
|
||||
consts = []
|
||||
def calNconstants(bl):
|
||||
start = bl[0]
|
||||
end = bl[1]
|
||||
invoke_num = 0
|
||||
inst_addr = start
|
||||
while inst_addr < end:
|
||||
strings_src, consts_src = getConst(inst_addr, 0)
|
||||
strings_dst, consts_dst = getConst(inst_addr, 1)
|
||||
strings += strings_src
|
||||
strings += strings_dst
|
||||
consts += consts_src
|
||||
consts += consts_dst
|
||||
optype1 = GetOpType(inst_addr, 0)
|
||||
optype2 = GetOpType(inst_addr, 1)
|
||||
if optype1 == 5 or optype2 == 5:
|
||||
invoke_num += 1
|
||||
inst_addr = NextHead(inst_addr)
|
||||
return strings, consts
|
||||
return invoke_num
|
||||
|
||||
def retrieveExterns(bl, ea_externs):
|
||||
externs = []
|
||||
@ -142,7 +244,6 @@ def calTransferIns(bl):
|
||||
calls = {}
|
||||
calls.update(x86_TI)
|
||||
calls.update(mips_TI)
|
||||
calls.update(arm_TI)
|
||||
start = bl[0]
|
||||
end = bl[1]
|
||||
invoke_num = 0
|
||||
|
Loading…
Reference in New Issue
Block a user