Gencoding_plus/raw-feature-extractor/graph_analysis_ida.py
2016-09-16 15:36:34 -04:00

257 lines
6.0 KiB
Python

from idautils import *
from idaapi import *
from idc import *
def getfunc_consts(func):
strings = []
consts = []
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
for bl in blocks:
strs, conts = getBBconsts(bl)
strings += strs
consts += conts
return strings, consts
def getConst(ea, offset):
strings = []
consts = []
optype1 = GetOpType(ea, offset)
if optype1 == idaapi.o_imm:
imm_value = GetOperandValue(ea, offset)
if 0<= imm_value <= 10:
consts.append(imm_value)
else:
if idaapi.isLoaded(imm_value) and idaapi.getseg(imm_value):
str_value = GetString(imm_value)
if str_value is None:
str_value = GetString(imm_value+0x40000)
if str_value is None:
consts.append(imm_value)
else:
re = all(40 <= ord(c) < 128 for c in str_value)
if re:
strings.append(str_value)
else:
consts.append(imm_value)
else:
re = all(40 <= ord(c) < 128 for c in str_value)
if re:
strings.append(str_value)
else:
consts.append(imm_value)
else:
consts.append(imm_value)
return strings, consts
def getBBconsts(bl):
strings = []
consts = []
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
if opcode in ['la','jalr','call', 'jal']:
inst_addr = NextHead(inst_addr)
continue
strings_src, consts_src = getConst(inst_addr, 0)
strings_dst, consts_dst = getConst(inst_addr, 1)
strings += strings_src
strings += strings_dst
consts += consts_src
consts += consts_dst
try:
strings_dst, consts_dst = getConst(inst_addr, 2)
consts += consts_dst
strings += strings_dst
except:
pass
inst_addr = NextHead(inst_addr)
return strings, consts
def getFuncCalls(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
sumcalls = 0
for bl in blocks:
callnum = calCalls(bl)
sumcalls += callnum
return sumcalls
def getLogicInsts(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
sumcalls = 0
for bl in blocks:
callnum = calLogicInstructions(bl)
sumcalls += callnum
return sumcalls
def getTransferInsts(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
sumcalls = 0
for bl in blocks:
callnum = calTransferIns(bl)
sumcalls += callnum
return sumcalls
def getIntrs(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
sumcalls = 0
for bl in blocks:
callnum = calInsts(bl)
sumcalls += callnum
return sumcalls
def getLocalVariables(func):
args_num = get_stackVariables(func.startEA)
return args_num
def getBasicBlocks(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
return len(blocks)
def getIncommingCalls(func):
refs = CodeRefsTo(func.startEA, 0)
re = len([v for v in refs])
return re
def get_stackVariables(func_addr):
#print func_addr
args = []
stack = GetFrame(func_addr)
if not stack:
return 0
firstM = GetFirstMember(stack)
lastM = GetLastMember(stack)
i = firstM
while i <=lastM:
mName = GetMemberName(stack,i)
mSize = GetMemberSize(stack,i)
if mSize:
i = i + mSize
else:
i = i+4
if mName not in args and mName and 'var_' in mName:
args.append(mName)
return len(args)
def calArithmeticIns(bl):
x86_AI = {'add':1, 'sub':1, 'div':1, 'imul':1, 'idiv':1, 'mul':1, 'shl':1, 'dec':1, 'inc':1}
mips_AI = {'add':1, 'addu':1, 'addi':1, 'addiu':1, 'mult':1, 'multu':1, 'div':1, 'divu':1}
calls = {}
calls.update(x86_AI)
calls.update(mips_AI)
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
if opcode in calls:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def calCalls(bl):
calls = {'call':1, 'jal':1, 'jalr':1}
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
if opcode in calls:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def calInsts(bl):
start = bl[0]
end = bl[1]
ea = start
num = 0
while ea < end:
num += 1
ea = NextHead(ea)
return num
def calLogicInstructions(bl):
x86_LI = {'and':1, 'andn':1, 'andnpd':1, 'andpd':1, 'andps':1, 'andnps':1, 'test':1, 'xor':1, 'xorpd':1, 'pslld':1}
mips_LI = {'and':1, 'andi':1, 'or':1, 'ori':1, 'xor':1, 'nor':1, 'slt':1, 'slti':1, 'sltu':1}
calls = {}
calls.update(x86_LI)
calls.update(mips_LI)
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
if opcode in calls:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def calSconstants(bl):
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
if opcode in calls:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def calNconstants(bl):
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
optype1 = GetOpType(inst_addr, 0)
optype2 = GetOpType(inst_addr, 1)
if optype1 == 5 or optype2 == 5:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def retrieveExterns(bl, ea_externs):
externs = []
start = bl[0]
end = bl[1]
inst_addr = start
while inst_addr < end:
refs = CodeRefsFrom(inst_addr, 1)
try:
ea = [v for v in refs if v in ea_externs][0]
externs.append(ea_externs[ea])
except:
pass
inst_addr = NextHead(inst_addr)
return externs
def calTransferIns(bl):
x86_TI = {'jmp':1, 'jz':1, 'jnz':1, 'js':1, 'je':1, 'jne':1, 'jg':1, 'jle':1, 'jge':1, 'ja':1, 'jnc':1, 'call':1}
mips_TI = {'beq':1, 'bne':1, 'bgtz':1, "bltz":1, "bgez":1, "blez":1, 'j':1, 'jal':1, 'jr':1, 'jalr':1}
arm_TI = {'MVN':1, "MOV":1}
calls = {}
calls.update(x86_TI)
calls.update(mips_TI)
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
re = [v for v in calls if opcode in v]
if len(re) > 0:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num