Gencoding/raw-feature-extractor/graph_analysis_ida.py

387 lines
9.1 KiB
Python
Raw Permalink Normal View History

2023-12-02 21:53:57 +08:00
# coding=utf-8
from idautils import *
from idaapi import *
2023-12-02 21:53:57 +08:00
from idc import *
2023-12-02 21:53:57 +08:00
2016-09-17 03:36:34 +08:00
def getfunc_consts(func):
    """Gather the string and numeric constants referenced by a function.

    Each basic block reported by ``FlowChart(func)`` is passed to
    ``getBBconsts`` as a ``(startEA, endEA)`` pair and the per-block
    results are concatenated in block order.

    :param func: function object accepted by ``FlowChart``.
    :return: tuple ``(strings, consts)`` of two lists.
    """
    all_strings = []
    all_consts = []
    spans = [(node.startEA, node.endEA) for node in FlowChart(func)]
    for span in spans:
        block_strings, block_consts = getBBconsts(span)
        all_strings.extend(block_strings)
        all_consts.extend(block_consts)
    return all_strings, all_consts
2016-09-17 03:36:34 +08:00
def getConst(ea, offset):
    """Classify one operand of an instruction as a string or a number.

    Only operands whose type equals ``o_imm`` are considered; for any
    other operand type both returned lists are empty.

    :param ea: address of the instruction.
    :param offset: operand index passed to ``GetOpType`` and
        ``GetOperandValue``.
    :return: tuple ``(strings, consts)``; together the two lists hold at
        most one element.
    """
    found_strings = []
    found_numbers = []

    if GetOpType(ea, offset) != o_imm:
        return found_strings, found_numbers

    value = GetOperandValue(ea, offset)

    # Values 0..10 are always recorded as plain numbers.
    if 0 <= value <= 10:
        found_numbers.append(value)
        return found_strings, found_numbers

    # A value that is not a loaded address inside a segment cannot be
    # looked up as a string, so it is kept as a number.
    if not (isLoaded(value) and getseg(value)):
        found_numbers.append(value)
        return found_strings, found_numbers

    text = GetString(value)
    if text is None:
        # Second attempt at a fixed displacement.
        # NOTE(review): the reason for 0x40000 is not visible in this
        # file -- presumably a load-base adjustment; confirm.
        text = GetString(value + 0x40000)

    # Accept the text only when every character code is in [40, 128);
    # otherwise (or when no text was found) keep the raw value.
    if text is not None and all(40 <= ord(ch) < 128 for ch in text):
        found_strings.append(text)
    else:
        found_numbers.append(value)
    return found_strings, found_numbers
2016-09-17 03:36:34 +08:00
def getBBconsts(bl):
    """Collect string and numeric constants from one basic block.

    Walks the heads in ``[bl[0], bl[1])`` and runs ``getConst`` on
    operands 0, 1 and 2 of each one. Heads whose mnemonic is ``la``,
    ``jalr``, ``call`` or ``jal`` are skipped entirely.

    :param bl: ``(start, end)`` address pair; ``end`` is exclusive.
    :return: tuple ``(strings, consts)`` of two lists, in address order.
    """
    skipped_mnemonics = ('la', 'jalr', 'call', 'jal')
    strings = []
    consts = []
    inst_addr = bl[0]
    end = bl[1]
    while inst_addr < end:
        if GetMnem(inst_addr) in skipped_mnemonics:
            inst_addr = NextHead(inst_addr)
            continue

        strings_src, consts_src = getConst(inst_addr, 0)
        strings_dst, consts_dst = getConst(inst_addr, 1)
        strings += strings_src
        strings += strings_dst
        consts += consts_src
        consts += consts_dst

        # The third operand is handled on a best-effort basis: a failure
        # here is ignored. Only ``Exception`` is caught so that
        # KeyboardInterrupt / SystemExit can still stop the script.
        try:
            strings_third, consts_third = getConst(inst_addr, 2)
        except Exception:
            pass
        else:
            consts += consts_third
            strings += strings_third

        inst_addr = NextHead(inst_addr)
    return strings, consts
2016-09-17 03:36:34 +08:00
def getFuncCalls(func):
    """Total the ``calCalls`` result over every basic block of a function.

    :param func: function object accepted by ``FlowChart``.
    :return: sum of the per-block call counts (0 when there are no blocks).
    """
    spans = [(node.startEA, node.endEA) for node in FlowChart(func)]
    return sum(calCalls(span) for span in spans)
2016-09-17 03:36:34 +08:00
def getLogicInsts(func):
    """Total the ``calLogicInstructions`` result over a function's blocks.

    :param func: function object accepted by ``FlowChart``.
    :return: sum of the per-block logic-instruction counts.
    """
    spans = [(node.startEA, node.endEA) for node in FlowChart(func)]
    total = 0
    for span in spans:
        total = total + calLogicInstructions(span)
    return total
2016-09-17 03:36:34 +08:00
def getTransferInsts(func):
    """Total the ``calTransferIns`` result over a function's basic blocks.

    :param func: function object accepted by ``FlowChart``.
    :return: sum of the per-block transfer-instruction counts.
    """
    spans = [(node.startEA, node.endEA) for node in FlowChart(func)]
    return sum(calTransferIns(span) for span in spans)
2016-09-17 03:36:34 +08:00
def getIntrs(func):
    """Total the ``calInsts`` result over a function's basic blocks.

    :param func: function object accepted by ``FlowChart``.
    :return: sum of the per-block instruction counts.
    """
    spans = [(node.startEA, node.endEA) for node in FlowChart(func)]
    total = 0
    for span in spans:
        total = total + calInsts(span)
    return total
2016-09-17 03:36:34 +08:00
def getLocalVariables(func):
    """Return ``get_stackVariables`` evaluated at the function's start address.

    :param func: object exposing a ``startEA`` attribute.
    :return: whatever ``get_stackVariables(func.startEA)`` returns.
    """
    return get_stackVariables(func.startEA)
2016-09-17 03:36:34 +08:00
def getBasicBlocks(func):
    """Return how many basic blocks ``FlowChart`` reports for a function.

    :param func: function object accepted by ``FlowChart``.
    :return: number of blocks.
    """
    spans = []
    for node in FlowChart(func):
        spans.append((node.startEA, node.endEA))
    return len(spans)
2016-09-17 03:36:34 +08:00
def getIncommingCalls(func):
    """Count the references yielded by ``CodeRefsTo(func.startEA, 0)``.

    :param func: object exposing a ``startEA`` attribute.
    :return: number of code references to the function's start address.
    """
    return sum(1 for _ in CodeRefsTo(func.startEA, 0))
2016-09-17 03:36:34 +08:00
def get_stackVariables(func_addr):
    """Count distinct stack-frame members whose name contains ``var_``.

    :param func_addr: address passed to ``GetFrame``.
    :return: number of distinct matching member names, or 0 when
        ``GetFrame`` returns a falsy value.
    """
    frame = GetFrame(func_addr)
    if not frame:
        return 0

    var_names = set()
    offset = GetFirstMember(frame)
    last_offset = GetLastMember(frame)
    while offset <= last_offset:
        name = GetMemberName(frame, offset)
        size = GetMemberSize(frame, offset)
        # Step over the member; when no size is reported advance by 4.
        offset += size if size else 4
        if name and 'var_' in name:
            var_names.add(name)
    return len(var_names)
def calArithmeticIns(bl):
    """Count arithmetic instructions in a basic block.

    :param bl: ``(start, end)`` address pair; ``end`` is exclusive.
    :return: number of heads in the range whose mnemonic is one of the
        x86 or MIPS arithmetic mnemonics listed below.
    """
    x86_mnemonics = ('add', 'sub', 'div', 'imul', 'idiv', 'mul', 'shl',
                     'dec', 'inc')
    mips_mnemonics = ('add', 'addu', 'addi', 'addiu', 'mult', 'multu',
                      'div', 'divu')
    arithmetic = frozenset(x86_mnemonics + mips_mnemonics)

    count = 0
    ea = bl[0]
    limit = bl[1]
    while ea < limit:
        if GetMnem(ea) in arithmetic:
            count += 1
        ea = NextHead(ea)
    return count
def calCalls(bl):
    """Count call instructions in a basic block.

    :param bl: ``(start, end)`` address pair; ``end`` is exclusive.
    :return: number of heads in the range whose mnemonic is ``call``,
        ``jal`` or ``jalr``.
    """
    call_mnemonics = ('call', 'jal', 'jalr')
    count = 0
    ea = bl[0]
    limit = bl[1]
    while ea < limit:
        count += 1 if GetMnem(ea) in call_mnemonics else 0
        ea = NextHead(ea)
    return count
def calInsts(bl):
    """Count the heads (as stepped by ``NextHead``) in a basic block.

    :param bl: ``(start, end)`` address pair; ``end`` is exclusive.
    :return: number of heads visited, 0 when ``start >= end``.
    """
    head_count = 0
    cursor = bl[0]
    limit = bl[1]
    while cursor < limit:
        cursor = NextHead(cursor)
        head_count += 1
    return head_count
def calLogicInstructions(bl):
    """Count logic instructions in a basic block.

    :param bl: ``(start, end)`` address pair; ``end`` is exclusive.
    :return: number of heads in the range whose mnemonic is one of the
        x86 or MIPS logic mnemonics listed below.
    """
    x86_mnemonics = ('and', 'andn', 'andnpd', 'andpd', 'andps', 'andnps',
                     'test', 'xor', 'xorpd', 'pslld')
    mips_mnemonics = ('and', 'andi', 'or', 'ori', 'xor', 'nor', 'slt',
                      'slti', 'sltu')
    logic = frozenset(x86_mnemonics + mips_mnemonics)

    count = 0
    ea = bl[0]
    limit = bl[1]
    while ea < limit:
        if GetMnem(ea) in logic:
            count += 1
        ea = NextHead(ea)
    return count
def calSconstants(bl):
    """String-constant counter for a basic block (currently always 0).

    The block is walked head by head, but the table of mnemonics to
    match is empty, so no head is ever counted.
    NOTE(review): this looks like an unfinished placeholder -- confirm
    whether callers rely on the constant 0.

    :param bl: ``(start, end)`` address pair; ``end`` is exclusive.
    :return: 0.
    """
    wanted_mnemonics = frozenset()
    count = 0
    ea = bl[0]
    limit = bl[1]
    while ea < limit:
        if GetMnem(ea) in wanted_mnemonics:
            count += 1
        ea = NextHead(ea)
    return count
2016-09-17 03:36:34 +08:00
def calNconstants(bl):
    """Count heads in a basic block with operand type 5 in operand 0 or 1.

    :param bl: ``(start, end)`` address pair; ``end`` is exclusive.
    :return: number of heads for which ``GetOpType`` reports 5 for the
        first or the second operand.
    """
    # NOTE(review): 5 is presumably the immediate operand type
    # (idaapi.o_imm) -- confirm before replacing the literal.
    wanted_type = 5
    count = 0
    ea = bl[0]
    limit = bl[1]
    while ea < limit:
        operand_types = (GetOpType(ea, 0), GetOpType(ea, 1))
        if wanted_type in operand_types:
            count += 1
        ea = NextHead(ea)
    return count
def retrieveExterns(bl, ea_externs):
    """List the external symbols referenced from a basic block.

    For every head in ``[bl[0], bl[1])`` the targets yielded by
    ``CodeRefsFrom(head, 1)`` are scanned in order; the first target
    that is a key of ``ea_externs`` contributes ``ea_externs[target]``
    to the result. Heads with no matching target contribute nothing.

    The lookup is an explicit search rather than an index into a
    possibly empty list guarded by a catch-all ``except``, so genuine
    errors (for example an ``ea_externs`` that is not a mapping) are
    no longer silently discarded.

    :param bl: ``(start, end)`` address pair; ``end`` is exclusive.
    :param ea_externs: mapping from address to the value to collect.
    :return: list of collected values, in address order.
    """
    externs = []
    inst_addr = bl[0]
    end = bl[1]
    while inst_addr < end:
        for target in CodeRefsFrom(inst_addr, 1):
            if target in ea_externs:
                externs.append(ea_externs[target])
                # Only the first matching reference per head is used.
                break
        inst_addr = NextHead(inst_addr)
    return externs
def calTransferIns(bl):
    """Count control-transfer instructions in a basic block.

    A head is counted when its mnemonic is exactly one of the x86 or
    MIPS transfer mnemonics listed below.

    :param bl: ``(start, end)`` address pair; ``end`` is exclusive.
    :return: number of matching heads in the range.
    """
    x86_mnemonics = ('jmp', 'jz', 'jnz', 'js', 'je', 'jne', 'jg', 'jle',
                     'jge', 'ja', 'jnc', 'call')
    mips_mnemonics = ('beq', 'bne', 'bgtz', 'bltz', 'bgez', 'blez', 'j',
                      'jal', 'jr', 'jalr')
    transfer = frozenset(x86_mnemonics + mips_mnemonics)

    invoke_num = 0
    inst_addr = bl[0]
    end = bl[1]
    while inst_addr < end:
        # Exact membership, as in the other per-block counters. Testing
        # the mnemonic as a substring of each table entry would also
        # count a head with an empty mnemonic, because '' is a substring
        # of every string.
        if GetMnem(inst_addr) in transfer:
            invoke_num += 1
        inst_addr = NextHead(inst_addr)
    return invoke_num
def calCompareIns(bl):
    """Count compare instructions in a basic block.

    :param bl: ``(start, end)`` address pair; ``end`` is exclusive.
    :return: number of heads in the range whose mnemonic is ``cmp``.
    """
    compare_mnemonics = ('cmp',)
    count = 0
    ea = bl[0]
    limit = bl[1]
    while ea < limit:
        if GetMnem(ea) in compare_mnemonics:
            count += 1
        ea = NextHead(ea)
    return count
def calMoveIns(bl):
    """Count move instructions in a basic block.

    :param bl: ``(start, end)`` address pair; ``end`` is exclusive.
    :return: number of heads in the range whose mnemonic is ``mov``,
        ``lea`` or ``xchg``.
    """
    move_mnemonics = ('mov', 'lea', 'xchg')
    matched = 0
    ea, stop = bl[0], bl[1]
    while ea < stop:
        matched += 1 if GetMnem(ea) in move_mnemonics else 0
        ea = NextHead(ea)
    return matched
def calTerminationIns(bl):
    """Count termination instructions in a basic block.

    :param bl: ``(start, end)`` address pair; ``end`` is exclusive.
    :return: number of heads in the range whose mnemonic is ``ret``,
        ``retn``, ``hlt`` or ``sys_exit``.
    """
    terminators = ('ret', 'retn', 'hlt', 'sys_exit')
    count = 0
    ea = bl[0]
    limit = bl[1]
    while ea < limit:
        if GetMnem(ea) in terminators:
            count += 1
        ea = NextHead(ea)
    return count
def calDateDecIns(bl):
    """Count data-declaration heads in a basic block.

    :param bl: ``(start, end)`` address pair; ``end`` is exclusive.
    :return: number of heads in the range whose mnemonic is ``db``,
        ``dw``, ``dd`` or ``dq``.
    """
    declaration_mnemonics = ('db', 'dw', 'dd', 'dq')
    matched = 0
    ea, stop = bl[0], bl[1]
    while ea < stop:
        matched += 1 if GetMnem(ea) in declaration_mnemonics else 0
        ea = NextHead(ea)
    return matched