Gencoding_plus/raw-feature-extractor/graph_analysis_ida.py

156 lines
3.9 KiB
Python
Raw Normal View History

from idautils import *
from idaapi import *
from idc import *
def getSequences(start, end):
seqs = []
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
if opcode == 'move' or opcode == "mov":
opnd1 = GetOpnd(inst_addr,0)
if opnd1 == '$v0' or opnd1 == "$eax":
opcode = (opcode, opnd1)
seqs.append(opcode)
inst_addr = NextHead(inst_addr)
return seqs
def calArithmeticIns(bl):
x86_AI = {'add':1, 'sub':1, 'div':1, 'imul':1, 'idiv':1, 'mul':1, 'shl':1, 'dec':1, 'inc':1}
mips_AI = {'add':1, 'addu':1, 'addi':1, 'addiu':1, 'mult':1, 'multu':1, 'div':1, 'divu':1}
arm_AI = {"ADD":1, "ADC":1, "SUB":1, "SBC":1, "RSB":1, "RSC":1, "MUL":1, "MLA":1}
calls = {}
calls.update(x86_AI)
calls.update(mips_AI)
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
re = [v for v in calls if opcode in v]
if len(re) > 0:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def calCalls(bl):
calls = {'call':1, 'jal':1, 'jalr':1, "BL":1}
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
re = [v for v in calls if opcode in v]
if len(re) > 0:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def calInsts(bl):
start = bl[0]
end = bl[1]
ea = start
num = 0
while ea < end:
num += 1
ea = NextHead(ea)
return num
def calLogicInstructions(bl):
x86_LI = {'and':1, 'andn':1, 'andnpd':1, 'andpd':1, 'andps':1, 'andnps':1, 'test':1, 'xor':1, 'xorpd':1, 'pslld':1}
mips_LI = {'and':1, 'andi':1, 'or':1, 'ori':1, 'xor':1, 'nor':1, 'slt':1, 'slti':1, 'sltu':1}
arm_LI = {"AND":1, "EOR":1, "ORR":1, "ORN":1, 'BIC':1}
calls = {}
calls.update(x86_LI)
calls.update(mips_LI)
calls.update(arm_LI)
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
re = [v for v in calls if opcode in v]
if len(re) > 0:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def calSconstants(bl):
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
if opcode in calls:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def getConst(ea, offset):
strings = []
consts = []
optype1 = GetOpType(ea, offset)
if optype1 == idaapi.o_imm:
imm_value = GetOperandValue(ea, offset)
if idaapi.isLoaded(imm_value) and idaapi.getseg(imm_value):
str_value = GetString(imm_value)
strings.append(str_value)
else:
consts.append(imm_value)
return strings, consts
def getBBconsts(bl):
strings = []
consts = []
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
strings_src, consts_src = getConst(inst_addr, 0)
strings_dst, consts_dst = getConst(inst_addr, 1)
strings += strings_src
strings += strings_dst
consts += consts_src
consts += consts_dst
inst_addr = NextHead(inst_addr)
return strings, consts
def retrieveExterns(bl, ea_externs):
externs = []
start = bl[0]
end = bl[1]
inst_addr = start
while inst_addr < end:
refs = CodeRefsFrom(inst_addr, 1)
try:
ea = [v for v in refs if v in ea_externs][0]
externs.append(ea_externs[ea])
except:
pass
inst_addr = NextHead(inst_addr)
return externs
def calTransferIns(bl):
x86_TI = {'jmp':1, 'jz':1, 'jnz':1, 'js':1, 'je':1, 'jne':1, 'jg':1, 'jle':1, 'jge':1, 'ja':1, 'jnc':1, 'call':1}
mips_TI = {'beq':1, 'bne':1, 'bgtz':1, "bltz":1, "bgez":1, "blez":1, 'j':1, 'jal':1, 'jr':1, 'jalr':1}
arm_TI = {'MVN':1, "MOV":1}
calls = {}
calls.update(x86_TI)
calls.update(mips_TI)
calls.update(arm_TI)
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
re = [v for v in calls if opcode in v]
if len(re) > 0:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num