Gencoding_Ke/Genius3/raw-feature-extractor/func.py

311 lines
8.7 KiB
Python
Raw Permalink Normal View History

2021-11-18 17:43:34 +08:00
# -*- coding: UTF-8 -*-
#
# Reference Lister
#
# List all functions and all references to them in the current section.
#
# Implemented with the idautils module
#
from idautils import *
from idaapi import *
from idc import *
import networkx as nx
import cfg_constructor as cfg
import pdb
from raw_graphs import *
#from discovRe_feature.discovRe import *
from discovRe import *
2023-08-03 10:03:02 +08:00
2024-03-01 16:11:26 +08:00
2021-11-18 17:43:34 +08:00
2023-08-03 10:03:02 +08:00
2021-11-18 17:43:34 +08:00
def print_obj(obj):
    """Print every instance attribute of ``obj``.

    The attributes are taken from ``obj.__dict__`` and printed as a single
    dictionary, which is handy for objects whose default ``print`` output
    only shows the class name and address.
    """
    attributes = obj.__dict__
    print(attributes)
2023-08-03 10:03:02 +08:00
2021-11-18 17:43:34 +08:00
def gt_funcNames(ea):
    """Return the unified names of the functions listed from ea's segment.

    Functions are enumerated with ``Functions(SegStart(ea))``.  Any name that
    also appears in the name mapping returned by ``processpltSegs()`` is
    printed and left out of the returned list.
    """
    stub_names, _ = processpltSegs()
    names = []
    for func_start in Functions(SegStart(ea)):
        name = get_unified_funcname(func_start)
        if name not in stub_names:
            names.append(name)
        else:
            # Name belongs to one of the stub/extern segments: report and skip.
            print(name)
    return names
2021-11-18 17:43:34 +08:00
def get_funcs(ea):
    """Map each function name to the address ranges of its basic blocks.

    Functions are enumerated with ``Functions(SegStart(ea))``; names present
    in the mapping returned by ``processpltSegs()`` are skipped.

    Returns:
        dict: ``{funcname: [(block.startEA, block.endEA), ...]}`` with one
        tuple per block of the function's ``FlowChart``.
    """
    stub_names, _ = processpltSegs()
    blocks_by_func = {}
    for func_start in Functions(SegStart(ea)):
        name = get_unified_funcname(func_start)
        if name in stub_names:
            continue
        flow = FlowChart(get_func(func_start))
        blocks_by_func[name] = [(block.startEA, block.endEA) for block in flow]
    return blocks_by_func
2021-11-18 17:43:34 +08:00
# used for the callgraph generation.
def get_func_namesWithoutE(ea):
    """Map function names to their start addresses, leaving out stub names.

    Functions are enumerated with ``Functions(SegStart(ea))``.  Names found in
    the mapping returned by ``processpltSegs()`` are printed and excluded.
    The start address of any function whose name contains ``'close'`` is
    also printed (diagnostic output kept from the original code).

    Returns:
        dict: ``{funcname: function start address}``.
    """
    stub_names, _ = processpltSegs()
    name_to_ea = {}
    for func_start in Functions(SegStart(ea)):
        name = get_unified_funcname(func_start)
        if 'close' in name:
            print(func_start)
        if name in stub_names:
            print(name)
        else:
            name_to_ea[name] = func_start
    return name_to_ea
2021-11-18 17:43:34 +08:00
# used for the callgraph generation.
def get_func_names(ea):
    """Map every function name to its start address (no filtering).

    Functions are enumerated with ``Functions(SegStart(ea))``.  If two
    functions share the same unified name, the later one wins.

    Returns:
        dict: ``{funcname: function start address}``.
    """
    return dict(
        (get_unified_funcname(func_start), func_start)
        for func_start in Functions(SegStart(ea))
    )
2021-11-18 17:43:34 +08:00
def get_func_bases(ea):
    """Map function start addresses to unified function names.

    Functions are enumerated with ``Functions(SegStart(ea))``; those whose
    name is present in the mapping returned by ``processpltSegs()`` are
    skipped.

    Returns:
        dict: ``{function start address: funcname}``.
    """
    stub_names, _ = processpltSegs()
    ea_to_name = {}
    for func_start in Functions(SegStart(ea)):
        name = get_unified_funcname(func_start)
        if name not in stub_names:
            ea_to_name[func_start] = name
    return ea_to_name
2021-11-18 17:43:34 +08:00
def get_func_range(ea):
    """Map every function name to its ``(startEA, endEA)`` address pair.

    Functions are enumerated with ``Functions(SegStart(ea))`` and the bounds
    are read from the object returned by ``get_func``.

    Returns:
        dict: ``{funcname: (func.startEA, func.endEA)}``.
    """
    ranges = {}
    for func_start in Functions(SegStart(ea)):
        name = get_unified_funcname(func_start)
        func_obj = get_func(func_start)
        bounds = (func_obj.startEA, func_obj.endEA)
        ranges[name] = bounds
    return ranges
2021-11-18 17:43:34 +08:00
def get_unified_funcname(ea):
    """Return the function name at ``ea`` without a single leading dot.

    The name comes from ``GetFunctionName(ea)``.  When it starts with ``'.'``
    that one character is removed; otherwise (including the empty string)
    the name is returned unchanged.
    """
    name = GetFunctionName(ea)
    # Slicing keeps the empty-string case safe without a separate length test.
    if name[:1] == '.':
        return name[1:]
    return name
2021-11-18 17:43:34 +08:00
def get_func_sequences(ea):
    """Collect the mnemonic sequence of every function returned by get_funcs.

    For each function, the basic-block ranges produced by ``get_funcs(ea)``
    are walked head by head with ``NextHead`` and the mnemonic of each head
    (``GetMnem``) is appended in block order.

    The block ranges come from ``FlowChart`` blocks (``startEA``/``endEA``),
    whose end address is exclusive, so the walk stops *before* ``end``.
    The previous ``<= end`` comparison also picked up the first instruction
    following each block.

    Returns:
        dict: ``{funcname: [mnemonic, ...]}``.
    """
    funcs_bodylist = {}
    funcs = get_funcs(ea)
    for funcname in funcs:
        mnemonics = []
        for start, end in funcs[funcname]:
            inst_addr = start
            # end is the exclusive block end: do not read the head at end.
            while inst_addr < end:
                mnemonics.append(GetMnem(inst_addr))
                inst_addr = NextHead(inst_addr)
        funcs_bodylist[funcname] = mnemonics
    return funcs_bodylist
2021-11-18 17:43:34 +08:00
def get_func_cfgs_c(ea):
    # type: (object) -> object
    """Build a ``raw_graphs`` collection holding one ``raw_graph`` per function.

    Functions are enumerated with ``Functions(SegStart(ea))``.  For each one
    the CFG is obtained from ``cfg.getCfg``, function-level features from
    ``get_discoverRe_feature`` (fed with the first element of the CFG
    result) and basic-block features from ``get_bb_features``.  A running
    counter is printed per function as progress output.

    Returns:
        The ``raw_graphs`` object created for the input file name
        (``idc.GetInputFile()``), with every ``raw_graph`` appended to it.
    """
    raw_cfgs = raw_graphs(idc.GetInputFile())
    externs_eas, ea_externs = processpltSegs()
    for index, funcea in enumerate(Functions(SegStart(ea))):
        funcname = get_unified_funcname(funcea)
        func = get_func(funcea)
        print(index)
        icfg = cfg.getCfg(func, externs_eas, ea_externs)
        # raw_graphs is a separate class that stores the raw_graph objects
        # in a list (per the original note it lives in the raw_graph module).
        raw_cfgs.append(
            raw_graph(
                funcname,
                icfg,
                get_discoverRe_feature(func, icfg[0]),
                get_bb_features(func),
            )
        )
        # Printing a raw_graph / raw_graphs object directly only shows
        # something like <raw_graphs.raw_graphs instance at 0x...>; use
        # print_obj(obj) or print(obj.__dict__) to see its attributes.
    return raw_cfgs
2021-11-18 17:43:34 +08:00
def get_func_cfgs_ctest(ea):
    """Return both results of ``cfg.getCfg`` for every function.

    Functions are enumerated with ``Functions(SegStart(ea))`` and a running
    counter is printed per function as progress output.

    Returns:
        dict: ``{funcname: (icfg, old_cfg)}`` where the pair is the two-item
        result unpacked from ``cfg.getCfg(func, externs_eas, ea_externs)``.
    """
    binary_name = idc.GetInputFile()
    # NOTE(review): this collection is created but never filled or returned;
    # the construction is kept because its side effects are not visible here.
    raw_cfgs = raw_graphs(binary_name)
    externs_eas, ea_externs = processpltSegs()
    diffs = {}
    for index, funcea in enumerate(Functions(SegStart(ea))):
        funcname = get_unified_funcname(funcea)
        func = get_func(funcea)
        print(index)
        new_cfg, previous_cfg = cfg.getCfg(func, externs_eas, ea_externs)
        diffs[funcname] = (new_cfg, previous_cfg)
    return diffs
2021-11-18 17:43:34 +08:00
def get_func_cfgs(ea):
    """Map every function name to the result of ``cfg.getCfg(func)``.

    Functions are enumerated with ``Functions(SegStart(ea))`` and a running
    counter is printed per function as progress output.  CFG construction is
    best-effort: a function whose CFG cannot be built is reported and
    skipped instead of aborting the whole run.

    Only ``Exception`` subclasses are swallowed, so ``KeyboardInterrupt`` and
    ``SystemExit`` still propagate (the previous bare ``except`` hid them,
    together with every other failure, without any trace).

    NOTE(review): the other callers in this file pass three arguments to
    ``cfg.getCfg``; confirm that the one-argument call here is valid.

    Returns:
        dict: ``{funcname: cfg.getCfg(func)}`` for the functions that
        succeeded.
    """
    func_cfglist = {}
    for index, funcea in enumerate(Functions(SegStart(ea))):
        funcname = get_unified_funcname(funcea)
        func = get_func(funcea)
        print(index)
        try:
            func_cfglist[funcname] = cfg.getCfg(func)
        except Exception as err:
            # Keep going, but make the failure visible.
            print("get_func_cfgs: skipping %s: %s" % (funcname, err))
    return func_cfglist
2021-11-18 17:43:34 +08:00
def get_func_cfg_sequences(func_cfglist):
    """Compute the mnemonic sequence of every CFG node of every function.

    Args:
        func_cfglist: mapping ``{funcname: value}`` where ``value[0]`` is an
            iterable of ``(start, end)`` pairs.

    Returns:
        dict: ``{funcname: {(start, end): get_sequences(start, end)}}``.
    """
    sequences_by_func = {}
    for name in func_cfglist:
        nodes = func_cfglist[name][0]
        sequences_by_func[name] = dict(
            ((first, last), get_sequences(first, last))
            for first, last in nodes
        )
    return sequences_by_func
2021-11-18 17:43:34 +08:00
def get_sequences(start, end):
    """Return the mnemonics of the heads from ``start`` through ``end``.

    Starting at ``start``, the mnemonic at the current address (``GetMnem``)
    is recorded and the address advanced with ``NextHead`` for as long as
    the address does not exceed ``end``.  ``end`` is therefore treated as
    inclusive; an empty list is returned when ``start > end``.

    NOTE(review): if callers pass an exclusive end address this reads one
    head too many -- confirm against the producer of the ranges.
    """
    mnemonics = []
    cursor = start
    while True:
        if cursor > end:
            break
        mnemonics.append(GetMnem(cursor))
        cursor = NextHead(cursor)
    return mnemonics
2021-11-18 17:43:34 +08:00
def get_stack_arg(func_addr):
    """List the distinct member names of the stack frame of ``func_addr``.

    The frame (``GetFrame``) is scanned from its first to its last member
    offset.  The offset advances by the size of the member found there, or
    by 4 when no size is reported.  Empty names, names already collected and
    names containing ``' s'`` or ``' r'`` are skipped.

    The function address is printed first (diagnostic output kept from the
    original code).

    Returns:
        list: member names in order of first appearance; ``[]`` when the
        function has no frame.
    """
    print(func_addr)
    frame = GetFrame(func_addr)
    if not frame:
        return []
    names = []
    offset = GetFirstMember(frame)
    last_offset = GetLastMember(frame)
    while offset <= last_offset:
        member_name = GetMemberName(frame, offset)
        member_size = GetMemberSize(frame, offset)
        # Step over the member, or 4 bytes when the size is unknown.
        offset += member_size if member_size else 4
        if not member_name or member_name in names:
            continue
        if ' s' in member_name or ' r' in member_name:
            continue
        names.append(member_name)
    return names
#pickle.dump(funcs, open('C:/Documents and Settings/Administrator/Desktop/funcs','w'))
2021-11-18 17:43:34 +08:00
def processExternalSegs():
    """Map the names found in external-type segments to their addresses.

    Every segment whose type attribute equals ``idc.SEG_XTRN`` is walked head
    by head (``NextHead``) and the unified function name at each head is
    recorded.

    The segment end address is exclusive, so the walk stops before ``end``,
    consistent with ``processpltSegs``.  The previous ``<= end`` test also
    recorded the head located at the end address, which lies outside the
    segment.

    Returns:
        dict: ``{name: hex(address)}``.
    """
    funcdata = {}
    for n in xrange(idaapi.get_segm_qty()):
        seg = idaapi.getnseg(n)
        ea = seg.startEA
        if idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE) != idc.SEG_XTRN:
            continue
        end = idc.SegEnd(ea)
        cur = idc.SegStart(ea)
        while cur < end:
            funcdata[get_unified_funcname(cur)] = hex(cur)
            cur = NextHead(cur)
    return funcdata
2021-11-18 17:43:34 +08:00
def processpltSegs():
    """Index the heads of the ``.plt``, ``extern`` and ``.MIPS.stubs`` segments.

    Every segment with one of those names is walked head by head
    (``NextHead``) from its start up to, but not including, its end address.

    Returns:
        tuple: ``(funcdata, datafunc)`` where ``funcdata`` maps the unified
        function name at each head to ``hex(address)`` and ``datafunc`` maps
        the address to that name.
    """
    name_to_addr = {}
    addr_to_name = {}
    for index in xrange(idaapi.get_segm_qty()):
        segment = idaapi.getnseg(index)
        if SegName(segment.startEA) not in ['.plt', 'extern', '.MIPS.stubs']:
            continue
        stop = segment.endEA
        addr = segment.startEA
        while addr < stop:
            name = get_unified_funcname(addr)
            name_to_addr[name] = hex(addr)
            addr_to_name[addr] = name
            addr = NextHead(addr)
    return name_to_addr, addr_to_name
2021-11-18 17:43:34 +08:00
def processDataSegs():
    """Cross-index data addresses and the functions that reference them.

    Every segment whose type attribute is ``idc.SEG_DATA`` or ``idc.SEG_BSS``
    is walked head by head (``NextHead``).  For each head, every address
    yielded by ``DataRefsTo`` is resolved to a unified function name;
    references that do not resolve to a name are ignored.

    The segment end address is exclusive, so the walk stops before ``end``.
    The previous ``<= end`` test also processed the head located at the end
    address, which lies outside the segment.

    Returns:
        tuple: ``(funcdata, datafunc)`` where ``funcdata`` maps a function
        name to the list of data addresses it references and ``datafunc``
        maps a data address to the list of referencing function names.
        Both lists may contain duplicates.
    """
    funcdata = {}
    datafunc = {}
    for n in xrange(idaapi.get_segm_qty()):
        seg = idaapi.getnseg(n)
        ea = seg.startEA
        segtype = idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE)
        if segtype not in (idc.SEG_DATA, idc.SEG_BSS):
            continue
        end = idc.SegEnd(ea)
        cur = idc.SegStart(ea)
        while cur < end:
            for fea in DataRefsTo(cur):
                name = get_unified_funcname(fea)
                if not name:
                    # The referencing address is not inside a named function.
                    continue
                funcdata.setdefault(name, []).append(cur)
                datafunc.setdefault(cur, []).append(name)
            cur = NextHead(cur)
    return funcdata, datafunc
2021-11-18 17:43:34 +08:00
def obtainDataRefs(callgraph):
    """Find, for each call-graph node, the functions sharing data with it.

    Using the two indexes returned by ``processDataSegs()``, every node of
    ``callgraph`` that references data is mapped to the de-duplicated list
    of function names that reference any of the same data addresses (the
    node itself included).  Each additional batch of names merged into an
    existing entry is printed (diagnostic output kept from the original
    code).

    Args:
        callgraph: iterable of node identifiers; only those that are keys of
            the function-to-data index contribute.

    Returns:
        dict: ``{node: [function name, ...]}``.
    """
    funcdata, datafunc = processDataSegs()
    datarefs = {}
    for node in callgraph:
        if node not in funcdata:
            continue
        for data_addr in funcdata[node]:
            sharers = list(set(datafunc[data_addr]))
            if node not in datarefs:
                datarefs[node] = sharers
                continue
            print(sharers)
            datarefs[node] = list(set(datarefs[node] + sharers))
    return datarefs
2021-11-18 17:43:34 +08:00