Gencoding_plus/Genius3/raw-feature-extractor/func.py
Erio 17c1ac88b1 Complete Raw-feature-extractor
Complete the reproduction of the Raw-feature-extractor:

The purpose of read_idaFILE.py is to read the raw-feature from the generated .ida file and display
2021-11-19 16:29:15 +08:00

294 lines
7.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: UTF-8 -*-
#
# Reference Lister
#
# List all functions and all references to them in the current section.
#
# Implemented with the idautils module
#
from idautils import *
from idaapi import *
from idc import *
import networkx as nx
import cfg_constructor as cfg
import cPickle as pickle
import pdb
from raw_graphs import *
#from discovRe_feature.discovRe import *
from discovRe import *
#import wingdbstub
#wingdbstub.Ensure()
def print_obj(obj):
    """Print every instance attribute of *obj* by dumping its ``__dict__``."""
    attributes = obj.__dict__
    print(attributes)
def gt_funcNames(ea):
    """Return the unified names of the functions found from ``SegStart(ea)`` on.

    Names that appear in the first mapping returned by ``processpltSegs()``
    are printed and left out of the result.

    :param ea: an address; ``SegStart(ea)`` is used as the enumeration start.
    :return: list of function names, in enumeration order.
    """
    plt_names, _plt_addrs = processpltSegs()
    names = []
    for func_start in Functions(SegStart(ea)):
        name = get_unified_funcname(func_start)
        if name not in plt_names:
            names.append(name)
        else:
            # Stub-segment entry: report it and skip.
            print(name)
    return names
def get_funcs(ea):
    """Map each function name to the list of its basic-block address pairs.

    Functions are enumerated with ``Functions(SegStart(ea))``; names present
    in the first mapping returned by ``processpltSegs()`` are skipped.

    :param ea: an address; ``SegStart(ea)`` is used as the enumeration start.
    :return: dict ``{funcname: [(block.startEA, block.endEA), ...]}``.
    """
    # NOTE(review): Functions() is given a start address only -- confirm
    # whether that really restricts the walk to the current segment.
    plt_names, _plt_addrs = processpltSegs()
    blocks_by_name = {}
    for func_start in Functions(SegStart(ea)):
        name = get_unified_funcname(func_start)
        if name in plt_names:
            continue
        flowchart = FlowChart(get_func(func_start))
        blocks_by_name[name] = [(bb.startEA, bb.endEA) for bb in flowchart]
    return blocks_by_name
# Used for the call-graph generation.
def get_func_namesWithoutE(ea):
    """Map unified function names to their start addresses, minus stub entries.

    Every function reported by ``Functions(SegStart(ea))`` is considered.
    Names found in the first mapping returned by ``processpltSegs()`` are
    printed and excluded.  As a debugging aid, the address of any function
    whose name contains ``'close'`` is printed as well.

    :param ea: an address; ``SegStart(ea)`` is used as the enumeration start.
    :return: dict ``{funcname: function start address}``.
    """
    plt_names, _plt_addrs = processpltSegs()
    name_to_ea = {}
    for func_start in Functions(SegStart(ea)):
        name = get_unified_funcname(func_start)
        if 'close' in name:
            print(func_start)
        if name in plt_names:
            print(name)
        else:
            name_to_ea[name] = func_start
    return name_to_ea
# Used for the call-graph generation.
def get_func_names(ea):
    """Map every unified function name to its start address.

    No filtering is applied; if two functions share a unified name the one
    enumerated last wins.

    :param ea: an address; ``SegStart(ea)`` is used as the enumeration start.
    :return: dict ``{funcname: function start address}``.
    """
    return {
        get_unified_funcname(func_start): func_start
        for func_start in Functions(SegStart(ea))
    }
def get_func_bases(ea):
    """Map function start addresses to unified names, skipping stub entries.

    This is the inverse orientation of ``get_func_namesWithoutE``: the key is
    the address and the value is the name.  Names found in the first mapping
    returned by ``processpltSegs()`` are excluded.

    :param ea: an address; ``SegStart(ea)`` is used as the enumeration start.
    :return: dict ``{function start address: funcname}``.
    """
    plt_names, _plt_addrs = processpltSegs()
    ea_to_name = {}
    for func_start in Functions(SegStart(ea)):
        name = get_unified_funcname(func_start)
        if name not in plt_names:
            ea_to_name[func_start] = name
    return ea_to_name
def get_func_range(ea):
    """Map every unified function name to its ``(startEA, endEA)`` pair.

    :param ea: an address; ``SegStart(ea)`` is used as the enumeration start.
    :return: dict ``{funcname: (func.startEA, func.endEA)}`` where ``func``
        is the object returned by ``get_func`` for that function.
    """
    ranges = {}
    for func_start in Functions(SegStart(ea)):
        name = get_unified_funcname(func_start)
        func_obj = get_func(func_start)
        bounds = (func_obj.startEA, func_obj.endEA)
        ranges[name] = bounds
    return ranges
def get_unified_funcname(ea):
    """Return ``GetFunctionName(ea)`` with a single leading ``'.'`` removed.

    :param ea: address passed straight to ``GetFunctionName``.
    :return: the function name; unchanged (possibly empty) when it does not
        begin with a dot.
    """
    name = GetFunctionName(ea)
    if name.startswith('.'):
        return name[1:]
    return name
def get_func_sequences(ea):
    """Collect the mnemonic sequence of every non-stub function.

    The basic blocks come from ``get_funcs(ea)``; for each block the heads
    are walked with ``NextHead`` and ``GetMnem`` is recorded for each one.

    :param ea: an address forwarded to ``get_funcs``.
    :return: dict ``{funcname: [mnemonic, ...]}`` with the blocks concatenated
        in the order ``get_funcs`` lists them.
    """
    funcs_bodylist = {}
    for funcname, blocks in get_funcs(ea).items():
        mnemonics = []
        for start, end in blocks:
            inst_addr = start
            # ``end`` is the basic block's endEA, which is the first address
            # *past* the block.  It must be excluded, otherwise the first
            # instruction of whatever follows is appended to this block.
            while inst_addr < end:
                mnemonics.append(GetMnem(inst_addr))
                inst_addr = NextHead(inst_addr)
        funcs_bodylist[funcname] = mnemonics
    return funcs_bodylist
def get_func_cfgs_c(ea):
    # type: (object) -> object
    """Build a ``raw_graphs`` collection with one ``raw_graph`` per function.

    For every function reported by ``Functions(SegStart(ea))`` this computes
    the CFG via ``cfg.getCfg`` and the function features via
    ``get_discoverRe_feature`` (fed the first element of the CFG result),
    wraps them in a ``raw_graph`` and appends it to the collection.  A
    running counter is printed for each function as progress output.

    :param ea: an address; ``SegStart(ea)`` is used as the enumeration start.
    :return: the populated ``raw_graphs`` object, created with the input
        file name from ``idc.GetInputFile()``.
    """
    collected = raw_graphs(idc.GetInputFile())
    externs_eas, ea_externs = processpltSegs()
    for index, func_start in enumerate(Functions(SegStart(ea))):
        name = get_unified_funcname(func_start)
        func_obj = get_func(func_start)
        print(index)
        cfg_result = cfg.getCfg(func_obj, externs_eas, ea_externs)
        features = get_discoverRe_feature(func_obj, cfg_result[0])
        # Build one raw CFG.  Per the original author's note, raw_graph is a
        # Python class defined in raw_graph.py holding g (this paper's ACFG),
        # old_g (discovRe's ACFG) and feature (function-level features plus
        # betweenness).
        graph = raw_graph(name, cfg_result, features)
        # raw_graphs is another class (same file, per the original note) that
        # stores the raw_graph objects in a list.
        collected.append(graph)
        # Debugging hint: raw_graph and raw_graphs are classes, so printing an
        # instance only shows something like
        # <raw_graphs.raw_graphs instance at 0x09888FD0>.  Use print_obj(...)
        # or print(obj.__dict__) to see the attributes
        # (see https://blog.51cto.com/steed/2046408).
    return collected
def get_func_cfgs_ctest(ea):
    """Return both CFG variants of every function, for comparison.

    ``cfg.getCfg`` is expected to yield a two-element result; the pair is
    stored under the function's unified name.  A running counter is printed
    for each function as progress output.

    :param ea: an address; ``SegStart(ea)`` is used as the enumeration start.
    :return: dict ``{funcname: (icfg, old_cfg)}``.
    """
    # A raw_graphs container is still constructed here although nothing is
    # appended to it and it is not returned (the code that used it is
    # disabled); kept so the call sequence is unchanged.
    raw_cfgs = raw_graphs(idc.GetInputFile())
    externs_eas, ea_externs = processpltSegs()
    diffs = {}
    for index, func_start in enumerate(Functions(SegStart(ea))):
        name = get_unified_funcname(func_start)
        func_obj = get_func(func_start)
        print(index)
        new_cfg, old_cfg = cfg.getCfg(func_obj, externs_eas, ea_externs)
        diffs[name] = (new_cfg, old_cfg)
    return diffs
def get_func_cfgs(ea):
    """Map each unified function name to the result of ``cfg.getCfg``.

    CFG construction is best effort: a function whose CFG cannot be built is
    reported on stdout and left out of the result instead of aborting the
    whole run.  A running counter is printed for each function as progress
    output.

    :param ea: an address; ``SegStart(ea)`` is used as the enumeration start.
    :return: dict ``{funcname: cfg.getCfg(func)}`` for the functions that
        succeeded.
    """
    # NOTE(review): the other callers in this file pass three arguments to
    # cfg.getCfg (func, externs_eas, ea_externs).  Confirm the one-argument
    # form is supported; if it is not, every call fails and the result is
    # always empty.
    func_cfglist = {}
    for index, funcea in enumerate(Functions(SegStart(ea))):
        funcname = get_unified_funcname(funcea)
        func = get_func(funcea)
        print(index)
        try:
            icfg = cfg.getCfg(func)
        except Exception as err:
            # Was a bare ``except: pass``: that also swallowed
            # KeyboardInterrupt/SystemExit and hid the reason for the failure.
            print("get_func_cfgs: skipping %s: %r" % (funcname, err))
        else:
            func_cfglist[funcname] = icfg
    return func_cfglist
def get_func_cfg_sequences(func_cfglist):
    """Attach a mnemonic sequence to every node of every function's CFG.

    :param func_cfglist: dict ``{funcname: entry}`` where ``entry[0]`` is an
        iterable of ``(start, end)`` pairs (presumably the CFG's nodes --
        confirm against ``cfg_constructor``).
    :return: dict ``{funcname: {(start, end): get_sequences(start, end)}}``.
    """
    sequences_by_func = {}
    for name, entry in func_cfglist.items():
        graph = entry[0]
        sequences_by_func[name] = {
            (start, end): get_sequences(start, end) for start, end in graph
        }
    return sequences_by_func
def get_sequences(start, end):
    """Return the mnemonics of the heads from *start* through *end* inclusive.

    :param start: first address to read.
    :param end: last address considered; a head located exactly at *end* is
        included.
    :return: list of ``GetMnem`` results in address order.
    """
    # NOTE(review): the bound is inclusive.  If callers pass an exclusive
    # block end address this picks up one instruction too many -- confirm
    # what cfg_constructor stores in its nodes.
    mnemonics = []
    cursor = start
    while cursor <= end:
        mnemonics.append(GetMnem(cursor))
        cursor = NextHead(cursor)
    return mnemonics
def get_stack_arg(func_addr):
    """List the distinct member names of a function's stack frame.

    The frame is walked from ``GetFirstMember`` to ``GetLastMember``,
    advancing by each member's size (or by 4 when ``GetMemberSize`` returns
    nothing).  Empty names and names containing ``' s'`` or ``' r'`` are
    ignored.  The function address is printed as debug output.

    :param func_addr: address passed to ``GetFrame``.
    :return: list of member names in frame order, without duplicates; an
        empty list when the function has no frame.
    """
    print(func_addr)
    frame = GetFrame(func_addr)
    if not frame:
        return []
    names = []
    offset = GetFirstMember(frame)
    last_offset = GetLastMember(frame)
    while offset <= last_offset:
        member_name = GetMemberName(frame, offset)
        member_size = GetMemberSize(frame, offset)
        # Step over the member, or by a 4-byte stride when no size is known.
        offset += member_size if member_size else 4
        if not member_name or member_name in names:
            continue
        if ' s' in member_name or ' r' in member_name:
            continue
        names.append(member_name)
    return names
#pickle.dump(funcs, open('C:/Documents and Settings/Administrator/Desktop/funcs','w'))
def processExternalSegs():
    """Map the names found in external (``SEG_XTRN``) segments to addresses.

    Every segment whose type attribute equals ``idc.SEG_XTRN`` is walked head
    by head; for each head the unified function name is recorded together
    with the hexadecimal string of its address.

    :return: dict ``{name: hex(address)}``.
    """
    funcdata = {}
    for n in xrange(idaapi.get_segm_qty()):
        seg = idaapi.getnseg(n)
        ea = seg.startEA
        if idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE) != idc.SEG_XTRN:
            continue
        end = idc.SegEnd(ea)
        cur = idc.SegStart(ea)
        # The segment end address is exclusive, so stop before it (matches
        # processpltSegs); ``<=`` would also record the first head of
        # whatever follows the segment.
        while cur < end:
            funcdata[get_unified_funcname(cur)] = hex(cur)
            cur = NextHead(cur)
    return funcdata
def processpltSegs():
    """Index the heads of the ``.plt``, ``extern`` and ``.MIPS.stubs`` segments.

    :return: tuple ``(name_to_addr, addr_to_name)`` where ``name_to_addr``
        maps each unified function name to ``hex(address)`` and
        ``addr_to_name`` maps each head address to its unified name.
    """
    stub_segments = ('.plt', 'extern', '.MIPS.stubs')
    name_to_addr = {}
    addr_to_name = {}
    for index in xrange(idaapi.get_segm_qty()):
        segment = idaapi.getnseg(index)
        if SegName(segment.startEA) not in stub_segments:
            continue
        head = segment.startEA
        limit = segment.endEA
        # Walk every head below the segment's end address.
        while head < limit:
            name = get_unified_funcname(head)
            name_to_addr[name] = hex(head)
            addr_to_name[head] = name
            head = NextHead(head)
    return name_to_addr, addr_to_name
def processDataSegs():
    """Cross-index data addresses and the functions that reference them.

    Every head of each ``SEG_DATA`` / ``SEG_BSS`` segment is inspected; for
    each data reference to it (``DataRefsTo``) the unified name of the
    referencing function is looked up.  References whose address yields an
    empty function name are ignored.

    :return: tuple ``(funcdata, datafunc)`` where ``funcdata`` maps a function
        name to the list of data addresses it references and ``datafunc``
        maps a data address to the list of referencing function names.  Both
        lists keep one entry per reference, so duplicates are possible.
    """
    funcdata = {}
    datafunc = {}
    data_types = (idc.SEG_DATA, idc.SEG_BSS)
    for n in xrange(idaapi.get_segm_qty()):
        seg = idaapi.getnseg(n)
        ea = seg.startEA
        if idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE) not in data_types:
            continue
        end = idc.SegEnd(ea)
        cur = idc.SegStart(ea)
        # The segment end address is exclusive, so stop before it (matches
        # processpltSegs); ``<=`` would attribute the first address of the
        # following segment to this one.
        while cur < end:
            for fea in DataRefsTo(cur):
                name = get_unified_funcname(fea)
                if not name:
                    continue
                funcdata.setdefault(name, []).append(cur)
                datafunc.setdefault(cur, []).append(name)
            cur = NextHead(cur)
    return funcdata, datafunc
def obtainDataRefs(callgraph):
    """Link each call-graph node to the functions sharing data with it.

    Using the two indexes from ``processDataSegs()``, every node that
    references data is mapped to the de-duplicated list of function names
    referencing any of the same data addresses.  When a node already has an
    entry, the list about to be merged is printed as debug output.

    :param callgraph: iterable of node names (function names).
    :return: dict ``{node: [function names]}`` for the nodes that reference
        data.
    """
    funcdata, datafunc = processDataSegs()
    datarefs = {}
    for node in callgraph:
        if node not in funcdata:
            continue
        for data_addr in funcdata[node]:
            sharers = list(set(datafunc[data_addr]))
            if node not in datarefs:
                datarefs[node] = sharers
                continue
            print(sharers)
            combined = datarefs[node] + sharers
            datarefs[node] = list(set(combined))
    return datarefs