Gencoding_plus/Genius3/raw-feature-extractor/read_idaFILE.py
Erio 17c1ac88b1 Complete Raw-feature-extractor
Complete the reproduction of the Raw-feature-extractor:

The purpose of read_idaFILE.py is to read the raw-feature from the generated .ida file and display
2021-11-19 16:29:15 +08:00

100 lines
3.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: UTF-8 -*-
import sys
import sys
from matplotlib import pyplot as plt
sys.path.insert(0, '/usr/local/lib/python2.7/dist-packages/')
sys.path.insert(1, 'C:/Python27/Lib/site-packages')
import networkx as nx
def print_obj(obj):
"打印对象的所有属性"
print(obj.__dict__)
import pickle
#sub_10F20 308 反编译代码有字符串,但是这个特征提取里没有字符串 constant可能是间接引用的不识别。看了下所有函数的特征几乎都没有字符串常量可能都是写在别的地方然后引用的。
#sub_166C4 393
if __name__ == '__main__':
testpath = "C:\Program1\pycharmproject\Genius3/acfgs/hpcenter.ida"
fr = open(testpath, 'r')
data1 = pickle.load(fr) #一个二进制文件的acfgs
#print(type(data1))
#print_obj(data1)
#print data1.raw_graph_list[393]
#print_obj(data1.raw_graph_list[393])
#nx.draw(data1.raw_graph_list[393].g,with_labels=True)
#plt.show()
print "一个二进制文件的所有函数的原始特征list。"
print_obj(data1) #acfg list
print "\n"
print "一个函数的原始特征由old_gdiscovRe方法的ACFGgGenius方法的ACFGfun_feature表示函数级别的特征的向量三部分构成"
print_obj(data1.raw_graph_list[393]) #一个函数的acfg
print "\n"
feature=data1.raw_graph_list[393].fun_features
print "函数级别特征: # 1 function calls # 2 logic instructions # 3 TransferIns # 4 LocalVariables # 5 BB basicblocks# 6 Edges # 7 IncommingCalls# 8 Intrs# 9 between # 10 strings # 11 consts"
print feature
print "\n"
# G=data1.raw_graph_list[393].old_g
# print G.node[0] # G.node[i]是dict
# for key, value in G.node[0].items():
# print('{key}:{value}'.format(key=key, value=value))
# 一个基本块的特征 #1'consts' 数字常量 #2'strings'字符串常量 #3'offs' offspring 字节点数量? #4'numAs' 算数指令如INC #5'numCalls' 调用指令 #6'numIns' 指令数量 #7'numLIs' LogicInstructions 如AND #8'numTIs' 转移指令数量
G=data1.raw_graph_list[393].g
print "# 一个基本块的特征 #1'consts' 数字常量 #2'strings'字符串常量 #3'offs' offspring 字节点数量? #4'numAs' 算数指令如INC #5'numCalls' 调用指令 #6'numIns' 指令数量 #7'numLIs' LogicInstructions 如AND #8'numTIs' 转移指令数量"
print G.node[0]
print "\n"
# for key, value in G.node[0].items():
# print('{key}:{value}'.format(key=key, value=value))
#oldg就是读取IDA的CFG所以数量、方向等都一样g根据old_g生成也一样
#old g
G = data1.raw_graph_list[393].old_g
nx.draw(G,with_labels=True)
#plt.title('old_g')
plt.show()
# g
G = data1.raw_graph_list[393].g
nx.draw(G,with_labels=True)
#plt.title('Genius_g')
plt.show()
# draw graph with labels
pos = nx.spring_layout(G)
nx.draw(G, pos)
node_labels = nx.get_node_attributes(G, 'v') #networkx的node由属性。g的属性为'v'意为原始特征的vector。old_g的属性见cfg_constructor.py
nx.draw_networkx_labels(G, pos, labels=node_labels)
#plt.title('Genius_g with raw feature vector')
plt.show()
# 1 function calls本函数的函数调用指令call jal jalr数量。。注意arm中没有这些指令
# 2 logic instructions 本函数的逻辑运算指令数量。如and、or的数量
# 3 TransferIns 转移指令如jmp arm中为mov数量
# 4 LocalVariables 局部变量数量
# 5 BB basicblocks数量
# 6 Edges icfg edges数量。icfg是另一篇论文dicovRe中的特征这里暂时不管
# 7 IncommingCalls调用本函数的指令数量
# 8 Intrs 指令数量
# 9 between 结构特征中的betweeness。
# 10 strings 字符串
# 11 consts 数字常量