# -*- coding: UTF-8 -*-
"""Dump the ACFG features of one pickled binary analysis as a JSON string.

The script unpickles a ``.ida`` feature file, collects per-basic-block
features and edges for every function graph stored in it, adds the SHA-256
of the analysed binary, and prints the result as a single JSON document.
"""
import hashlib
import json
import pickle
import sys

from matplotlib import pyplot as plt
import networkx as nx

# Input locations used when the script is run directly.
BINARY_PATH = '../3c580f5beca53b6599e5f04d3aa68a34bd50521d7ec5d7163849eb69f53a4150.exe'
ACFG_PICKLE_PATH = '../store/3c580f5beca53b6599e5f04d3aa68a34bd50521d7ec5d7163849eb69f53a4150.exe.ida'

# Per-block counters, in the order they appear in the feature vector:
# calls, transfer, arithmetic, logic, compare, move, termination,
# data declaration, total instructions.
_COUNT_KEYS = (
    'numCalls',
    'numTIs',
    'numAs',
    'numLIs',
    'numCom',
    'numMov',
    'numTerm',
    'numDD',
    'numIns',
)

# Size of the chunks read while hashing, so large binaries are not loaded
# into memory in one piece.
_HASH_CHUNK_SIZE = 65536


def print_obj(obj):
    """Print all attributes of the object (its ``__dict__``)."""
    print(obj.__dict__)


def calc_sha256(file_path):
    """Return the hexadecimal SHA-256 digest of the file at *file_path*.

    The file is read in binary mode and hashed chunk by chunk.
    """
    digest = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(_HASH_CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()


# Notes from inspecting the extracted features:
# sub_10F20 308 - the decompiled code contains strings, but this feature
#   extraction reports no string constants for it; they are probably
#   referenced indirectly and therefore not recognised. Looking at the
#   features of all functions, almost none have string constants, so the
#   strings are probably stored elsewhere and only referenced.
# sub_166C4 393


def _split_edges(edges):
    """Turn an iterable of (start, end) edges into ``[starts, ends]``."""
    starts = []
    ends = []
    for edge in edges:
        starts.append(edge[0])
        ends.append(edge[1])
    return [starts, ends]


def _block_feature_vector(attrs):
    """Build the 11-element feature list for one block's attribute dict."""
    features = [attrs[key] for key in _COUNT_KEYS]
    # String or integer constants: the number of strings, or the number of
    # constants when the block has no strings.
    features.append(len(attrs['strings']) or len(attrs['consts']))
    # Offspring.
    features.append(attrs['offs'])
    return features


def _build_acfg(graph):
    """Return block count, block edges and block features for one graph.

    Blocks are looked up as ``graph.node[0] .. graph.node[n - 1]``, so the
    graph's nodes must be labelled with consecutive integers from 0.
    NOTE(review): ``.node`` presumably is the networkx graph attribute that
    newer networkx releases dropped in favour of ``.nodes`` - confirm which
    networkx version this project pins before upgrading.
    """
    nodes = graph.node
    block_number = len(nodes)
    block_features = [
        _block_feature_vector(nodes[index]) for index in range(block_number)
    ]
    return {
        "block_number": block_number,
        "block_edges": _split_edges(graph.edges),
        "block_features": block_features,
    }


def _build_report(data, file_hash):
    """Assemble the JSON-serialisable report for one unpickled feature set.

    *data* must expose ``raw_graph_list``, whose items carry ``funcname``
    and ``old_g``. *file_hash* is stored unchanged under ``"hash"``.
    """
    graphs = data.raw_graph_list
    # NOTE(review): "function_edges" is taken from the edges of the FIRST
    # function's graph only; verify that this is the intended source.
    if len(graphs) > 0:
        function_edges = _split_edges(graphs[0].old_g.edges)
    else:
        # Nothing to index: emit empty edge lists instead of an IndexError.
        function_edges = [[], []]
    return {
        "function_edges": function_edges,
        "acfg_list": [_build_acfg(item.old_g) for item in graphs],
        "function_names": [item.funcname for item in graphs],
        "hash": file_hash,
        "function_number": len(graphs),
    }


def _main(file_path=BINARY_PATH, ida_path=ACFG_PICKLE_PATH):
    """Load the pickled ACFGs of one binary and print them as JSON."""
    # Pickle data is binary, so the file must be opened with 'rb'; the
    # ``with`` block also guarantees the handle is closed.
    # WARNING: pickle.load can execute arbitrary code - only feed this
    # script feature files that come from a trusted source.
    with open(ida_path, 'rb') as fr:
        data = pickle.load(fr)  # the ACFGs of one binary file
    report = _build_report(data, calc_sha256(file_path))
    json_str = json.dumps(report)
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print(json_str)


if __name__ == '__main__':
    _main()