2023-12-02 21:53:57 +08:00
|
|
|
|
# -*- coding: UTF-8 -*-
|
|
|
|
|
import sys
|
|
|
|
|
from matplotlib import pyplot as plt
|
|
|
|
|
import networkx as nx
|
|
|
|
|
|
|
|
|
|
import hashlib
|
|
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def print_obj(obj):
    """Print every attribute stored in the object's ``__dict__``."""
    attributes = obj.__dict__
    print(attributes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def calc_sha256(file_path, chunk_size=65536):
    """Return the SHA-256 hex digest of the file at ``file_path``.

    The file is opened in binary mode and hashed in ``chunk_size``-byte
    pieces, so large binaries are never held in memory in full.

    :param file_path: path of the file to hash.
    :param chunk_size: number of bytes read per iteration.
    :return: the digest as a hexadecimal string.
    :raises IOError: (``OSError`` on Python 3) if the file cannot be opened
        or read.
    """
    digest = hashlib.sha256()
    with open(file_path, 'rb') as f:
        # iter() with a sentinel stops at the empty read that marks EOF.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pickle
|
|
|
|
|
|
|
|
|
|
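# sub_10F20 308: the decompiled code contains strings, but the extracted features hold no string constants; they may be referenced indirectly and therefore not recognized. Looking at the features of all functions, almost none have string constants, so the strings are probably defined elsewhere and only referenced.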
|
|
|
|
|
# sub_166C4 393
|
|
|
|
|
# Per-block instruction-count attributes, in feature-vector order:
# calls, transfer, arithmetic, logic, compare, move, termination,
# data declaration, total instructions.
BLOCK_COUNT_KEYS = (
    'numCalls', 'numTIs', 'numAs', 'numLIs', 'numCom',
    'numMov', 'numTerm', 'numDD', 'numIns',
)


def split_edges(edges):
    """Split an iterable of edges into ``[sources, targets]``.

    :param edges: iterable of indexable edges; element 0 is the source
        node and element 1 is the target node.
    :return: a two-element list ``[list_of_sources, list_of_targets]``.
    """
    sources = []
    targets = []
    for edge in edges:
        sources.append(edge[0])
        targets.append(edge[1])
    return [sources, targets]


def block_feature_vector(attrs):
    """Build the feature list of one basic block.

    :param attrs: attribute mapping of a graph node; it must hold every key
        in ``BLOCK_COUNT_KEYS`` plus ``'strings'``, ``'consts'`` and
        ``'offs'``.
    :return: the nine counters, then the string-or-constant count, then the
        ``'offs'`` (offspring) value.
    :raises KeyError: if a required attribute is missing.
    """
    features = [attrs[key] for key in BLOCK_COUNT_KEYS]
    # Number of strings when the block has any, otherwise number of constants.
    features.append(len(attrs['strings']) or len(attrs['consts']))
    features.append(attrs['offs'])
    return features


def function_acfg(graph):
    """Describe one function graph as a JSON-serializable dict.

    :param graph: graph object exposing ``node`` (mapping from node id to
        attribute dict) and ``edges`` (iterable of edges).
    :return: dict with ``block_number``, ``block_edges`` and
        ``block_features``.
    """
    nodes = graph.node
    block_number = len(nodes)
    # Nodes are looked up by the integers 0..n-1 (not by iterating the
    # mapping), which keeps the feature rows in node-id order.
    block_features = [block_feature_vector(nodes[index])
                      for index in range(block_number)]
    return {"block_number": block_number,
            "block_edges": split_edges(graph.edges),
            "block_features": block_features}


def build_acfg_json(acfgs, file_hash):
    """Assemble the per-binary record that is dumped as JSON.

    :param acfgs: object with a ``raw_graph_list`` sequence; every entry has
        ``old_g`` (the function graph) and ``funcname``.
    :param file_hash: hash string stored under the ``"hash"`` key.
    :return: dict with ``function_edges``, ``acfg_list``,
        ``function_names``, ``hash`` and ``function_number``.
    """
    graphs = acfgs.raw_graph_list
    # NOTE(review): "function_edges" is taken from the graph of the FIRST
    # function only, exactly as before. Confirm this is the intended source
    # of function-level edges.
    # An empty list used to raise IndexError here; it now gives empty edges.
    if graphs:
        function_edges = split_edges(graphs[0].old_g.edges)
    else:
        function_edges = [[], []]
    return {"function_edges": function_edges,
            "acfg_list": [function_acfg(entry.old_g) for entry in graphs],
            "function_names": [entry.funcname for entry in graphs],
            "hash": file_hash,
            "function_number": len(graphs)}


def main():
    """Load the pickled ACFGs of one binary and print them as JSON."""
    file_path = '../3c580f5beca53b6599e5f04d3aa68a34bd50521d7ec5d7163849eb69f53a4150.exe'
    testpath = '../store/3c580f5beca53b6599e5f04d3aa68a34bd50521d7ec5d7163849eb69f53a4150.exe.ida'
    # SECURITY: pickle.load can execute arbitrary code; only feed it .ida
    # files produced by a trusted extractor.
    # NOTE(review): the original text mode 'r' is kept on purpose. If the
    # .ida file was written in binary mode, or this runs on Python 3, the
    # mode must be 'rb' -- confirm against the producer.
    with open(testpath, 'r') as fr:  # the handle was never closed before
        data1 = pickle.load(fr)  # ACFGs of one binary file
    json_temp = build_acfg_json(data1, calc_sha256(file_path))
    json_str = json.dumps(json_temp)
    # Parenthesized form prints the same on Python 2 and Python 3.
    print(json_str)


if __name__ == '__main__':
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2023-12-03 21:05:22 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|