detect_rep/tool/筛选控制流程图.py
2023-04-05 10:04:49 +08:00

77 lines
2.6 KiB
Python

import angr
import csv
import networkx as nx
import os
from tqdm import tqdm
import sys
# sys.path.append(r'../ASM2VEC_plus_scripts/')
# from func2vec import func2vec,load_model
# from node_feature import asm2vec_plus
import lief
from detect_pe_packer import detect_pack_res
import sys
# from node_feature import *
def get_node_feature(hex_asm="558bec83ec085756bf0bb80000ff15d4804000",node_feature_method="asm2vec_plus_16"):
    """Return the feature vector for ``hex_asm`` as a plain Python list.

    Only the ``"asm2vec_plus_16"`` method is handled: ``hex_asm`` is passed to
    ``asm2vec_plus_16`` and the result's ``tolist()`` is returned. Any other
    ``node_feature_method`` yields ``None``.

    NOTE(review): ``asm2vec_plus_16`` is not imported in the visible part of
    this file (the ``node_feature`` imports are commented out) -- confirm it
    is in scope before relying on this path.
    """
    # Guard clause: unsupported methods fall through to None, as before.
    if node_feature_method != "asm2vec_plus_16":
        return None
    embedding = asm2vec_plus_16(hex_asm=hex_asm)
    return embedding.tolist()
# Relative path to an asm2vec checkpoint file (model_100.pt).
# NOTE(review): no code visible in this file reads this name -- confirm
# whether it is still needed.
asm2vec_model_path="../ASM2VEC_plus_scripts/asm2vec_checkpoints/model_100.pt"
# Extract the number of nodes and edges of each file's control-flow graph
def cfg_extract(file_list,data_dir="../data/malware",csv_save_path="../CFG_data/malware_msg.csv",header = ['malware_name','nodes_num','edgs_num']):
    """Write a CSV with the CFG node and edge counts of the usable binaries.

    For every name in ``file_list`` the file ``data_dir/name`` is checked with
    ``detect_pack_res``; files it reports as packed are skipped. Each
    remaining file is loaded with angr and a ``CFGFast`` graph is recovered.
    The bytes of every node's block are hex-encoded and the resulting list is
    handed to ``get_node_feature``. If that step raises, the file is skipped
    and a message is printed; otherwise the row
    ``[name + ".gexf", node count, edge count]`` is recorded. After all files
    have been processed, ``header`` and the recorded rows are written to
    ``csv_save_path``.

    Args:
        file_list: File names relative to ``data_dir``.
        data_dir: Directory that contains the binaries.
        csv_save_path: Destination CSV file; overwritten if it exists.
        header: Column titles written as the first CSV row. The default list
            is only read, never mutated, by this function.

    Returns:
        None. The result is the CSV file written to ``csv_save_path``.

    Raises:
        Exception: Whatever ``detect_pack_res``, ``angr.Project`` or
            ``CFGFast`` raise propagates to the caller (these calls are not
            guarded).
        OSError: If ``csv_save_path`` cannot be opened for writing.
    """
    csv_data = []
    for file_item in tqdm(file_list):
        file_path = os.path.join(data_dir, file_item)

        # Drop packed programs. The explicit `== True` is kept on purpose:
        # a truthiness test would behave differently if detect_pack_res ever
        # returns a non-bool value.
        if detect_pack_res(file_path) == True:
            continue

        project = angr.Project(file_path, load_options={'auto_load_libs': False})
        cfg = project.analyses.CFGFast(show_progressbar=True, normalize=False, resolve_indirect_jumps=False,
                                       force_smart_scan=False, symbols=False, data_references=False)
        graph = cfg.graph

        # If the per-node bytes cannot be collected or embedded, discard this
        # file. Only the success of the call matters; its result is unused.
        try:
            asm_hex_list = [node.block.bytes.hex().replace("0x", "") for node in graph.nodes]
            get_node_feature(hex_asm=asm_hex_list, node_feature_method="asm2vec_plus_16")
        except Exception as err:
            # Report the skip instead of swallowing it silently, so that a
            # systematic failure (e.g. a missing feature extractor) is visible.
            # KeyboardInterrupt/SystemExit are no longer intercepted.
            tqdm.write("skipping {}: {!r}".format(file_item, err))
            continue

        nodes_num = len(graph.nodes())
        edgs_num = len(graph.edges())
        csv_data.append([file_item + ".gexf", str(nodes_num), str(edgs_num)])

    with open(csv_save_path, 'w', encoding='utf-8', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerow(header)
        writer.writerows(csv_data)
if __name__ == '__main__':
    # Script entry point: scan the benign sample directory and write its
    # node/edge statistics to ./benign_msg.csv.
    benign_dir = "../data/benign_last"
    cfg_extract(
        os.listdir(benign_dir),
        data_dir=benign_dir,
        csv_save_path="./benign_msg.csv",
        header=['malware_name', 'nodes_num', 'edgs_num'],
    )