"""Recover control-flow graphs with angr and record their node/edge counts.

For every input binary that is not reported as packed, a CFG is built with
angr's ``CFGFast`` analysis and one CSV row (file name, node count, edge
count) is written.
"""
import csv
import logging
import os
import sys

import angr
import lief
import networkx as nx
from tqdm import tqdm

from detect_pe_packer import detect_pack_res

# NOTE(review): `asm2vec_plus_16` (used by get_node_feature) is not imported
# anywhere in the visible file; presumably one of the disabled imports below
# used to provide it -- confirm and re-enable the right one. Until then every
# call to get_node_feature raises NameError and cfg_extract skips every file.
# sys.path.append(r'../ASM2VEC_plus_scripts/')
# from func2vec import func2vec,load_model
# from node_feature import asm2vec_plus
# from node_feature import *

logger = logging.getLogger(__name__)


def get_node_feature(hex_asm="558bec83ec085756bf0bb80000ff15d4804000",
                     node_feature_method="asm2vec_plus_16"):
    """Compute node features for hex-encoded machine code.

    Args:
        hex_asm: Hex-encoded bytes. The default is a single hex string;
            cfg_extract passes a list of hex strings (one per CFG node).
        node_feature_method: Name of the feature extractor. Only
            ``"asm2vec_plus_16"`` is handled.

    Returns:
        ``asm2vec_plus_16(hex_asm=hex_asm).tolist()`` for the
        ``"asm2vec_plus_16"`` method; ``None`` for any other method name.
    """
    if node_feature_method == "asm2vec_plus_16":
        return asm2vec_plus_16(hex_asm=hex_asm).tolist()
    return None


asm2vec_model_path = "../ASM2VEC_plus_scripts/asm2vec_checkpoints/model_100.pt"


def cfg_extract(file_list, data_dir="../data/malware",
                csv_save_path="../CFG_data/malware_msg.csv",
                header=('malware_name', 'nodes_num', 'edgs_num')):
    """Write the CFG node and edge counts of each usable binary to a CSV file.

    A file is skipped (with a logged warning) when packer detection or CFG
    recovery raises, or when collecting node bytes / computing node features
    raises. Files for which ``detect_pack_res`` returns ``True`` are skipped
    as packed.

    Args:
        file_list: File names, relative to ``data_dir``.
        data_dir: Directory containing the binaries.
        csv_save_path: Destination CSV path; it is overwritten.
        header: Column names written as the first CSV row.
    """
    csv_data = []
    for file_item in tqdm(file_list):
        file_path = os.path.join(data_dir, file_item)

        # Exclude packed programs, then recover the CFG.
        try:
            # `== True` is kept deliberately: the return type of
            # detect_pack_res is not visible here, so a plain truthiness
            # test could change which files are skipped.
            if detect_pack_res(file_path) == True:  # noqa: E712
                continue
            project = angr.Project(file_path,
                                   load_options={'auto_load_libs': False})
            cfg = project.analyses.CFGFast(show_progressbar=True,
                                           normalize=False,
                                           resolve_indirect_jumps=False,
                                           force_smart_scan=False,
                                           symbols=False,
                                           data_references=False)
        except Exception as exc:
            # `except Exception` (not a bare except) so Ctrl-C still aborts.
            logger.warning("Skipping %s: CFG recovery failed: %r",
                           file_item, exc)
            continue

        graph = cfg.graph

        # If any node cannot be disassembled or featurised, discard the whole
        # file. The features themselves are not stored; the call only acts as
        # a validity filter.
        try:
            # bytes.hex() never contains "0x", so no prefix stripping needed.
            asm_hex_list = [node.block.bytes.hex() for node in graph.nodes]
            get_node_feature(hex_asm=asm_hex_list,
                             node_feature_method="asm2vec_plus_16")
        except Exception as exc:
            logger.warning("Skipping %s: node feature extraction failed: %r",
                           file_item, exc)
            continue

        csv_data.append([file_item + ".gexf",
                         str(len(graph.nodes)),
                         str(len(graph.edges))])

    # Create the output directory so a long run is not lost to a missing path.
    out_dir = os.path.dirname(csv_save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(csv_save_path, 'w', encoding='utf-8', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerow(header)
        writer.writerows(csv_data)


if __name__ == '__main__':
    benign_data_dir = "../data/benign_last"
    benign_list = os.listdir(benign_data_dir)
    cfg_extract(benign_list,
                data_dir=benign_data_dir,
                csv_save_path="./benign_msg.csv",
                header=['malware_name', 'nodes_num', 'edgs_num'])