80 lines
2.6 KiB
Python
80 lines
2.6 KiB
Python
import angr
|
|
import csv
|
|
import networkx as nx
|
|
import os
|
|
from tqdm import tqdm
|
|
import sys
|
|
# sys.path.append(r'../ASM2VEC_plus_scripts/')
|
|
# from func2vec import func2vec,load_model
|
|
# from node_feature import asm2vec_plus
|
|
import lief
|
|
from detect_pe_packer import detect_pack_res
|
|
import sys
|
|
|
|
# from node_feature import *
|
|
|
|
|
|
def get_node_feature(hex_asm="558bec83ec085756bf0bb80000ff15d4804000",node_feature_method="asm2vec_plus_16"):
    """Compute node features for hex-encoded machine code.

    Args:
        hex_asm: Hex-encoded instruction bytes. The default is a single hex
            string; ``cfg_extract`` in this file passes a list of such
            strings (one per CFG node).
        node_feature_method: Name of the feature extractor to use. Only
            ``"asm2vec_plus_16"`` is handled.

    Returns:
        The result of ``asm2vec_plus_16(hex_asm=hex_asm).tolist()`` when the
        method is ``"asm2vec_plus_16"``; ``None`` for any other method name.
    """
    # Guard clause: every method other than the supported one yields None.
    if node_feature_method != "asm2vec_plus_16":
        return None

    # NOTE(review): `asm2vec_plus_16` is not imported in the visible part of
    # this file (the `node_feature` imports are commented out) -- confirm
    # where it is expected to come from.
    embedding = asm2vec_plus_16(hex_asm=hex_asm)
    return embedding.tolist()
|
|
# Relative path to an asm2vec ".pt" checkpoint file.
# NOTE(review): nothing in the visible code reads this variable -- presumably
# it was used by the (now commented-out) model-loading imports; confirm before
# removing.
asm2vec_model_path="../ASM2VEC_plus_scripts/asm2vec_checkpoints/model_100.pt"
|
|
|
|
|
|
# Extract the number of nodes and edges
|
|
def cfg_extract(file_list,data_dir="../data/malware",csv_save_path="../CFG_data/malware_msg.csv",header = ['malware_name','nodes_num','edgs_num']):
    """Recover a CFG for each binary and write node/edge counts to a CSV file.

    For every entry of ``file_list`` the file ``data_dir/<entry>`` is:

    1. checked with ``detect_pack_res`` and skipped when that returns ``True``;
    2. loaded with ``angr.Project`` (``auto_load_libs`` disabled) and analysed
       with ``CFGFast``;
    3. validated by hex-encoding the bytes of every CFG node's block and
       passing the resulting list to ``get_node_feature``.

    A sample is skipped (best effort) when any of these steps raises an
    ``Exception``; the reason is printed to stderr. Samples that pass
    contribute one row ``[<entry> + ".gexf", <node count>, <edge count>]``.

    Args:
        file_list: Iterable of file names located in ``data_dir``.
        data_dir: Directory containing the binaries.
        csv_save_path: Destination CSV path; the file is overwritten.
        header: Column names written as the first CSV row. The default list
            is never mutated by this function.

    Returns:
        None. The result is the CSV file written to ``csv_save_path``.

    Raises:
        Whatever ``open``/``csv`` raise while writing the output file; errors
        for individual samples are reported and do not propagate.
    """
    csv_data = []

    for file_item in tqdm(file_list):
        file_path = os.path.join(data_dir, file_item)

        # Skip packed executables, then recover the CFG. `except Exception`
        # (instead of a bare `except:`) keeps the best-effort skipping but
        # lets KeyboardInterrupt / SystemExit stop the batch run.
        try:
            # Explicit `== True` is kept on purpose: the return type of
            # detect_pack_res is not visible here.
            if detect_pack_res(file_path) == True:  # noqa: E712
                continue

            p = angr.Project(file_path, load_options={'auto_load_libs': False})
            cfg = p.analyses.CFGFast(show_progressbar=True, normalize=False, resolve_indirect_jumps=False,
                                     force_smart_scan=False, symbols=False, data_references=False)
        except Exception as exc:
            tqdm.write(f"[skip] {file_item}: CFG recovery failed: {exc!r}", file=sys.stderr)
            continue

        G = cfg.graph

        # Discard the whole sample if any node cannot be disassembled or the
        # feature extraction fails. The feature values themselves are not
        # used; the call only acts as a validity check for the sample.
        try:
            # bytes.hex() never emits a "0x" prefix, so no stripping is needed.
            asm_hex_list = [node.block.bytes.hex() for node in G.nodes]
            get_node_feature(hex_asm=asm_hex_list, node_feature_method="asm2vec_plus_16")
        except Exception as exc:
            tqdm.write(f"[skip] {file_item}: node feature extraction failed: {exc!r}", file=sys.stderr)
            continue

        nodes_num = len(G.nodes())
        edgs_num = len(G.edges())
        csv_data.append([file_item + ".gexf", str(nodes_num), str(edgs_num)])

    with open(csv_save_path, 'w', encoding='utf-8', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerow(header)
        writer.writerows(csv_data)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: run the CFG statistics extraction over every entry
    # of the benign sample directory and store the result next to the script.
    benign_dir = "../data/benign_last"
    benign_files = os.listdir(benign_dir)

    cfg_extract(
        benign_files,
        data_dir=benign_dir,
        csv_save_path="./benign_msg.csv",
        header=['malware_name', 'nodes_num', 'edgs_num'],
    )
|