bert与radare拆分
This commit is contained in:
parent
5cbc33f9ca
commit
24a7530ae6
25
exe2json.py
25
exe2json.py
@ -13,7 +13,7 @@ import os
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
ret_trap_opcode_family = ["ret", "hlt", "int3", "ud2"]
|
||||
|
||||
sample_type = 'benign'
|
||||
sample_type = 'malware'
|
||||
|
||||
|
||||
def extract_opcode(disasm_text):
|
||||
@ -41,9 +41,10 @@ def calc_sha256(file_path):
|
||||
return sha256
|
||||
|
||||
|
||||
def get_graph_cfg_r2pipe(r2pipe_open, file_path):
|
||||
def get_graph_cfg_r2pipe(r2pipe_open, file_path, feature_out):
|
||||
# CFG提取
|
||||
acfg_item = []
|
||||
acfg_feature_item = []
|
||||
try:
|
||||
# 获取函数列表
|
||||
function_list = r2pipe_open.cmdj("aflj")
|
||||
@ -132,17 +133,24 @@ def get_graph_cfg_r2pipe(r2pipe_open, file_path):
|
||||
# print(block_Statement)
|
||||
|
||||
|
||||
"""
|
||||
速度过慢
|
||||
"""
|
||||
# bert模型转化特征
|
||||
block_feature_list = bb2vec(block_Statement)
|
||||
# block_feature_list = bb2vec(block_Statement)
|
||||
# block_feature_list = []
|
||||
|
||||
# 暂时将bb地址作为特征 后续将操作码集中转化为特征
|
||||
block_feature_list = block_addr
|
||||
acfg_feature_item.append({'addr':block_addr, 'opcode':block_Statement})
|
||||
|
||||
# 过滤不存在的边
|
||||
for temp_edge in temp_edge_list:
|
||||
if temp_edge[0] in node_list and temp_edge[1] in node_list:
|
||||
edge_list.append(temp_edge)
|
||||
# 单独错误信息日志
|
||||
if block_number == 0 or len(block_feature_list) == 0:
|
||||
logger.warning(f"二进制可执行文件解析出错,出错文件:{file_path},出错函数地址:{function['offset']},基础块个数{block_number},基础块特征{block_feature_list}")
|
||||
if block_number == 0:
|
||||
logger.warning(f"二进制可执行文件解析出错,出错文件:{file_path},出错函数地址:{function['offset']},基础块个数{block_number}")
|
||||
# cfg构建
|
||||
acfg = {
|
||||
'block_number': block_number,
|
||||
@ -150,6 +158,8 @@ def get_graph_cfg_r2pipe(r2pipe_open, file_path):
|
||||
'block_features': block_feature_list
|
||||
}
|
||||
acfg_item.append(acfg)
|
||||
feature_out.write(json.dumps(acfg_feature_item))
|
||||
|
||||
return True, "二进制可执行文件解析成功", acfg_item
|
||||
except Exception as e:
|
||||
return False, e, None
|
||||
@ -222,7 +232,9 @@ def exe_to_json(file_path):
|
||||
# 获取r2pipe并解析文件 解析完即释放r2
|
||||
r2 = get_r2pipe(file_path)
|
||||
fcg_Operation_flag, fcg_Operation_message, function_num, function_fcg_edge_list, function_names = get_graph_fcg_r2pipe(r2)
|
||||
cfg_Operation_flag, cfg_Operation_message, cfg_item = get_graph_cfg_r2pipe(r2,file_path)
|
||||
with open(os.path.join(output_path, 'feature' ,file_fingerprint + '.jsonl'), 'w') as feature_out:
|
||||
cfg_Operation_flag, cfg_Operation_message, cfg_item = get_graph_cfg_r2pipe(r2,file_path, feature_out)
|
||||
feature_out.close()
|
||||
r2.quit()
|
||||
# 文件json构建
|
||||
|
||||
@ -256,7 +268,6 @@ if __name__ == '__main__':
|
||||
|
||||
sample_file_path = f"/mnt/d/bishe/dataset/sample_{sample_type}"
|
||||
sample_file_list = os.listdir(sample_file_path)
|
||||
# sample_file_list = ['001b1ca33bf52c5c09b3a852d0ac0254.exe']
|
||||
multi_thread(exe_to_json, [os.path.join(sample_file_path, file_name) for file_name in sample_file_list], thread_num=THREAD_FULL)
|
||||
# test_file_path = '/mnt/d/bishe/exe2json/sample/VirusShare_0a3b625380161cf92c4bb10135326bb5'
|
||||
# exe_to_json(test_file_path)
|
||||
|
Loading…
Reference in New Issue
Block a user