基础块条数精简
This commit is contained in:
parent
7c8145b52a
commit
795e5f050e
21
exe2json.py
21
exe2json.py
@@ -14,7 +14,7 @@ import multiprocessing
|
||||
|
||||
ret_trap_opcode_family = ["ret", "hlt", "int3", "ud2"]
|
||||
|
||||
sample_type = 'malware'
|
||||
sample_type = 'benign'
|
||||
|
||||
|
||||
def extract_opcode(disasm_text):
|
||||
@@ -71,6 +71,7 @@ def get_graph_cfg_r2pipe(r2pipe_open, file_path, output_path, file_name):
|
||||
block_addr = block["addr"]
|
||||
block_Statement = []
|
||||
|
||||
|
||||
node_list.append(block["addr"])
|
||||
# 获取基本块的反汇编指令
|
||||
disasm = r2pipe_open.cmdj("pdj " + str(block["ninstr"]) + " @" + str(block["addr"]))
|
||||
@@ -78,17 +79,13 @@ def get_graph_cfg_r2pipe(r2pipe_open, file_path, output_path, file_name):
|
||||
# if len(disasm) != 49144:
|
||||
# continue
|
||||
if disasm:
|
||||
last_op = ''
|
||||
# if len(disasm) > 200:
|
||||
# logger.warning(
|
||||
# f"基础块指令长度异常,{file_path},函数名称{function['name']}基础块地址{block['addr']},长度{len(disasm)}")
|
||||
block_len = len(disasm)
|
||||
for op_index, op in enumerate(disasm):
|
||||
op_disasm = extract_opcode(op["disasm"])
|
||||
# 防止大量重复的语句造成内存溢出
|
||||
if op_disasm == last_op:
|
||||
continue
|
||||
last_op = op_disasm
|
||||
# 提取操作码并转换为bert模型输入格式
|
||||
op_disasm = extract_opcode(op["disasm"])
|
||||
# 如果单个基础块的长度大于20且操作码重复,则跳过
|
||||
if block_len > 20 and op_disasm in block_Statement:
|
||||
continue
|
||||
block_Statement.append(op_disasm)
|
||||
# 处理跳转码并构建cfg
|
||||
if 'jump' in op:
|
||||
@@ -129,6 +126,8 @@ def get_graph_cfg_r2pipe(r2pipe_open, file_path, output_path, file_name):
|
||||
# 当前指令是基础块的最后一条指令
|
||||
if op not in ret_trap_opcode_family or op["type"] not in ["ret", "trap"]:
|
||||
temp_edge_list.append([block_addr, op["offset"] + op["size"]])
|
||||
if block_len > 20:
|
||||
logger.warning(f"二进制可执行文件解析警告,基础块长度大于20,文件{file_path},基础块地址{block_addr},操作码长度{block_len}->{len(block_Statement)}")
|
||||
|
||||
# debugger
|
||||
# print(len(disasm))
|
||||
@@ -257,7 +256,7 @@ if __name__ == '__main__':
|
||||
sample_file_list = os.listdir(sample_file_path)
|
||||
print(f"max worker {os.cpu_count()}")
|
||||
with multiprocessing.Pool(processes=os.cpu_count()) as pool:
|
||||
result = list(tqdm(pool.imap_unordered(exe_to_json, [os.path.join(sample_file_path, file_name) for file_name in sample_file_list]), total=len(sample_file_list)))
|
||||
result = list(tqdm(pool.imap_unordered(exe_to_json, [os.path.join(sample_file_path, file_name) for file_name in sample_file_list[::-1]]), total=len(sample_file_list)))
|
||||
|
||||
|
||||
# with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
|
||||
|
Loading…
Reference in New Issue
Block a user