基础块条数精简
This commit is contained in:
parent
7c8145b52a
commit
795e5f050e
21
exe2json.py
21
exe2json.py
@ -14,7 +14,7 @@ import multiprocessing
|
|||||||
|
|
||||||
ret_trap_opcode_family = ["ret", "hlt", "int3", "ud2"]
|
ret_trap_opcode_family = ["ret", "hlt", "int3", "ud2"]
|
||||||
|
|
||||||
sample_type = 'malware'
|
sample_type = 'benign'
|
||||||
|
|
||||||
|
|
||||||
def extract_opcode(disasm_text):
|
def extract_opcode(disasm_text):
|
||||||
@ -71,6 +71,7 @@ def get_graph_cfg_r2pipe(r2pipe_open, file_path, output_path, file_name):
|
|||||||
block_addr = block["addr"]
|
block_addr = block["addr"]
|
||||||
block_Statement = []
|
block_Statement = []
|
||||||
|
|
||||||
|
|
||||||
node_list.append(block["addr"])
|
node_list.append(block["addr"])
|
||||||
# 获取基本块的反汇编指令
|
# 获取基本块的反汇编指令
|
||||||
disasm = r2pipe_open.cmdj("pdj " + str(block["ninstr"]) + " @" + str(block["addr"]))
|
disasm = r2pipe_open.cmdj("pdj " + str(block["ninstr"]) + " @" + str(block["addr"]))
|
||||||
@ -78,17 +79,13 @@ def get_graph_cfg_r2pipe(r2pipe_open, file_path, output_path, file_name):
|
|||||||
# if len(disasm) != 49144:
|
# if len(disasm) != 49144:
|
||||||
# continue
|
# continue
|
||||||
if disasm:
|
if disasm:
|
||||||
last_op = ''
|
block_len = len(disasm)
|
||||||
# if len(disasm) > 200:
|
|
||||||
# logger.warning(
|
|
||||||
# f"基础块指令长度异常,{file_path},函数名称{function['name']}基础块地址{block['addr']},长度{len(disasm)}")
|
|
||||||
for op_index, op in enumerate(disasm):
|
for op_index, op in enumerate(disasm):
|
||||||
op_disasm = extract_opcode(op["disasm"])
|
|
||||||
# 防止大量重复的语句造成内存溢出
|
|
||||||
if op_disasm == last_op:
|
|
||||||
continue
|
|
||||||
last_op = op_disasm
|
|
||||||
# 提取操作码并转换为bert模型输入格式
|
# 提取操作码并转换为bert模型输入格式
|
||||||
|
op_disasm = extract_opcode(op["disasm"])
|
||||||
|
# 如果单个基础块的长度大于20且操作码重复,则跳过
|
||||||
|
if block_len > 20 and op_disasm in block_Statement:
|
||||||
|
continue
|
||||||
block_Statement.append(op_disasm)
|
block_Statement.append(op_disasm)
|
||||||
# 处理跳转码并构建cfg
|
# 处理跳转码并构建cfg
|
||||||
if 'jump' in op:
|
if 'jump' in op:
|
||||||
@ -129,6 +126,8 @@ def get_graph_cfg_r2pipe(r2pipe_open, file_path, output_path, file_name):
|
|||||||
# 当前指令是基础块的最后一条指令
|
# 当前指令是基础块的最后一条指令
|
||||||
if op not in ret_trap_opcode_family or op["type"] not in ["ret", "trap"]:
|
if op not in ret_trap_opcode_family or op["type"] not in ["ret", "trap"]:
|
||||||
temp_edge_list.append([block_addr, op["offset"] + op["size"]])
|
temp_edge_list.append([block_addr, op["offset"] + op["size"]])
|
||||||
|
if block_len > 20:
|
||||||
|
logger.warning(f"二进制可执行文件解析警告,基础块长度大于20,文件{file_path},基础块地址{block_addr},操作码长度{block_len}->{len(block_Statement)}")
|
||||||
|
|
||||||
# debugger
|
# debugger
|
||||||
# print(len(disasm))
|
# print(len(disasm))
|
||||||
@ -257,7 +256,7 @@ if __name__ == '__main__':
|
|||||||
sample_file_list = os.listdir(sample_file_path)
|
sample_file_list = os.listdir(sample_file_path)
|
||||||
print(f"max worker {os.cpu_count()}")
|
print(f"max worker {os.cpu_count()}")
|
||||||
with multiprocessing.Pool(processes=os.cpu_count()) as pool:
|
with multiprocessing.Pool(processes=os.cpu_count()) as pool:
|
||||||
result = list(tqdm(pool.imap_unordered(exe_to_json, [os.path.join(sample_file_path, file_name) for file_name in sample_file_list]), total=len(sample_file_list)))
|
result = list(tqdm(pool.imap_unordered(exe_to_json, [os.path.join(sample_file_path, file_name) for file_name in sample_file_list[::-1]]), total=len(sample_file_list)))
|
||||||
|
|
||||||
|
|
||||||
# with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
|
# with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
|
||||||
|
Loading…
Reference in New Issue
Block a user