py2.7可行版本
This commit is contained in:
parent
8063d079db
commit
65d25d42de
@ -1,4 +1,5 @@
|
||||
# coding=utf-8
|
||||
import hashlib
|
||||
import pickle as pk
|
||||
import re
|
||||
import json
|
||||
@ -6,15 +7,22 @@ import os
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def convert(start, end, overhaul):
|
||||
for workflow in range(start, end):
|
||||
# workflow = 0
|
||||
cfg_dir = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_cfg".format(workflow)
|
||||
output_dir = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_json".format(workflow)
|
||||
dot_dir = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_dot".format(workflow)
|
||||
def calc_sha256(file_path, chunk_size=1 << 20):
    """Return the hexadecimal SHA-256 digest of a file's contents.

    The file is opened in binary mode and hashed incrementally, so a large
    sample is never held in memory all at once.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration (default 1 MiB).
            Must be a positive integer.

    Returns:
        The SHA-256 digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        IOError / OSError: If the file cannot be opened or read.
    """
    if chunk_size < 1:
        # read(0) always returns b'' and would silently hash nothing.
        raise ValueError("chunk_size must be a positive integer")

    sha256obj = hashlib.sha256()
    with open(file_path, 'rb') as f:
        # iter() with a sentinel keeps calling read() until it returns b''
        # (end of file); this form works on both Python 2.7 and Python 3.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            sha256obj.update(chunk)
    return sha256obj.hexdigest()
|
||||
|
||||
log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_log{}.log".format(workflow)
|
||||
process_log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_process_log{}.log".format(workflow)
|
||||
|
||||
def convert_malware(overhaul):
|
||||
cfg_dir = "D:\\bishe\\dataset\\infected\\infected_cfg"
|
||||
output_dir = "D:\\bishe\\dataset\\infected\\infected_jsonl"
|
||||
dot_dir = "D:\\bishe\\dataset\\infected\\infected_dot"
|
||||
raw_dir = "D:\\bishe\\dataset\\train_malware"
|
||||
|
||||
log_path = "D:\\bishe\\dataset\\logging\\convert_malware_log.log"
|
||||
process_log_path = "D:\\bishe\\dataset\\logging\\convert_malware_process_log.log"
|
||||
|
||||
if overhaul:
|
||||
if os.path.exists(log_path):
|
||||
@ -67,7 +75,7 @@ def convert(start, end, overhaul):
|
||||
|
||||
# Create the JSON object for the current PE file
|
||||
json_obj = {
|
||||
'hash': data.binary_name[11:],
|
||||
'hash': calc_sha256(raw_dir + "\\" + name),
|
||||
# 2023.8.12 bug fix: what is obtained here is the number of internal functions
|
||||
# 'function_number': data.raw_graph_list.__len__(),
|
||||
'function_number': len(functions_list),
|
||||
@ -119,12 +127,13 @@ def convert(start, end, overhaul):
|
||||
|
||||
|
||||
def convert_benign(overhaul):
|
||||
cfg_dir = "F:\\kkk\\dataset\\benign\\refind_cfg"
|
||||
dot_dir = "F:\\kkk\\dataset\\benign\\refind_dot"
|
||||
output_dir = "F:\\kkk\\dataset\\benign\\refind_jsonl"
|
||||
cfg_dir = "D:\\bishe\\dataset\\benign\\refind_cfg"
|
||||
dot_dir = "D:\\bishe\\dataset\\benign\\refind_dot"
|
||||
output_dir = "D:\\bishe\\dataset\\benign\\refind_jsonl"
|
||||
raw_dir = "D:\\bishe\\dataset\\train_benign"
|
||||
|
||||
log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_benign_log.log"
|
||||
process_log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_benign_process_log{}.log"
|
||||
log_path = "D:\\bishe\\dataset\\logging\\convert_benign_log.log"
|
||||
process_log_path = "D:\\bishe\\dataset\\logging\\convert_benign_process_log.log"
|
||||
|
||||
if overhaul:
|
||||
if os.path.exists(log_path):
|
||||
@ -145,6 +154,7 @@ def convert_benign(overhaul):
|
||||
continue
|
||||
|
||||
name = cfg[:-4] # 纯文件名
|
||||
|
||||
cfg_file = open(os.path.join(cfg_dir, name + '.ida'), 'r')
|
||||
try:
|
||||
data = pk.load(cfg_file)
|
||||
@ -180,7 +190,7 @@ def convert_benign(overhaul):
|
||||
|
||||
# Create the JSON object for the current PE file
|
||||
json_obj = {
|
||||
'hash': data.binary_name[11:],
|
||||
'hash': calc_sha256(raw_dir + "\\" + name),
|
||||
# 2023.8.12 bug fix: what is obtained here is the number of internal functions
|
||||
# 'function_number': data.raw_graph_list.__len__(),
|
||||
'function_number': len(functions_list),
|
||||
@ -233,4 +243,6 @@ def convert_benign(overhaul):
|
||||
|
||||
if __name__ == '__main__':
|
||||
# convert(35, 69)
|
||||
convert_benign(False)
|
||||
# convert_benign(True)
|
||||
convert_benign(True)
|
||||
convert_malware(True)
|
Loading…
Reference in New Issue
Block a user