py2.7可行版本
This commit is contained in:
parent
8063d079db
commit
65d25d42de
@ -1,4 +1,5 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
import hashlib
|
||||||
import pickle as pk
|
import pickle as pk
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
@ -6,15 +7,22 @@ import os
|
|||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
def convert(start, end, overhaul):
|
def calc_sha256(file_path):
|
||||||
for workflow in range(start, end):
|
with open(file_path, 'rb') as f:
|
||||||
# workflow = 0
|
bytes = f.read()
|
||||||
cfg_dir = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_cfg".format(workflow)
|
sha256obj = hashlib.sha256(bytes)
|
||||||
output_dir = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_json".format(workflow)
|
sha256 = sha256obj.hexdigest()
|
||||||
dot_dir = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_dot".format(workflow)
|
return sha256
|
||||||
|
|
||||||
log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_log{}.log".format(workflow)
|
|
||||||
process_log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_process_log{}.log".format(workflow)
|
def convert_malware(overhaul):
|
||||||
|
cfg_dir = "D:\\bishe\\dataset\\infected\\infected_cfg"
|
||||||
|
output_dir = "D:\\bishe\\dataset\\infected\\infected_jsonl"
|
||||||
|
dot_dir = "D:\\bishe\\dataset\\infected\\infected_dot"
|
||||||
|
raw_dir = "D:\\bishe\\dataset\\train_malware"
|
||||||
|
|
||||||
|
log_path = "D:\\bishe\\dataset\\logging\\convert_malware_log.log"
|
||||||
|
process_log_path = "D:\\bishe\\dataset\\logging\\convert_malware_process_log.log"
|
||||||
|
|
||||||
if overhaul:
|
if overhaul:
|
||||||
if os.path.exists(log_path):
|
if os.path.exists(log_path):
|
||||||
@ -67,7 +75,7 @@ def convert(start, end, overhaul):
|
|||||||
|
|
||||||
# 为当前pe文件创建json对象
|
# 为当前pe文件创建json对象
|
||||||
json_obj = {
|
json_obj = {
|
||||||
'hash': data.binary_name[11:],
|
'hash': calc_sha256(raw_dir + "\\" + name),
|
||||||
# 2023.8.12 bug fix: 这里获取的是内部函数的数量
|
# 2023.8.12 bug fix: 这里获取的是内部函数的数量
|
||||||
# 'function_number': data.raw_graph_list.__len__(),
|
# 'function_number': data.raw_graph_list.__len__(),
|
||||||
'function_number': len(functions_list),
|
'function_number': len(functions_list),
|
||||||
@ -119,12 +127,13 @@ def convert(start, end, overhaul):
|
|||||||
|
|
||||||
|
|
||||||
def convert_benign(overhaul):
|
def convert_benign(overhaul):
|
||||||
cfg_dir = "F:\\kkk\\dataset\\benign\\refind_cfg"
|
cfg_dir = "D:\\bishe\\dataset\\benign\\refind_cfg"
|
||||||
dot_dir = "F:\\kkk\\dataset\\benign\\refind_dot"
|
dot_dir = "D:\\bishe\\dataset\\benign\\refind_dot"
|
||||||
output_dir = "F:\\kkk\\dataset\\benign\\refind_jsonl"
|
output_dir = "D:\\bishe\\dataset\\benign\\refind_jsonl"
|
||||||
|
raw_dir = "D:\\bishe\\dataset\\train_benign"
|
||||||
|
|
||||||
log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_benign_log.log"
|
log_path = "D:\\bishe\\dataset\\logging\\convert_benign_log.log"
|
||||||
process_log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_benign_process_log{}.log"
|
process_log_path = "D:\\bishe\\dataset\\logging\\convert_benign_process_log.log"
|
||||||
|
|
||||||
if overhaul:
|
if overhaul:
|
||||||
if os.path.exists(log_path):
|
if os.path.exists(log_path):
|
||||||
@ -145,6 +154,7 @@ def convert_benign(overhaul):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
name = cfg[:-4] # 纯文件名
|
name = cfg[:-4] # 纯文件名
|
||||||
|
|
||||||
cfg_file = open(os.path.join(cfg_dir, name + '.ida'), 'r')
|
cfg_file = open(os.path.join(cfg_dir, name + '.ida'), 'r')
|
||||||
try:
|
try:
|
||||||
data = pk.load(cfg_file)
|
data = pk.load(cfg_file)
|
||||||
@ -180,7 +190,7 @@ def convert_benign(overhaul):
|
|||||||
|
|
||||||
# 为当前pe文件创建json对象
|
# 为当前pe文件创建json对象
|
||||||
json_obj = {
|
json_obj = {
|
||||||
'hash': data.binary_name[11:],
|
'hash': calc_sha256(raw_dir + "\\" + name),
|
||||||
# 2023.8.12 bug fix: 这里获取的是内部函数的数量
|
# 2023.8.12 bug fix: 这里获取的是内部函数的数量
|
||||||
# 'function_number': data.raw_graph_list.__len__(),
|
# 'function_number': data.raw_graph_list.__len__(),
|
||||||
'function_number': len(functions_list),
|
'function_number': len(functions_list),
|
||||||
@ -233,4 +243,6 @@ def convert_benign(overhaul):
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# convert(35, 69)
|
# convert(35, 69)
|
||||||
convert_benign(False)
|
# convert_benign(True)
|
||||||
|
convert_benign(True)
|
||||||
|
convert_malware(True)
|
Loading…
Reference in New Issue
Block a user