backup
This commit is contained in:
parent
d599236e94
commit
ad2583dba9
@ -119,9 +119,9 @@ def convert(start, end, overhaul):
|
|||||||
|
|
||||||
|
|
||||||
def convert_benign(overhaul):
|
def convert_benign(overhaul):
|
||||||
cfg_dir = "D:\\hkn\\infected\\datasets\\benign_cfg\\new"
|
cfg_dir = "F:\\kkk\\dataset\\benign\\refind_cfg"
|
||||||
output_dir = "D:\\hkn\\infected\\datasets\\benign_json\\new"
|
dot_dir = "F:\\kkk\\dataset\\benign\\refind_dot"
|
||||||
dot_dir = "D:\\hkn\\infected\\datasets\\benign_dot\\new"
|
output_dir = "F:\\kkk\\dataset\\benign\\refind_jsonl"
|
||||||
|
|
||||||
log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_benign_log.log"
|
log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_benign_log.log"
|
||||||
process_log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_benign_process_log{}.log"
|
process_log_path = "D:\\hkn\\infected\\datasets\\logging\\convert_benign_process_log{}.log"
|
||||||
@ -139,7 +139,8 @@ def convert_benign(overhaul):
|
|||||||
else:
|
else:
|
||||||
log_index = int(logged)
|
log_index = int(logged)
|
||||||
|
|
||||||
for index, cfg in enumerate(tqdm(os.listdir(cfg_dir))):
|
cdg_list = os.listdir(cfg_dir)
|
||||||
|
for index, cfg in enumerate(tqdm(cdg_list)):
|
||||||
if index < log_index:
|
if index < log_index:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -153,6 +154,8 @@ def convert_benign(overhaul):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
process_log.write("index {}, {} process failed. ValueError occurred.\n".format(index, cfg))
|
process_log.write("index {}, {} process failed. ValueError occurred.\n".format(index, cfg))
|
||||||
continue
|
continue
|
||||||
|
except KeyError:
|
||||||
|
process_log.write("index {}, {} process failed. KeyError occurred.\n".format(index, cfg))
|
||||||
finally:
|
finally:
|
||||||
cfg_file.close()
|
cfg_file.close()
|
||||||
|
|
||||||
@ -230,4 +233,4 @@ def convert_benign(overhaul):
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# convert(35, 69)
|
# convert(35, 69)
|
||||||
convert_benign(True)
|
convert_benign(False)
|
||||||
|
24
Genius3/raw-feature-extractor/generate_asm_file.py
Normal file
24
Genius3/raw-feature-extractor/generate_asm_file.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# coding=utf-8
|
||||||
|
from func import *
|
||||||
|
from idc import *
|
||||||
|
|
||||||
|
|
||||||
|
def generate_asm_file():
    """Write the disassembly of the binary currently loaded in IDA to an .asm file.

    Meant to be run as an IDA batch script: it adjusts the start-up analysis
    flags, waits for auto-analysis to finish, dumps the listing and then
    terminates IDA.

    The output file is named after the input file with ".asm" appended.  The
    path is relative, so where it lands depends on IDA's working directory
    (NOTE(review): confirm the directory the batch driver expects).
    """
    # Clear the AF_IMMOFF bit of the start-up analysis flags before analysis
    # is awaited (presumably to stop immediates being turned into offsets --
    # confirm against the IDA SDK documentation).
    idc.SetShortPrm(
        idc.INF_START_AF,
        idc.GetShortPrm(idc.INF_START_AF) & ~idc.AF_IMMOFF,
    )
    idaapi.autoWait()

    # Generate the .asm listing for the PE file, covering address 0 up to BADADDR.
    asm_path = idc.GetInputFile() + ".asm"
    idc.GenerateFile(idc.OFILE_ASM, asm_path, 0, idc.BADADDR, 0)

    # Command-line mode still launches IDA Pro, so close it automatically
    # once the work is done.
    idc.Exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
generate_asm_file()
|
@ -26,7 +26,7 @@ def benign_batch_mode(overhaul):
|
|||||||
|
|
||||||
log_path = 'D:\\hkn\\infected\\datasets\\logging\\ida_log_benign.log'
|
log_path = 'D:\\hkn\\infected\\datasets\\logging\\ida_log_benign.log'
|
||||||
process_log_path = 'D:\\hkn\\infected\\datasets\\logging\\ida_process_log_benign.log'
|
process_log_path = 'D:\\hkn\\infected\\datasets\\logging\\ida_process_log_benign.log'
|
||||||
benign_pe_dir = 'D:\\hkn\\infected\\datasets\\benign\\new'
|
benign_pe_dir = 'F:\\kkk\\dataset\\benign\\refind'
|
||||||
|
|
||||||
if overhaul:
|
if overhaul:
|
||||||
if os.path.exists(log_path):
|
if os.path.exists(log_path):
|
||||||
@ -41,7 +41,8 @@ def benign_batch_mode(overhaul):
|
|||||||
else:
|
else:
|
||||||
log_index = int(logged)
|
log_index = int(logged)
|
||||||
|
|
||||||
for index, pe in enumerate(tqdm(sorted(os.listdir(benign_pe_dir)))):
|
pe_list = os.listdir(benign_pe_dir)
|
||||||
|
for index, pe in enumerate(tqdm(sorted(pe_list))):
|
||||||
if index < log_index:
|
if index < log_index:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@ -78,7 +79,7 @@ def benign_batch_mode(overhaul):
|
|||||||
print('总失败数{}'.format(total_failed))
|
print('总失败数{}'.format(total_failed))
|
||||||
|
|
||||||
|
|
||||||
def mal_batch_mode(start, end):
|
def mal_batch_mode(start, end, overhaul):
|
||||||
# 只选其中这些类的pe进行分析,其他的就直接跳过
|
# 只选其中这些类的pe进行分析,其他的就直接跳过
|
||||||
families_need_to_analyze = {'wacatac': 0, 'glupteba': 0, 'ulpm': 0, 'fugrafa': 0, 'tiggre': 0,
|
families_need_to_analyze = {'wacatac': 0, 'glupteba': 0, 'ulpm': 0, 'fugrafa': 0, 'tiggre': 0,
|
||||||
'redcap': 0, 'generickdz': 0, 'berbew': 0, 'agenttesla': 0, 'lazy': 0}
|
'redcap': 0, 'generickdz': 0, 'berbew': 0, 'agenttesla': 0, 'lazy': 0}
|
||||||
@ -94,6 +95,13 @@ def mal_batch_mode(start, end):
|
|||||||
family_path = 'D:\\hkn\\infected\\datasets\\virusshare_family\\virusshare_family{}.txt'.format(workflow)
|
family_path = 'D:\\hkn\\infected\\datasets\\virusshare_family\\virusshare_family{}.txt'.format(workflow)
|
||||||
log_path = 'D:\\hkn\\infected\\datasets\\logging\\ida_log{}.log'.format(workflow)
|
log_path = 'D:\\hkn\\infected\\datasets\\logging\\ida_log{}.log'.format(workflow)
|
||||||
process_log_path = 'D:\\hkn\\infected\\datasets\\logging\\ida_process_log{}.log'.format(workflow)
|
process_log_path = 'D:\\hkn\\infected\\datasets\\logging\\ida_process_log{}.log'.format(workflow)
|
||||||
|
|
||||||
|
if overhaul:
|
||||||
|
if os.path.exists(log_path):
|
||||||
|
os.remove(log_path)
|
||||||
|
if os.path.exists(process_log_path):
|
||||||
|
os.remove(process_log_path)
|
||||||
|
|
||||||
with open(log_path, 'a+') as log, open(process_log_path, 'a+') as process_log, open(family_path,
|
with open(log_path, 'a+') as log, open(process_log_path, 'a+') as process_log, open(family_path,
|
||||||
'r') as family_file:
|
'r') as family_file:
|
||||||
logged = log.readline()
|
logged = log.readline()
|
||||||
@ -165,8 +173,28 @@ def delete_output():
|
|||||||
os.remove(os.path.join(out_dir, f))
|
os.remove(os.path.join(out_dir, f))
|
||||||
|
|
||||||
|
|
||||||
|
def generate_asm_batch_mode():
    """Run generate_asm_file.py under IDA for every file in the benign PE directory.

    For each entry of the directory an ``idaq64`` command line is built and
    handed to ``call_preprocess`` in a child process.  The files are handled
    strictly one at a time: the next IDA instance is only started after the
    previous child process has exited.
    """
    pe_dir = 'F:\\kkk\\dataset\\benign\\refind'
    for pe in tqdm(os.listdir(pe_dir)):
        cmd_line = r'idaq64 -c -A -S"D:\hkn\project_folder\Gencoding3\Genius3\raw-feature-extractor\generate_asm_file.py" -oF:\iout {}'.format(
            os.path.join(pe_dir, pe))

        worker = multiprocessing.Process(target=call_preprocess, args=[cmd_line])
        worker.start()
        # join() blocks until the child exits; this replaces the former
        # one-second is_alive() polling loop and returns as soon as the
        # child is done.
        worker.join()

        # NOTE(review): the cleanup is assumed to run once per processed file;
        # the flattened source does not show the indentation -- confirm.
        delete_output()
|
||||||
|
|
||||||
|
|
||||||
# 注意:该py文件必须放在IDA的根目录下,且必须使用cmd命令执行,否则无法链接到python库
|
# 注意:该py文件必须放在IDA的根目录下,且必须使用cmd命令执行,否则无法链接到python库
|
||||||
# F:\\kkk\\IDA_6.6
|
# F:\\kkk\\IDA_6.6
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
benign_batch_mode(True)
|
benign_batch_mode(True)
|
||||||
# mal_batch_mode(35, 69)
|
# mal_batch_mode(35, 69, True)
|
||||||
|
# generate_asm_batch_mode()
|
||||||
|
|
||||||
|
@ -17,11 +17,13 @@ def preprocess():
|
|||||||
workflow = idc.ARGV[1]
|
workflow = idc.ARGV[1]
|
||||||
# workflow为特定值时分析良性软件,否则分析恶意软件
|
# workflow为特定值时分析良性软件,否则分析恶意软件
|
||||||
if workflow == '-1':
|
if workflow == '-1':
|
||||||
cfg_path = "D:\\hkn\\infected\\datasets\\benign_cfg\\new"
|
cfg_path = "F:\\kkk\\dataset\\benign\\refind_cfg\\{}.ida".format(binary_name)
|
||||||
gdl_path = "D:\\hkn\\infected\\datasets\\benign_dot\\new\\{}.dot".format(binary_name)
|
gdl_path = "F:\\kkk\\dataset\\benign\\refind_dot\\{}.dot".format(binary_name)
|
||||||
|
asm_path = "F:\\kkk\\dataset\\benign\\refind_asm\\{}.asm".format(binary_name)
|
||||||
else:
|
else:
|
||||||
cfg_path = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_cfg".format(workflow)
|
cfg_path = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_cfg\\{}.ida".format(workflow, binary_name)
|
||||||
gdl_path = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_dot\\{}.dot".format(workflow, binary_name)
|
gdl_path = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_dot\\{}.dot".format(workflow, binary_name)
|
||||||
|
asm_path = "D:\\hkn\\infected\\datasets\\virusshare_infected{}_asm\\{}.asm".format(workflow, binary_name)
|
||||||
|
|
||||||
analysis_flags = idc.GetShortPrm(idc.INF_START_AF)
|
analysis_flags = idc.GetShortPrm(idc.INF_START_AF)
|
||||||
analysis_flags &= ~idc.AF_IMMOFF
|
analysis_flags &= ~idc.AF_IMMOFF
|
||||||
@ -30,14 +32,17 @@ def preprocess():
|
|||||||
|
|
||||||
# 生成pe文件的cfg列表
|
# 生成pe文件的cfg列表
|
||||||
cfgs = get_func_cfgs_c(FirstSeg())
|
cfgs = get_func_cfgs_c(FirstSeg())
|
||||||
# 生成pe文件的fcg
|
# 将cfg保存为.ida
|
||||||
|
pickle.dump(cfgs, open(cfg_path, 'w'))
|
||||||
|
|
||||||
|
# 生成pe文件的fcg,保存为.dot文件
|
||||||
# idc.GenCallGdl(gdl_path, 'Call Gdl', idc.CHART_GEN_GDL) 这个生成gdl文件,网上几乎找不到gdl这个格式
|
# idc.GenCallGdl(gdl_path, 'Call Gdl', idc.CHART_GEN_GDL) 这个生成gdl文件,网上几乎找不到gdl这个格式
|
||||||
idc.GenCallGdl(gdl_path, 'Call Gdl', idaapi.CHART_GEN_DOT)
|
idc.GenCallGdl(gdl_path, 'Call Gdl', idaapi.CHART_GEN_DOT)
|
||||||
|
|
||||||
full_path = os.path.join(cfg_path, binary_name + '.ida')
|
# 生成.asm文件
|
||||||
pickle.dump(cfgs, open(full_path, 'w'))
|
idc.GenerateFile(idc.OFILE_ASM, asm_path, 0, idc.BADADDR, 0)
|
||||||
|
|
||||||
# 由于命令行模式也必须打开ida pro,因此每次结束自动关闭ida
|
# 关闭IDA Pro
|
||||||
idc.Exit(0)
|
idc.Exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ import json
|
|||||||
import random
|
import random
|
||||||
import shutil
|
import shutil
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
import csv
|
||||||
|
|
||||||
|
|
||||||
def func():
|
def func():
|
||||||
@ -247,14 +248,45 @@ def delete_pe():
|
|||||||
# os.remove(os.path.join(dot_dir, cfg))
|
# os.remove(os.path.join(dot_dir, cfg))
|
||||||
|
|
||||||
|
|
||||||
|
def delete_error_benign(pe_dir="F:\\kkk\\dataset\\benign\\refind",
                        jsonl_dir='F:\\kkk\\dataset\\benign\\refind_jsonl',
                        dot_dir='F:\\kkk\\dataset\\benign\\refind_dot',
                        cfg_dir="F:\\kkk\\dataset\\benign\\refind_cfg",
                        asm_dir="F:\\kkk\\dataset\\benign\\refind_asm"):
    """Delete every benign sample that has no .jsonl output, plus its by-products.

    A sample ``name`` in ``pe_dir`` is kept only if ``name + '.jsonl'`` exists
    in ``jsonl_dir``.  Otherwise the sample itself is removed together with
    whichever of ``name + '.asm'``, ``name + '.ida'`` and ``name + '.dot'``
    exist in ``asm_dir``, ``cfg_dir`` and ``dot_dir`` respectively.

    All directories default to the dataset locations that used to be
    hard-coded, so calling the function without arguments behaves as before.

    Raises:
        OSError: if a directory cannot be listed or a file cannot be removed
            (for example when ``pe_dir`` contains a sub-directory).
    """
    # (directory, suffix) of each intermediate file produced for a sample.
    byproducts = (
        (asm_dir, '.asm'),
        (cfg_dir, '.ida'),
        (dot_dir, '.dot'),
    )
    for name in os.listdir(pe_dir):
        if os.path.exists(os.path.join(jsonl_dir, name + '.jsonl')):
            continue  # conversion succeeded, keep the sample

        os.remove(os.path.join(pe_dir, name))
        for directory, suffix in byproducts:
            leftover = os.path.join(directory, name + suffix)
            # Not every failed sample got as far as producing each by-product.
            if os.path.exists(leftover):
                os.remove(leftover)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_benign_csv(benign_pe_dir='F:\\kkk\\dataset\\benign\\refind',
                        csv_out='F:\\kkk\\dataset\\benign_family.csv',
                        label='5'):
    """Write a two-column CSV that labels every file in ``benign_pe_dir``.

    The CSV has the header ``Id,Class`` followed by one row per directory
    entry: the entry's name in ``Id`` and ``label`` in ``Class``.  Rows are
    written in sorted name order so the output is reproducible
    (``os.listdir`` gives no ordering guarantee).

    The defaults reproduce the previously hard-coded paths and class label,
    so calling the function without arguments behaves as before.

    Raises:
        OSError / IOError: if the directory cannot be listed or the output
            file cannot be opened.
    """
    import sys  # local import: only needed to pick the right open() mode

    fieldnames = ['Id', 'Class']
    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3; "wb" on Python 3 raises TypeError on the
    # first write.
    if sys.version_info[0] >= 3:
        output_file = open(csv_out, 'w', newline='')
    else:
        output_file = open(csv_out, 'wb')

    with output_file:
        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()
        for name in sorted(os.listdir(benign_pe_dir)):
            writer.writerow({'Id': name, 'Class': label})
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
generate_benign_csv()
|
||||||
|
# create_pixel_intensity()
|
||||||
# create_dir()
|
# create_dir()
|
||||||
# change_max_item_lines()
|
# change_max_item_lines()
|
||||||
# subprocess.call('taskkill /im idaq64.exe /f')
|
# subprocess.call('taskkill /im idaq64.exe /f')
|
||||||
# delete_error()
|
# delete_error_benign()
|
||||||
# test()
|
# test()
|
||||||
# delete_jsonl()
|
# delete_jsonl()
|
||||||
delete_all_local()
|
# delete_all_local()
|
||||||
# check_json()
|
# check_json()
|
||||||
# delete_pe()
|
# delete_pe()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user