Gencoding_Ke/Genius3/raw-feature-extractor/ida_batch.py

79 lines
3.0 KiB
Python
Raw Normal View History

2023-08-03 10:03:02 +08:00
# coding=utf-8
import os
import subprocess
import multiprocessing
from tqdm import tqdm
import time
# 单个pe文件处理超时/s
2023-08-07 20:48:21 +08:00
# 多次处理,一批数据中只有少量文件会超时
# 所有数据处理完成后可以对这些数据再进行一次更长超时时间的处理,若仍然超时则放弃
2023-08-03 10:03:02 +08:00
TIMEOUT = 60
def call_preprocess(cmd_line):
subprocess.call(cmd_line, shell=True)
def batch_mode():
2023-08-07 20:48:21 +08:00
for workflow in range(1, 20):
2023-08-03 10:03:02 +08:00
# workflow = 0
pe_dir = 'D:\\hkn\\infected\\datasets\\virusshare_infected{}'.format(workflow)
# for test
# pe_dir = 'D:\\hkn\\infected\\datasets\\virusshare_test'
log_path = 'D:\\hkn\\infected\\datasets\\logging\\ida_log{}.log'.format(workflow)
process_log_path = 'D:\\hkn\\infected\\datasets\\logging\\ida_process_log{}.log'.format(workflow)
with open(log_path, 'a+') as log, open(process_log_path, 'a+') as process_log:
logged = log.readline()
if logged == '':
log_index = 0
else:
log_index = int(logged)
# pe = "VirusShare_bc161e5e792028e8137aa070fda53f82"
for index, pe in enumerate(tqdm(sorted(os.listdir(pe_dir)))):
if index < log_index:
continue
# for test
# cmd_line = r'idaq64 -c -A -S"D:\hkn\project_folder\Gencoding3\Genius3\raw-feature-extractor\preprocessing_ida.py {}" -oF:\iout {}'.format(
# workflow, os.path.join(pe_dir, pe))
2023-08-07 20:48:21 +08:00
cmd_line = r'idaq64 -c -A -S"D:\hkn\project_folder\Gencoding3\Genius3\raw-feature-extractor\preprocessing_ida.py {}" -oF:\iout {}'.format(
workflow, os.path.join(pe_dir, pe))
2023-08-03 10:03:02 +08:00
p = multiprocessing.Process(target=call_preprocess, args=[cmd_line])
p.start()
flag_kill = True
start = time.time()
while time.time() - start <= TIMEOUT:
if not p.is_alive():
flag_kill = False
break
else:
time.sleep(1)
if flag_kill:
subprocess.call('taskkill /im idaq64.exe /f')
2023-08-07 20:48:21 +08:00
process_log.write(
"index {}, {} in workflow {} stuck, process terminated.\n".format(index, pe, workflow))
2023-08-03 10:03:02 +08:00
else:
# 正常运行结束
log.truncate(0)
log.seek(0)
log.write(str(index))
log.flush()
process_log.write("index {}, {} process done.\n".format(index, pe))
2023-08-07 20:48:21 +08:00
# 一次workflow结束后将所有副产物删除
delete_output()
def delete_output():
out_dir = 'F:\\iout'
os.rmdir(out_dir)
os.mkdir(out_dir)
2023-08-03 10:03:02 +08:00
# 注意该py文件必须放在IDA的根目录下且必须使用cmd命令执行否则无法链接到python库
if __name__ == '__main__':
batch_mode()