Gencoding_Ke/Genius3/raw-feature-extractor/preprocessing_ida.py

55 lines
2.5 KiB
Python
Raw Normal View History

2021-11-18 17:43:34 +08:00
# -*- coding: UTF-8 -*-
2023-08-03 10:03:02 +08:00
import pickle
2021-11-18 17:43:34 +08:00
from func import *
from idc import *
import os
2023-08-03 10:03:02 +08:00
def preprocess():
    """Export the disassembly of the binary loaded in IDA to an .asm file.

    Runs as an IDA batch script, e.g.
    ``idaq64 -c -A -S"preprocessing_ida.py <workflow>" <pe_path>``.

    The workflow argument is read from ``idc.ARGV[1]``: the value ``'-1'``
    writes to the benign dataset directories, any other value writes to the
    infected (malware) ones. When the argument is missing, the malware
    workflow is used. IDA is closed with ``idc.Exit(0)`` after the export.
    """
    # Arguments given through -S"script args" arrive in idc.ARGV, while
    # sys.argv only holds the script path. Sample session:
    #   E:\BaiduNetdiskDownload\IDA_Pro_v6.8\IDA_Pro_v6.8\idaq.exe -c -S"raw-feature-extractor/preprocessing_ida.py --path C:\Program1\pycharmproject\Genius3\acfgs" hpcenter
    #   str(sys.argv) -> ['raw-feature-extractor/preprocessing_ida.py']
    #   str(idc.ARGV) -> ['raw-feature-extractor/preprocessing_ida.py', '--path', 'C:\\Program1\\pycharmproject\\Genius3\\acfgs']
    binary_name = idc.GetInputFile()

    # Fall back to '0' (the malware workflow used in the sample command lines)
    # when no argument was supplied. Indexing ARGV unconditionally raised
    # IndexError, so idc.Exit() below was never reached.
    workflow = idc.ARGV[1] if len(idc.ARGV) > 1 else '0'

    # A workflow of '-1' analyses benign software; any other value analyses
    # malware. cfg_path and gdl_path are only needed by the disabled steps
    # further down.
    if workflow == '-1':
        cfg_path = "D:\\bishe\\dataset\\benign\\refind_cfg\\{}.ida".format(binary_name)
        gdl_path = "D:\\bishe\\dataset\\benign\\refind_dot\\{}.dot".format(binary_name)
        asm_path = "D:\\bishe\\dataset\\benign\\refind_asm\\{}.asm".format(binary_name)
    else:
        cfg_path = "D:\\bishe\\dataset\\infected\\infected_cfg\\{}.ida".format(binary_name)
        gdl_path = "D:\\bishe\\dataset\\infected\\infected_dot\\{}.dot".format(binary_name)
        asm_path = "D:\\bishe\\dataset\\infected\\infected_asm\\{}.asm".format(binary_name)

    # Clear the AF_IMMOFF bit in the initial analysis flags, then wait for
    # IDA's auto-analysis before exporting anything.
    analysis_flags = idc.GetShortPrm(idc.INF_START_AF)
    analysis_flags &= ~idc.AF_IMMOFF
    idc.SetShortPrm(idc.INF_START_AF, analysis_flags)
    idaapi.autoWait()

    # Disabled step: build the list of CFGs of the PE file and save it as .ida.
    # cfgs = get_func_cfgs_c(FirstSeg())
    # pickle.dump(cfgs, open(cfg_path, 'w'))

    # Disabled step: save the function call graph (FCG) of the PE file as .dot.
    # The first variant emits GDL, a format that is hardly documented online.
    # idc.GenCallGdl(gdl_path, 'Call Gdl', idc.CHART_GEN_GDL)
    # idc.GenCallGdl(gdl_path, 'Call Gdl', idaapi.CHART_GEN_DOT)

    # Generate the .asm file for the address range 0 .. BADADDR.
    idc.GenerateFile(idc.OFILE_ASM, asm_path, 0, idc.BADADDR, 0)

    # Close IDA Pro.
    idc.Exit(0)
2021-11-18 17:43:34 +08:00
2023-08-03 10:03:02 +08:00
# General command-line format: idaq64 -c -A -S"preprocessing_ida.py arg1 arg2" VirusShare_bca58b12923073
# This script is invoked as: idaq64 -c -A -S"preprocessing_ida.py workflow" -oF:\iout pe_path
# Full command-line examples:
# F:\kkk\IDA_6.6\idaq64 -c -A -S"D:\hkn\project_folder\Gencoding3\Genius3\raw-feature-extractor\preprocessing_ida.py 0" -oF:\iout D:\hkn\infected\datasets\virusshare_infected0\VirusShare_bc161e5e792028e8137aa070fda53f82
2024-01-06 18:47:26 +08:00
# D:\IDA_Pro_v6.8\idaq64.exe -c -A -S"D:\bishe\Gencoding_KE\Genius3\raw-feature-extractor\preprocessing_ida.py 0" -oD:\bishe\dataset\out D:\bishe\dataset\train_malware\0ACDbR5M3ZhBJajygTuf
2021-11-18 17:43:34 +08:00
# Run the export when this file is executed as a script.
if __name__ == "__main__":
    preprocess()