Compare commits
2 Commits
5de6c4568c
...
bd51d89a0b
Author | SHA1 | Date | |
---|---|---|---|
bd51d89a0b | |||
73c9da0599 |
18
ida_file_cerate.bat
Normal file
18
ida_file_cerate.bat
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
@echo off
|
||||||
|
setlocal enabledelayedexpansion
|
||||||
|
|
||||||
|
set "IDA_PATH=D:\IDA_Pro_v6.8\idaq.exe"
|
||||||
|
set "FOLDER_PATH=D:\bishe\Gencoding\A2C"
|
||||||
|
set "SCRIPT_PATH=../raw-feature-extractor/preprocessing_ida.py"
|
||||||
|
set "SAVE_PATH=../store/"
|
||||||
|
set "LOG_PATH=../log/"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
for %%f in ("%FOLDER_PATH%\*.exe") do (
|
||||||
|
echo !time! %%f
|
||||||
|
%IDA_PATH% -c -B -S"%SCRIPT_PATH% --path %SAVE_PATH%" %%f
|
||||||
|
)
|
||||||
|
|
||||||
|
endlocal
|
||||||
|
|
@ -1,3 +1,4 @@
|
|||||||
|
import idc
|
||||||
from func import *
|
from func import *
|
||||||
from raw_graphs import *
|
from raw_graphs import *
|
||||||
from idc import *
|
from idc import *
|
||||||
@ -13,8 +14,11 @@ def parse_command():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
# def main_op(store_file_path):
|
||||||
args = parse_command()
|
args = parse_command()
|
||||||
|
# path = os.path.join("../")
|
||||||
path = idc.ARGV[2]
|
path = idc.ARGV[2]
|
||||||
|
print os.getcwd()
|
||||||
analysis_flags = idc.GetShortPrm(idc.INF_START_AF)
|
analysis_flags = idc.GetShortPrm(idc.INF_START_AF)
|
||||||
analysis_flags &= ~idc.AF_IMMOFF
|
analysis_flags &= ~idc.AF_IMMOFF
|
||||||
# turn off "automatically make offset" heuristic
|
# turn off "automatically make offset" heuristic
|
||||||
@ -25,3 +29,6 @@ if __name__ == '__main__':
|
|||||||
fullpath = os.path.join(path, binary_name)
|
fullpath = os.path.join(path, binary_name)
|
||||||
pickle.dump(cfgs, open(fullpath, 'w'))
|
pickle.dump(cfgs, open(fullpath, 'w'))
|
||||||
idc.Exit(0)
|
idc.Exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
# -*- coding: UTF-8 -*-
|
# -*- coding: UTF-8 -*-
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
from matplotlib import pyplot as plt
|
from matplotlib import pyplot as plt
|
||||||
import networkx as nx
|
import networkx as nx
|
||||||
@ -7,11 +8,6 @@ import hashlib
|
|||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
||||||
def print_obj(obj):
|
|
||||||
"打印对象的所有属性"
|
|
||||||
print(obj.__dict__)
|
|
||||||
|
|
||||||
|
|
||||||
def calc_sha256(file_path):
|
def calc_sha256(file_path):
|
||||||
with open(file_path, 'rb') as f:
|
with open(file_path, 'rb') as f:
|
||||||
bytes = f.read()
|
bytes = f.read()
|
||||||
@ -25,10 +21,20 @@ import pickle
|
|||||||
# sub_10F20 308 反编译代码有字符串,但是这个特征提取里没有字符串 constant,可能是间接引用的,不识别。看了下所有函数的特征,几乎都没有字符串常量,可能都是写在别的地方然后引用的。
|
# sub_10F20 308 反编译代码有字符串,但是这个特征提取里没有字符串 constant,可能是间接引用的,不识别。看了下所有函数的特征,几乎都没有字符串常量,可能都是写在别的地方然后引用的。
|
||||||
# sub_166C4 393
|
# sub_166C4 393
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
file_path = '../3c580f5beca53b6599e5f04d3aa68a34bd50521d7ec5d7163849eb69f53a4150.exe'
|
file_name_list = os.listdir('../A2C/')
|
||||||
testpath = '../store/3c580f5beca53b6599e5f04d3aa68a34bd50521d7ec5d7163849eb69f53a4150.exe.ida'
|
res_file = "../sample.jsonl"
|
||||||
|
sample_file = open(res_file, mode='a')
|
||||||
|
for file_name in file_name_list:
|
||||||
|
print file_name
|
||||||
|
file_path = '../A2C/' + file_name
|
||||||
|
testpath = '../store/' + file_name + '.ida'
|
||||||
|
if os.path.exists(testpath) and os.path.splitext(file_path)[-1].lower() == '.exe':
|
||||||
fr = open(testpath, 'r')
|
fr = open(testpath, 'r')
|
||||||
data1 = pickle.load(fr) # 一个二进制文件的acfgs
|
data1 = pickle.load(fr) # 一个二进制文件的acfgs
|
||||||
|
# function num
|
||||||
|
function_number = len(data1.raw_graph_list)
|
||||||
|
if function_number == 0:
|
||||||
|
continue
|
||||||
# function_edges
|
# function_edges
|
||||||
function_edge_start = []
|
function_edge_start = []
|
||||||
function_edge_end = []
|
function_edge_end = []
|
||||||
@ -39,8 +45,7 @@ if __name__ == '__main__':
|
|||||||
fun_name_temp = []
|
fun_name_temp = []
|
||||||
# file hash
|
# file hash
|
||||||
file_hash = calc_sha256(file_path)
|
file_hash = calc_sha256(file_path)
|
||||||
# function num
|
|
||||||
function_number = len(data1.raw_graph_list)
|
|
||||||
acfg_list = []
|
acfg_list = []
|
||||||
# 函数级特征
|
# 函数级特征
|
||||||
for i in range(len(data1.raw_graph_list)):
|
for i in range(len(data1.raw_graph_list)):
|
||||||
@ -74,7 +79,8 @@ if __name__ == '__main__':
|
|||||||
# total instructions
|
# total instructions
|
||||||
block_features.append(temp_G.node[temp]['numIns'])
|
block_features.append(temp_G.node[temp]['numIns'])
|
||||||
# string or integer constants
|
# string or integer constants
|
||||||
block_features.append(len(temp_G.node[temp]['strings']) if len(temp_G.node[temp]['strings']) != 0 else len(
|
block_features.append(
|
||||||
|
len(temp_G.node[temp]['strings']) if len(temp_G.node[temp]['strings']) != 0 else len(
|
||||||
temp_G.node[temp]['consts']))
|
temp_G.node[temp]['consts']))
|
||||||
# offspring
|
# offspring
|
||||||
block_features.append(temp_G.node[temp]['offs'])
|
block_features.append(temp_G.node[temp]['offs'])
|
||||||
@ -85,13 +91,15 @@ if __name__ == '__main__':
|
|||||||
edge_list_start.append(item[0])
|
edge_list_start.append(item[0])
|
||||||
edge_list_end.append(item[1])
|
edge_list_end.append(item[1])
|
||||||
block_edges = [edge_list_start, edge_list_end]
|
block_edges = [edge_list_start, edge_list_end]
|
||||||
acfg_list_item = {"block_number": block_number, "block_edges": block_edges, "block_features": acfg_list_item_feature}
|
acfg_list_item = {"block_number": block_number, "block_edges": block_edges,
|
||||||
|
"block_features": acfg_list_item_feature}
|
||||||
acfg_list.append(acfg_list_item)
|
acfg_list.append(acfg_list_item)
|
||||||
|
|
||||||
json_temp = {"function_edges": function_edges, "acfg_list": acfg_list, "function_names": fun_name_temp, "hash": file_hash, "function_number": function_number}
|
json_temp = {"function_edges": function_edges, "acfg_list": acfg_list, "function_names": fun_name_temp,
|
||||||
|
"hash": file_hash, "function_number": function_number}
|
||||||
json_str = json.dumps(json_temp)
|
json_str = json.dumps(json_temp)
|
||||||
print json_str
|
sample_file.write(json_str)
|
||||||
|
else:
|
||||||
|
print "删除文件" + file_path
|
||||||
|
os.remove(file_path)
|
||||||
|
sample_file.close()
|
||||||
|
Loading…
Reference in New Issue
Block a user