jsonl生成
This commit is contained in:
parent
bd51d89a0b
commit
337140a26e
7
fun_count.py
Normal file
7
fun_count.py
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
file_name = './fun_count.jsonl'
|
||||||
|
fil = open(file_name, mode='r')
|
||||||
|
for item in tqdm(fil):
|
@ -1,18 +1,20 @@
|
|||||||
@echo off
|
@echo off
|
||||||
setlocal enabledelayedexpansion
|
setlocal EnableDelayedExpansion
|
||||||
|
|
||||||
set "IDA_PATH=D:\IDA_Pro_v6.8\idaq.exe"
|
set "IDA_PATH=D:\IDA_Pro_v6.8\idaq.exe"
|
||||||
set "FOLDER_PATH=D:\bishe\Gencoding\A2C"
|
set "FOLDER_PATH=D:\bishe\Gencoding\train_malware"
|
||||||
set "SCRIPT_PATH=../raw-feature-extractor/preprocessing_ida.py"
|
set "SCRIPT_PATH=../raw-feature-extractor/preprocessing_ida.py"
|
||||||
set "SAVE_PATH=../store/"
|
set "SAVE_PATH=../train_malware_result/"
|
||||||
set "LOG_PATH=../log/"
|
|
||||||
|
|
||||||
|
for %%f in ("%FOLDER_PATH%\*.*") do (
|
||||||
|
echo %%f
|
||||||
for %%f in ("%FOLDER_PATH%\*.exe") do (
|
if /i "%%~xf"==".idb" (
|
||||||
echo !time! %%f
|
echo Found IDB file: %%f
|
||||||
%IDA_PATH% -c -B -S"%SCRIPT_PATH% --path %SAVE_PATH%" %%f
|
) else (
|
||||||
|
echo !time! %%f
|
||||||
|
%IDA_PATH% -c -A -S"%SCRIPT_PATH% --path %SAVE_PATH%" %%f
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
endlocal
|
endlocal
|
||||||
|
|
||||||
|
96
raw-feature-extractor/external_test.py
Normal file
96
raw-feature-extractor/external_test.py
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
import networkx as nx
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
done_index = 0
|
||||||
|
file_name_list = os.listdir('../A2C/')
|
||||||
|
res_file = "../sample.jsonl"
|
||||||
|
for file_name in file_name_list:
|
||||||
|
file_path = '../A2C/' + file_name
|
||||||
|
testpath = '../store/' + file_name + '.ida'
|
||||||
|
if os.path.exists(testpath) and os.path.splitext(file_path)[-1].lower() == '.exe':
|
||||||
|
fr = open(testpath, 'r')
|
||||||
|
data1 = pickle.load(fr)
|
||||||
|
for graph in data1.raw_graph_list:
|
||||||
|
for i in range(len(graph.old_g.node)):
|
||||||
|
if len(graph.old_g.node[i]['externs']) != 0:
|
||||||
|
print graph.old_g.node[i]['externs']
|
||||||
|
# for i in range(len(data1.raw_graph_list)):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# fr = open(testpath, 'r')
|
||||||
|
# data1 = pickle.load(fr)
|
||||||
|
# # function num
|
||||||
|
# function_number = len(data1.raw_graph_list)
|
||||||
|
# if function_number == 0:
|
||||||
|
# continue
|
||||||
|
# # function_edges
|
||||||
|
# function_edge_start = []
|
||||||
|
# function_edge_end = []
|
||||||
|
# for item in data1.raw_graph_list[0].old_g.edges:
|
||||||
|
# function_edge_start.append(item[0])
|
||||||
|
# function_edge_end.append(item[1])
|
||||||
|
# function_edges = [function_edge_start, function_edge_end]
|
||||||
|
# fun_name_temp = []
|
||||||
|
# # function hash
|
||||||
|
# acfg_list = []
|
||||||
|
|
||||||
|
# for i in range(len(data1.raw_graph_list)):
|
||||||
|
#
|
||||||
|
# # function name
|
||||||
|
# fun_name_temp.append(data1.raw_graph_list[i].funcname)
|
||||||
|
# # block features
|
||||||
|
# temp_G = data1.raw_graph_list[i].old_g
|
||||||
|
# # block_number
|
||||||
|
# block_number = len(temp_G.node)
|
||||||
|
# # block_features
|
||||||
|
# acfg_list_item_feature = []
|
||||||
|
# for temp in range(len(temp_G.node)):
|
||||||
|
# block_features = []
|
||||||
|
# # call
|
||||||
|
# block_features.append(temp_G.node[temp]['numCalls'])
|
||||||
|
# # transfer
|
||||||
|
# block_features.append(temp_G.node[temp]['numTIs'])
|
||||||
|
# # arithmetic
|
||||||
|
# block_features.append(temp_G.node[temp]['numAs'])
|
||||||
|
# # logic
|
||||||
|
# block_features.append(temp_G.node[temp]['numLIs'])
|
||||||
|
# # compare
|
||||||
|
# block_features.append(temp_G.node[temp]['numCom'])
|
||||||
|
# # move
|
||||||
|
# block_features.append(temp_G.node[temp]['numMov'])
|
||||||
|
# # termination
|
||||||
|
# block_features.append(temp_G.node[temp]['numTerm'])
|
||||||
|
# # data declaration
|
||||||
|
# block_features.append(temp_G.node[temp]['numDD'])
|
||||||
|
# # total instructions
|
||||||
|
# block_features.append(temp_G.node[temp]['numIns'])
|
||||||
|
# # string or integer constants
|
||||||
|
# block_features.append(
|
||||||
|
# len(temp_G.node[temp]['strings']) if len(temp_G.node[temp]['strings']) != 0 else len(
|
||||||
|
# temp_G.node[temp]['consts']))
|
||||||
|
# # offspring
|
||||||
|
# block_features.append(temp_G.node[temp]['offs'])
|
||||||
|
# acfg_list_item_feature.append(block_features)
|
||||||
|
# edge_list_start = []
|
||||||
|
# edge_list_end = []
|
||||||
|
# for item in temp_G.edges:
|
||||||
|
# edge_list_start.append(item[0])
|
||||||
|
# edge_list_end.append(item[1])
|
||||||
|
# block_edges = [edge_list_start, edge_list_end]
|
||||||
|
# acfg_list_item = {"block_number": block_number, "block_edges": block_edges,
|
||||||
|
# "block_features": acfg_list_item_feature}
|
||||||
|
# acfg_list.append(acfg_list_item)
|
||||||
|
|
@ -20,7 +20,11 @@ import pickle
|
|||||||
|
|
||||||
# sub_10F20 308 反编译代码有字符串,但是这个特征提取里没有字符串 constant,可能是间接引用的,不识别。看了下所有函数的特征,几乎都没有字符串常量,可能都是写在别的地方然后引用的。
|
# sub_10F20 308 反编译代码有字符串,但是这个特征提取里没有字符串 constant,可能是间接引用的,不识别。看了下所有函数的特征,几乎都没有字符串常量,可能都是写在别的地方然后引用的。
|
||||||
# sub_166C4 393
|
# sub_166C4 393
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
done_index = 0
|
||||||
file_name_list = os.listdir('../A2C/')
|
file_name_list = os.listdir('../A2C/')
|
||||||
res_file = "../sample.jsonl"
|
res_file = "../sample.jsonl"
|
||||||
sample_file = open(res_file, mode='a')
|
sample_file = open(res_file, mode='a')
|
||||||
@ -98,7 +102,9 @@ if __name__ == '__main__':
|
|||||||
json_temp = {"function_edges": function_edges, "acfg_list": acfg_list, "function_names": fun_name_temp,
|
json_temp = {"function_edges": function_edges, "acfg_list": acfg_list, "function_names": fun_name_temp,
|
||||||
"hash": file_hash, "function_number": function_number}
|
"hash": file_hash, "function_number": function_number}
|
||||||
json_str = json.dumps(json_temp)
|
json_str = json.dumps(json_temp)
|
||||||
sample_file.write(json_str)
|
sample_file.write(json_str + '\n')
|
||||||
|
print "完成写入" + str(done_index)
|
||||||
|
done_index += 1
|
||||||
else:
|
else:
|
||||||
print "删除文件" + file_path
|
print "删除文件" + file_path
|
||||||
os.remove(file_path)
|
os.remove(file_path)
|
||||||
|
Loading…
Reference in New Issue
Block a user