Batch operations

parent 0f1e3378a2
commit 548eedb292
@@ -4,7 +4,7 @@
   <content url="file://$MODULE_DIR$">
     <sourceFolder url="file://$MODULE_DIR$/Genius3/python" isTestSource="false" />
   </content>
-  <orderEntry type="jdk" jdkName="Python 2.7" jdkType="Python SDK" />
+  <orderEntry type="jdk" jdkName="malgraph" jdkType="Python SDK" />
   <orderEntry type="sourceFolder" forTests="false" />
 </component>
 <component name="PyDocumentationSettings">
@@ -1,4 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="malgraph" project-jdk-type="Python SDK" />
 </project>
@@ -2,7 +2,7 @@
 setlocal EnableDelayedExpansion
 
 
-set "FOLDER_PATH=D:\bishe\dataset\train_benign"
+set "FOLDER_PATH=D:\bishe\dataset\train_benign_part0"
 
 
 
Genius3/bat_file/malware/ida_file_cerate_malware.bat (new file, 16 lines)
@@ -0,0 +1,16 @@
+@echo off
+setlocal EnableDelayedExpansion
+
+
+set "FOLDER_PATH=D:\bishe\dataset\sample_20230130_458"
+
+
+
+for %%f in ("%FOLDER_PATH%\*") do (
+    echo !time! %%f
+    D:\IDA_Pro_v6.8\idaq64.exe -c -A -S"D:\bishe\Gencoding_KE\Genius3\raw-feature-extractor\preprocessing_ida.py 0" -oD:\bishe\dataset\out %%f
+
+)
+
+endlocal
+
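Note: the new batch file drives IDA in headless mode over every sample in a folder. A rough Python equivalent of that loop, for readers who prefer scripting it (paths and IDA flags are taken from the batch file above; the subprocess wrapper itself is a sketch, not part of this commit):

import os
import subprocess

# Same paths as the batch file above; -c deletes the old database, -A runs
# autonomous analysis, -S passes the preprocessing script plus its argument.
FOLDER_PATH = r"D:\bishe\dataset\sample_20230130_458"
IDA = r"D:\IDA_Pro_v6.8\idaq64.exe"
SCRIPT = r"D:\bishe\Gencoding_KE\Genius3\raw-feature-extractor\preprocessing_ida.py 0"

for name in os.listdir(FOLDER_PATH):
    sample = os.path.join(FOLDER_PATH, name)
    # -o points IDA's output database at the out directory, as in the batch file
    subprocess.call([IDA, "-c", "-A", "-S" + SCRIPT, r"-oD:\bishe\dataset\out", sample])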
@@ -1,16 +0,0 @@
-# -*- coding: UTF-8 -*-
-import sys
-
-from func import *
-from raw_graphs import *
-from idc import *
-import os
-import argparse
-if __name__ == '__main__':
-    print "hello"
-
-#
-# E:\BaiduNetdiskDownload\IDA_Pro_v6.8\IDA_Pro_v6.8\idaq.exe -c -A -S"raw-feature-extractor/preprocessing_ida.py --path C:\Program1\pycharmproject\Genius3\acfgs" hpcenter
-# -c delete the old database  -A automatic analysis, no dialogs shown
-# -B is equivalent to -c -A
-
@@ -1,81 +0,0 @@
-class HierarchicalGraphNeuralNetwork(nn.Module):
-    def __init__(self, external_vocab: Vocab):
-        super(HierarchicalGraphNeuralNetwork, self).__init__()
-        self.pool = 'global_max_pool'
-        # Hierarchical 1: Control Flow Graph (CFG) embedding and pooling
-        cfg_filter_list = [200, 200]
-        cfg_filter_list.insert(0, 11)
-        self.cfg_filter_length = len(cfg_filter_list)
-        cfg_graphsage_params = [dict(in_channels=cfg_filter_list[i], out_channels=cfg_filter_list[i + 1], bias=True) for
-                                i in range(self.cfg_filter_length - 1)]
-        cfg_conv = dict(constructor=torch_geometric.nn.conv.SAGEConv, kwargs=cfg_graphsage_params)
-        cfg_constructor = cfg_conv['constructor']
-        for i in range(self.cfg_filter_length - 1):
-            setattr(self, 'CFG_gnn_{}'.format(i + 1), cfg_constructor(**cfg_conv['kwargs'][i]))
-        self.dropout = nn.Dropout(p=0.2)
-        # Hierarchical 2: Function Call Graph (FCG) embedding and pooling
-        self.external_embedding_layer = nn.Embedding(num_embeddings=external_vocab.max_vocab_size + 2,
-                                                     embedding_dim=cfg_filter_list[-1],
-                                                     padding_idx=external_vocab.pad_idx)
-        fcg_filter_list = [200, 200]
-        fcg_filter_list.insert(0, cfg_filter_list[-1])
-        self.fcg_filter_length = len(fcg_filter_list)
-        fcg_graphsage_params = [dict(in_channels=fcg_filter_list[i], out_channels=fcg_filter_list[i + 1], bias=True) for
-                                i in range(self.fcg_filter_length - 1)]
-        fcg_conv = dict(constructor=torch_geometric.nn.conv.SAGEConv, kwargs=fcg_graphsage_params)
-        fcg_constructor = fcg_conv['constructor']
-        for i in range(self.fcg_filter_length - 1):
-            setattr(self, 'FCG_gnn_{}'.format(i + 1), fcg_constructor(**fcg_conv['kwargs'][i]))
-        # Last Projection Function: gradually project with more linear layers
-        self.pj1 = torch.nn.Linear(in_features=fcg_filter_list[-1], out_features=int(fcg_filter_list[-1] / 2))
-        self.pj2 = torch.nn.Linear(in_features=int(fcg_filter_list[-1] / 2), out_features=int(fcg_filter_list[-1] / 4))
-        self.pj3 = torch.nn.Linear(in_features=int(fcg_filter_list[-1] / 4), out_features=6)
-        self.last_activation = nn.Softmax(dim=1)
-
-    def forward(self, real_local_batch: Batch, real_bt_positions: list, bt_external_names: list,
-                bt_all_function_edges: list):
-        rtn_local_batch = self.forward_cfg_gnn(local_batch=real_local_batch)
-        x_cfg_pool = torch_geometric.nn.glob.global_max_pool(x=rtn_local_batch.x, batch=rtn_local_batch.batch)
-        fcg_list = []
-        fcg_internal_list = []
-        for idx_batch in range(len(real_bt_positions) - 1):
-            start_pos, end_pos = real_bt_positions[idx_batch: idx_batch + 2]
-            idx_x_cfg = x_cfg_pool[start_pos: end_pos]
-            fcg_internal_list.append(idx_x_cfg)
-            idx_x_external = self.external_embedding_layer(
-                torch.tensor([bt_external_names[idx_batch]], dtype=torch.long))
-            idx_x_external = idx_x_external.squeeze(dim=0)
-            idx_x_total = torch.cat([idx_x_cfg, idx_x_external], dim=0)
-            idx_function_edge = torch.tensor(bt_all_function_edges[idx_batch], dtype=torch.long)
-            idx_graph_data = Data(x=idx_x_total, edge_index=idx_function_edge)
-            idx_graph_data.validate()
-            fcg_list.append(idx_graph_data)
-        fcg_batch = Batch.from_data_list(fcg_list)
-        # Hierarchical 2: Function Call Graph (FCG) embedding and pooling
-        rtn_fcg_batch = self.forward_fcg_gnn(function_batch=fcg_batch)  # [batch_size, max_node_size, dim]
-        x_fcg_pool = torch_geometric.nn.glob.global_max_pool(x=rtn_fcg_batch.x, batch=rtn_fcg_batch.batch)
-        batch_final = x_fcg_pool
-        # step last project to the number_of_classes (multiclass)
-        bt_final_embed = self.pj3(self.pj2(self.pj1(batch_final)))
-        bt_pred = self.last_activation(bt_final_embed)
-        return bt_pred
-
-    def forward_cfg_gnn(self, local_batch: Batch):
-        in_x, edge_index = local_batch.x, local_batch.edge_index
-        for i in range(self.cfg_filter_length - 1):
-            out_x = getattr(self, 'CFG_gnn_{}'.format(i + 1))(x=in_x, edge_index=edge_index)
-            out_x = torch.nn.functional.relu(out_x, inplace=True)
-            out_x = self.dropout(out_x)
-            in_x = out_x
-        local_batch.x = in_x
-        return local_batch
-
-    def forward_fcg_gnn(self, function_batch: Batch):
-        in_x, edge_index = function_batch.x, function_batch.edge_index
-        for i in range(self.fcg_filter_length - 1):
-            out_x = getattr(self, 'FCG_gnn_{}'.format(i + 1))(x=in_x, edge_index=edge_index)
-            out_x = torch.nn.functional.relu(out_x, inplace=True)
-            out_x = self.dropout(out_x)
-            in_x = out_x
-        function_batch.x = in_x
-        return function_batch
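For context on what was removed: the model embeds each function's CFG with one GraphSAGE stack, max-pools to a single vector per function, then runs a second GraphSAGE stack over the function call graph and projects to 6 classes. A minimal sketch of how such a model would be driven (the batch layout, vocab ids, and edge lists below are assumptions inferred from the signatures above, not values from this repository):

import torch
from torch_geometric.data import Batch, Data

# Two tiny CFGs with 11-dim basic-block features, as the first GNN level expects.
cfgs = [Data(x=torch.randn(3, 11), edge_index=torch.tensor([[0, 1], [1, 2]])),
        Data(x=torch.randn(2, 11), edge_index=torch.tensor([[0], [1]]))]
local_batch = Batch.from_data_list(cfgs)
positions = [0, 2]                         # one binary owning CFG graphs [0, 2)
external_names = [[0, 1]]                  # external-callee vocab ids (assumed layout)
function_edges = [[[0, 1, 2], [1, 2, 3]]]  # FCG edges over internal + external nodes

# model = HierarchicalGraphNeuralNetwork(external_vocab)  # Vocab is project-specific
# pred = model(local_batch, positions, external_names, function_edges)  # shape [1, 6]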
@@ -242,7 +242,5 @@ def convert_benign(overhaul):
 
 
 if __name__ == '__main__':
-    # convert(35, 69)
-    # convert_benign(True)
     convert_benign(True)
     convert_malware(True)
@@ -1,264 +0,0 @@
-# coding=utf-8
-#
-# Reference Lister
-#
-# List all functions and all references to them in the current section.
-#
-# Implemented with the idautils module
-#
-import networkx as nx
-import pdb
-from graph_analysis_ida import *
-from graph_property import *
-
-
-# import wingdbstub
-# wingdbstub.Ensure()
-
-def get_funcs(ea):
-    funcs = {}
-    # Get current ea
-    # Loop from start to end in the current segment
-    for funcea in Functions(SegStart(ea)):
-        funcname = GetFunctionName(funcea)
-        func = get_func(funcea)
-        blocks = FlowChart(func)
-        funcs[funcname] = []
-        for bl in blocks:
-            start = bl.startEA
-            end = bl.endEA
-            funcs[funcname].append((start, end))
-    return funcs
-
-
-# Apparently an unused function
-# def get_funcs_for_discoverRe(ea):
-#     features = {}
-#     for funcea in Functions(SegStart(ea)):
-#         funcname = GetFunctionName(funcea)
-#         print(funcname)
-#         func = get_func(funcea)
-#         feature = get_discoverRe_feature(func)
-#         features[funcname] = feature
-#     return features
-
-
-# Get the 11-dimensional attribute features of every basic block:
-# calls / transfers / arithmetic / logic / compare / move / interrupt / data declarations / total instructions / string-or-integer constants / number of offspring
-def get_bb_features(func):
-    bb_features = []
-    blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
-    for bl in blocks:
-        calls = calCalls(bl)
-        transferIns = calTransferIns(bl)
-        mathematicsIns = calArithmeticIns(bl)
-        logicIns = calLogicInstructions(bl)
-        cmpIns = calIns(bl, {'cmp': 1, 'cmps': 1, 'cmpsb': 1, 'cmppd': 1, 'cmpps': 1, 'fcom': 1, 'fcomp': 1, 'fcompp': 1, 'ficom': 1, 'ficomp': 1, 'ptest': 1, 'test': 1})
-        movIns = calIns(bl, {'mov': 1, 'movb': 1, 'movw': 1, 'movl': 1, 'movq': 1, 'movabsq': 1, 'push': 1, 'pop': 1, 'lea': 1})
-        interruptIns = calIns(bl, {'int1': 1, 'int3': 1, 'into': 1, 'iret': 1, 'iretd': 1, 'iretq': 1})
-        declareIns = calIns(bl, {'dw': 1, 'dd': 1, 'db': 1})
-        totalIns = calInsts(bl)
-        consts = getBBconsts(bl)
-        stringOrIntConsts = len(consts[0]) + len(consts[1])
-        bb_features.append([calls, transferIns, mathematicsIns, logicIns, cmpIns, movIns,
-                            interruptIns, declareIns, totalIns, stringOrIntConsts])
-    return bb_features
-
-
-def get_discoverRe_feature(func, icfg):
-    start = func.startEA
-    end = func.endEA
-    features = []
-    FunctionCalls = getFuncCalls(func)
-    # 1
-    features.append(FunctionCalls)
-    LogicInstr = getLogicInsts(func)
-    # 2
-    features.append(LogicInstr)
-    Transfer = getTransferInsts(func)
-    # 3
-    features.append(Transfer)
-    Locals = getLocalVariables(func)
-    # 4
-    features.append(Locals)
-    BB = getBasicBlocks(func)
-    # 5
-    features.append(BB)
-    Edges = len(icfg.edges())
-    # 6
-    features.append(Edges)
-    Incoming = getIncommingCalls(func)
-    # 7
-    features.append(Incoming)
-    # 8
-    Instrs = getIntrs(func)
-    features.append(Instrs)
-    between = retrieveGP(icfg)
-    # 9
-    features.append(between)
-
-    strings, consts = getfunc_consts(func)
-    # 10
-    features.append(strings)
-    # 11
-    features.append(consts)
-    return features
-
-
-def get_func_names(ea):
-    funcs = {}
-    for funcea in Functions(SegStart(ea)):
-        funcname = GetFunctionName(funcea)
-        funcs[funcname] = funcea
-    return funcs
-
-
-def get_func_bases(ea):
-    funcs = {}
-    for funcea in Functions(SegStart(ea)):
-        funcname = GetFunctionName(funcea)
-        funcs[funcea] = funcname
-    return funcs
-
-
-def get_func_range(ea):
-    funcs = {}
-    for funcea in Functions(SegStart(ea)):
-        funcname = GetFunctionName(funcea)
-        func = get_func(funcea)
-        funcs[funcname] = (func.startEA, func.endEA)
-    return funcs
-
-
-def get_func_sequences(ea):
-    funcs_bodylist = {}
-    funcs = get_funcs(ea)
-    for funcname in funcs:
-        if funcname not in funcs_bodylist:
-            funcs_bodylist[funcname] = []
-        for start, end in funcs[funcname]:
-            inst_addr = start
-            while inst_addr <= end:
-                opcode = GetMnem(inst_addr)
-                funcs_bodylist[funcname].append(opcode)
-                inst_addr = NextHead(inst_addr)
-    return funcs_bodylist
-
-
-def get_func_cfgs(ea):
-    func_cfglist = {}
-    i = 0
-    start, end = get_section('LOAD')
-    # print start, end
-    for funcea in Functions(SegStart(ea)):
-        if start <= funcea <= end:
-            funcname = GetFunctionName(funcea)
-            func = get_func(funcea)
-            print(i)
-            i += 1
-            try:
-                icfg = cfg.cfg_construct(func)
-                func_cfglist[funcname] = icfg
-            except:
-                pass
-
-    return func_cfglist
-
-
-def get_section(t):
-    base = SegByName(t)
-    start = SegByBase(base)
-    end = SegEnd(start)
-    return start, end
-
-
-def get_func_cfg_sequences(func_cfglist):
-    func_cfg_seqlist = {}
-    for funcname in func_cfglist:
-        func_cfg_seqlist[funcname] = {}
-        cfg = func_cfglist[funcname][0]
-        for start, end in cfg:
-            codesq = get_sequences(start, end)
-            func_cfg_seqlist[funcname][(start, end)] = codesq
-
-    return func_cfg_seqlist
-
-
-def get_sequences(start, end):
-    seq = []
-    inst_addr = start
-    while inst_addr <= end:
-        opcode = GetMnem(inst_addr)
-        seq.append(opcode)
-        inst_addr = NextHead(inst_addr)
-    return seq
-
-
-def get_stack_arg(func_addr):
-    print(func_addr)
-    args = []
-    stack = GetFrame(func_addr)
-    if not stack:
-        return []
-    firstM = GetFirstMember(stack)
-    lastM = GetLastMember(stack)
-    i = firstM
-    while i <= lastM:
-        mName = GetMemberName(stack, i)
-        mSize = GetMemberSize(stack, i)
-        if mSize:
-            i = i + mSize
-        else:
-            i = i + 4
-        if mName not in args and mName and ' s' not in mName and ' r' not in mName:
-            args.append(mName)
-    return args
-
-
-# pickle.dump(funcs, open('C:/Documents and Settings/Administrator/Desktop/funcs','w'))
-
-
-def processDataSegs():
-    funcdata = {}
-    datafunc = {}
-    for n in xrange(idaapi.get_segm_qty()):
-        seg = idaapi.getnseg(n)
-        ea = seg.startEA
-        segtype = idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE)
-        if segtype in [idc.SEG_DATA, idc.SEG_BSS]:
-            start = idc.SegStart(ea)
-            end = idc.SegEnd(ea)
-            cur = start
-            while cur <= end:
-                refs = [v for v in DataRefsTo(cur)]
-                for fea in refs:
-                    name = GetFunctionName(fea)
-                    if len(name) == 0:
-                        continue
-                    if name not in funcdata:
-                        funcdata[name] = [cur]
-                    else:
-                        funcdata[name].append(cur)
-                    if cur not in datafunc:
-                        datafunc[cur] = [name]
-                    else:
-                        datafunc[cur].append(name)
-                cur = NextHead(cur)
-    return funcdata, datafunc
-
-
-def obtainDataRefs(callgraph):
-    datarefs = {}
-    funcdata, datafunc = processDataSegs()
-    for node in callgraph:
-        if node in funcdata:
-            datas = funcdata[node]
-            for dd in datas:
-                refs = datafunc[dd]
-                refs = list(set(refs))
-                if node in datarefs:
-                    print(refs)
-                    datarefs[node] += refs
-                    datarefs[node] = list(set(datarefs[node]))
-                else:
-                    datarefs[node] = refs
-    return datarefs
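As a reading aid for the deleted get_discoverRe_feature above, the 11 slots of its returned vector can be labeled like this (the names are mine; only the order comes from the numbered comments in the code):

# Index-to-name map for the 11-entry function feature vector (order from the code above).
DISCOVRE_FEATURES = [
    "function_calls",         # 1
    "logic_instructions",     # 2
    "transfer_instructions",  # 3
    "local_variables",        # 4
    "basic_blocks",           # 5
    "icfg_edges",             # 6
    "incoming_calls",         # 7
    "instructions",           # 8
    "betweenness",            # 9
    "strings",                # 10
    "numeric_constants",      # 11
]

def label_features(vec):
    """Pair a raw 11-entry feature vector with readable names."""
    return dict(zip(DISCOVRE_FEATURES, vec))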
@@ -16,9 +16,7 @@ from raw_graphs import *
 #from discovRe_feature.discovRe import *
 from discovRe import *
 
-sys.path.append("D:\\hkn\\project_folder\\Gencoding3\\Genius3\\python")
-#import wingdbstub
-#wingdbstub.Ensure()
+
 
 
 
@@ -119,24 +119,23 @@ def getIncommingCalls(func):
 
 
 def get_stackVariables(func_addr):
-    #print func_addr
     args = []
     stack = GetFrame(func_addr)
     if not stack:
         return 0
     firstM = GetFirstMember(stack)
     lastM = GetLastMember(stack)
     i = firstM
-    while i <=lastM:
-        mName = GetMemberName(stack,i)
-        mSize = GetMemberSize(stack,i)
+    while i <= lastM:
+        mName = GetMemberName(stack, i)
+        mSize = GetMemberSize(stack, i)
         if mSize:
             i = i + mSize
         else:
-            i = i+4
+            i = i + 4
         if mName not in args and mName and 'var_' in mName:
             args.append(mName)
     return len(args)
 
 
 # Count the arithmetic instructions
@@ -1,7 +1,7 @@
-# coding=utf-8
 import os
 import pickle
-import idc
+from func import *
+from idc import *
 import idaapi
 
 # Define constants
@@ -12,6 +12,7 @@ CFG_EXTENSION = ".ida"
 GDL_EXTENSION = ".dot"
 ASM_EXTENSION = ".asm"
 
+
 def preprocess(binary_name, workflow):
     cfg_path = os.path.join(
         INFECTED_DIR if workflow != "-1" else BENIGN_DIR,
@@ -29,9 +30,9 @@ def preprocess(binary_name, workflow):
     if os.path.exists(cfg_path):
         idc.Exit(0)
     else:
-        analysis_flags = idc.GetShortPrm(idc.INF_START_AF)
-        analysis_flags &= ~idc.AF_IMMOFF
-        idc.SetShortPrm(idc.INF_START_AF, analysis_flags)
+        analysis_flags = idc.GetShortPrm(idc.INF_AF2)
+        analysis_flags &= ~ida_ida.AF_IMMOFF
+        idc.SetShortPrm(idc.INF_AF2, analysis_flags)
 
         idaapi.autoWait()
 
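The change above swaps which analysis-flag word is read (INF_START_AF to INF_AF2) and where AF_IMMOFF comes from (idc to ida_ida). A hedged sketch of the flag-clearing pattern in isolation; which module exposes each constant depends on the IDAPython version, and the names here simply follow the new side of the diff:

import idc
import idaapi
import ida_ida  # assumed available, per the new side of the diff

# Clear "convert immediates to offsets" before auto-analysis, then wait for it.
flags = idc.GetShortPrm(idc.INF_AF2)
flags &= ~ida_ida.AF_IMMOFF
idc.SetShortPrm(idc.INF_AF2, flags)
idaapi.autoWait()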
@@ -47,17 +48,21 @@ def preprocess(binary_name, workflow):
     # Close IDA Pro
     idc.Exit(0)
 
+
 def generate_cfg(binary_name, cfg_path):
     cfgs = get_func_cfgs_c(FirstSeg())
     with open(cfg_path, 'wb') as cfg_file:
         pickle.dump(cfgs, cfg_file)
 
+
 def generate_gdl(gdl_path):
     idc.GenCallGdl(gdl_path, 'Call Gdl', idaapi.CHART_GEN_DOT)
 
+
 def generate_asm(asm_path):
     idc.GenerateFile(idc.OFILE_ASM, asm_path, 0, idc.BADADDR, 0)
 
+
 # Main function
 def main():
     binary_name = idc.GetInputFile()
@@ -68,6 +73,7 @@ def main():
         return
     preprocess(binary_name, workflow)
 
+
 # When run as an IDA Pro script, call the main function
 if __name__ == "__main__":
     main()
@@ -1,101 +0,0 @@
-# -*- coding: UTF-8 -*-
-import sys
-from matplotlib import pyplot as plt
-import networkx as nx
-import pickle
-# sys.path.insert(0, '/usr/local/lib/python2.7/dist-packages/')
-# sys.path.insert(1, 'C:/Python27/Lib/site-packages')
-
-
-def print_obj(obj):
-    # "Print all attributes of an object"
-    print(obj.__dict__)
-
-
-# sub_10F20 308: the decompiled code contains strings, but there are no string constants in this feature extraction; they are probably referenced indirectly and not recognized. Looking at the features of all functions, almost none have string constants; they may be stored elsewhere and referenced.
-# sub_166C4 393
-if __name__ == '__main__':
-    testpath = "D:\\hkn\\infected\\datasets\\virusshare_infected23_cfg\\VirusShare_9ba64176b2ca61212ff56a5b4eb546ff.ida"
-    fr = open(testpath, 'r')
-    data = pickle.load(fr)  # the ACFGs of one binary
-    fr.close()
-
-    # print(type(data1))
-    # print_obj(data1)
-    # print data1.raw_graph_list[393]
-    # print_obj(data1.raw_graph_list[393])
-    # nx.draw(data1.raw_graph_list[393].g, with_labels=True)
-    # plt.show()
-
-    print("The raw features of all functions of one binary, a list.")
-    print_obj(data)  # acfg list
-    print("\n")
-
-    print("The raw features of one function, made of old_g (the discovRe-style ACFG), g (the Genius-style ACFG), and fun_feature (a vector of function-level features).")
-    print_obj(data.raw_graph_list[0])  # the acfg of one function
-    print("fun_features = function-level features: # 1 function calls # 2 logic instructions # 3 TransferIns # 4 LocalVariables # 5 BB basicblocks # 6 Edges # 7 IncommingCalls # 8 Intrs # 9 between # 10 strings # 11 consts")
-    # feature = data.raw_graph_list[0].fun_features
-    print("old_g:{}".format(data.raw_graph_list[0].old_g))
-    print("g:{}".format(data.raw_graph_list[0].g))
-
-
-    # G = data1.raw_graph_list[393].old_g
-    # print G.node[0]  # G.node[i] is a dict
-    # for key, value in G.node[0].items():
-    #     print('{key}:{value}'.format(key=key, value=value))
-
-    # Basic-block features: #1 'consts' numeric constants #2 'strings' string constants #3 'offs' offspring count #4 'numAs' arithmetic instructions such as INC #5 'numCalls' call instructions #6 'numIns' instruction count #7 'numLIs' logic instructions such as AND #8 'numTIs' transfer instruction count
-    G = data.raw_graph_list[0].g
-    print("# Basic-block features: #1 'consts' numeric constants #2 'strings' string constants #3 'offs' offspring count #4 'numAs' arithmetic instructions such as INC #5 'numCalls' call instructions #6 'numIns' instruction count #7 'numLIs' logic instructions such as AND #8 'numTIs' transfer instruction count")
-    # print(G.node[0])
-    # print("\n")
-    # Features of all basic blocks in this function
-    for key, value in G.node.items():
-        print('{}:{}'.format(key, value))
-
-
-    # old_g is read straight from IDA's CFG, so node counts, directions, etc. match; g is generated from old_g, so it matches too.
-    # old g
-    G = data.raw_graph_list[0].old_g
-    nx.draw(G, with_labels=True)
-    # plt.title('old_g')
-    plt.show()
-
-    # g
-    G = data.raw_graph_list[0].g
-    nx.draw(G, with_labels=True)
-    # plt.title('Genius_g')
-    plt.show()
-
-    # draw graph with labels
-    pos = nx.spring_layout(G)
-    nx.draw(G, pos)
-    node_labels = nx.get_node_attributes(G, 'v')  # networkx nodes carry attributes; g's attribute is 'v', the vector of raw features. For old_g's attributes see cfg_constructor.py
-    nx.draw_networkx_labels(G, pos, labels=node_labels)
-    # plt.title('Genius_g with raw feature vector')
-    plt.show()
-
-
-    # 1 function calls: number of call instructions (call, jal, jalr) in this function. Note: ARM has none of these.
-    # 2 logic instructions: number of logic instructions in this function, e.g. and, or
-    # 3 TransferIns: number of transfer instructions (e.g. jmp; mov on ARM)
-    # 4 LocalVariables: number of local variables
-    # 5 BB: number of basic blocks
-    # 6 Edges: number of icfg edges; the icfg is a feature from the discovRe paper and is ignored here for now
-    # 7 IncommingCalls: number of instructions calling this function
-    # 8 Intrs: number of instructions
-    # 9 between: betweenness, a structural feature
-    # 10 strings: string constants
-    # 11 consts: numeric constants
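The removed inspection script is Python 2; a minimal Python 3 sketch of its load-and-print core (the path and attribute names come from the script above; the binary mode and encoding argument are assumptions for reading a py2-era pickle):

import pickle

testpath = r"D:\hkn\infected\datasets\virusshare_infected23_cfg\VirusShare_9ba64176b2ca61212ff56a5b4eb546ff.ida"
# raw_graphs must be importable for unpickling to succeed.
with open(testpath, "rb") as fr:
    data = pickle.load(fr, encoding="latin1")  # py2-era pickle
print(data.__dict__)                    # the ACFG list for one binary
print(data.raw_graph_list[0].__dict__)  # one function: old_g, g, fun_features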
@@ -1,356 +0,0 @@
-import cPickle as pickle
-from search import *
-from nearpy import Engine
-from nearpy.hashes import RandomDiscretizedProjections
-from nearpy.filters import NearestFilter, UniqueFilter
-from nearpy.distances import EuclideanDistance
-from nearpy.distances import CosineDistance
-from nearpy.hashes import RandomBinaryProjections
-from nearpy.experiments import DistanceRatioExperiment
-from redis import Redis
-from nearpy.storage import RedisStorage
-from feature import *
-import numpy as np
-import os
-import pdb
-import argparse
-import time
-import numpy as np
-from refactoring import *
-import pymongo
-from pymongo import MongoClient
-
-def initDB():
-    client = MongoClient()
-    client = MongoClient('localhost', 27017)
-    client = MongoClient('mongodb://localhost:27017/')
-    db = client.test_database
-    db = client['iot-encoding']
-    return db
-
-db = initDB()
-posts = db.posts
-
-class db:
-
-    def __init__(self):
-        self.feature_list = {}
-        self.engine = None
-
-    def loadHashmap(self, feature_size, result_n):
-        # Create redis storage adapter
-        redis_object = Redis(host='localhost', port=6379, db=0)
-        redis_storage = RedisStorage(redis_object)
-        pdb.set_trace()
-        try:
-            # Get hash config from redis
-            config = redis_storage.load_hash_configuration('test')
-            # Config is existing, create hash with None parameters
-            lshash = RandomBinaryProjections(None, None)
-            # Apply configuration loaded from redis
-            lshash.apply_config(config)
-
-        except:
-            # Config is not existing, create hash from scratch, with 10 projections
-            lshash = RandomBinaryProjections('test', 0)
-
-
-        # Create engine for feature space of 100 dimensions and use our hash.
-        # This will set the dimension of the lshash only the first time, not when
-        # using the configuration loaded from redis. Use redis storage to store
-        # buckets.
-        nearest = NearestFilter(1000)
-        #self.engine = Engine(feature_size, lshashes=[], vector_filters=[])
-        pdb.set_trace()
-        self.engine = Engine(192, lshashes=[lshash], vector_filters=[nearest], storage=redis_storage, distance=EuclideanDistance())
-
-        # Do some stuff like indexing or querying with the engine...
-
-        # Finally store hash configuration in redis for later use
-        redis_storage.store_hash_configuration(lshash)
-
-    def appendToDB(self, binary_name, funcname, fvector, firmware_name=""):
-        if fvector is None:
-            return
-        #ftuple = tuple([fvector])
-        self.engine.store_vector(np.asarray(fvector), ".".join((firmware_name, binary_name, funcname)))
-
-    def batch_appendDB(self, binary_name, features, firmware_name=""):
-        for funcname in features:
-            feature = features[funcname]
-            #pdb.set_trace()
-            self.appendToDB(binary_name, funcname, feature, firmware_name)
-
-    def batch_appendDBbyDir(self, base_dir):
-        cursor = posts.find({"firmware_name": "ddwrt-r21676_result"})
-        i = 0
-        for v in cursor:
-            print i
-            i += 1
-            binary_name = v['binary_name']
-            funcname = v['func_name']
-            firmware_name = v['firmware_name']
-            feature = v['fvector']
-            self.appendToDB(binary_name, funcname, feature, firmware_name)
-
-    def batch_appendDBbyDir1(self, base_dir):
-        image_dir = os.path.join(base_dir, "image")
-        firmware_featrues = {}
-        bnum = 0
-        fnum = 0
-        i = 0
-        pdb.set_trace()
-        for firmware_name in os.listdir(image_dir):
-            print firmware_name
-            firmware_featrues[firmware_name] = {}
-            firmware_dir = os.path.join(image_dir, firmware_name)
-            for binary_name in os.listdir(firmware_dir):
-                if binary_name.endswith(".features"):
-                    bnum += 1
-                    featrues_dir = os.path.join(firmware_dir, binary_name)
-                    featrues = pickle.load(open(featrues_dir, "r"))
-                    for funcname in featrues:
-                        fnum += 1
-                        #pdb.set_trace()
-                        feature = featrues[funcname]
-                        self.appendToDB(binary_name, funcname, feature, firmware_name)
-                    del featrues
-        print("bnum ", bnum)
-        print("fnum ", fnum)
-
-    def dump(self, base_dir):
-        db_dir = os.path.join(base_dir, "data/db/busybox.feature_mapping")
-        pickle.dump(self.feature_list, open(db_dir, 'w'))
-        db_dir = os.path.join(base_dir, "data/db/busybox.hashmap")
-        pickle.dump(self.engine, open(db_dir, 'w'))
-
-    def loadDB(self, base_dir):
-        db_dir = os.path.join(base_dir, "data/db/busybox.feature_mapping")
-        self.feature_list = pickle.load(open(db_dir, 'r'))
-        db_dir = os.path.join(base_dir, "data/db/busybox.hashmap")
-        self.engine = pickle.load(open(db_dir, 'r'))
-
-    def findF(self, binary_name, funcname):
-        x = [v for v in self.feature_list if binary_name in self.feature_list[v] and funcname in self.feature_list[v][binary_name]]
-        return x[0]
-
-def retrieveFeaturesByDir(n, base_dir):
-    firmware_featrues = {}
-    i = 0
-    for firmware_name in os.listdir(base_dir):
-        if firmware_name.endWith(".features"):
-            firmware_featrues[firmware_name] = {}
-            firmware_dir = os.path.join(base_dir, firmware_name)
-            if i > 0:
-                break
-            i += 1
-            pdb.set_trace()
-            for binary_name in os.listdir(firmware_dir):
-                featrues_dir = os.path.join(firmware_dir, binary_name + "_cb" + str(n) + ".features")
-                featrues = pickle.load(open(featrues_dir, "r"))
-                for funcname in featrues:
-                    feature = featrues[funcname]
-                    self.appendToDB(firmware_name, binary_name, funcname, feature)
-                del featrues
-
-def retrieveFeatures(n, base_dir, filename, funcs):
-    feature_dic = {}
-    featrues_dir = os.path.join(base_dir, "5000", filename + "_cb" + str(n) + ".features")
-    featrues = pickle.load(open(featrues_dir, "r"))
-    #featuresx = retrieveFeaturesx(filename)
-    for name in featrues:
-        #if name in funcs:
-        x = featrues[name]
-        #+ featuresx[name]
-        feature_dic[name] = np.asarray(x)
-    return feature_dic
-
-def retrieveVuldb(base_input_dir):
-    vul_path = os.path.join(base_input_dir, "vul")
-    vul_db = pickle.load(open(vul_path, "r"))
-    return vul_db
-
-
-def retrieveFeaturesx(filename):
-    ida_input_dir = os.path.join("./data/", filename + ".features")
-    featuresx = pickle.load(open(ida_input_dir, "r"))
-    return featuresx
-
-def retrieveQueries(n, base_dir, filename1, featrues_src):
-    queries = {}
-    featrues_dir = os.path.join(base_dir, "5000", filename1 + "_cb" + str(n) + ".features")
-    featrues = pickle.load(open(featrues_dir, "r"))
-    #featuresx = retrieveFeaturesx(filename1)
-    for name in featrues:
-        #if name in featrues_src:
-        x = featrues[name]
-        #+ featuresx[name]
-        queries[name] = np.asarray(x)
-    return queries
-
-def retrieveQueriesbyDir(n, base_dir, firmware_name, filename1):
-    queries = {}
-    featrues_dir = os.path.join(base_dir, firmware_name, filename1 + "_cb" + str(n) + ".features")
-    featrues = pickle.load(open(featrues_dir, "r"))
-    for name in featrues:
-        #del featrues[name][5]
-        queries[name] = np.asarray(featrues[name])
-    return queries
-
-def retrieveQuery(n, base_dir, filename, funcname):
-    featrues_dir = os.path.join(base_dir, filename + "_cb" + str(n) + ".features")
-    featrues = pickle.load(open(featrues_dir, "r"))
-    f = [featrues[v] for v in featrues if funcname in v][0]
-    return np.asarray(f)
-
-def parse_command():
-    parser = argparse.ArgumentParser(description='Process some integers.')
-    parser.add_argument("--base_input_dir", type=str, help="raw binaries to process for training")
-    parser.add_argument('--output_dir', type=str, help="output dir")
-    parser.add_argument("--filename1", type=str, help="the size of each graphlet")
-    parser.add_argument("--filename2", type=str, help="the size of each graphlet")
-    parser.add_argument("--size", type=int, help="the size of each graphlet")
-    #parser.add_argument("--size", type=int, help="the size of each graphlet")
-    args = parser.parse_args()
-    return args
-
-def loadFuncs(path):
-    funcs = {}
-    x86_dir = os.path.join(path, "func_candid")
-    #mips_dir = os.path.join(path, "openssl1.0.1a_mips.ida")
-    fp = open(x86_dir, "r")
-    for line in fp:
-        items = line.split("\n")
-        funcname = items[0]
-        funcs[funcname] = 1
-    return funcs
-
-def dump(path, featrues, queries):
-    fp = open(path + "/" + "matrix", 'w')
-    for name in featrues:
-        row = []
-        row.append("x86")
-        row.append(name)
-        row += featrues[name]
-        fp.write("%s\t%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\n" %tuple(row))
-    for name in queries:
-        row = []
-        row.append("mips")
-        row.append(name)
-        row += queries[name]
-        fp.write("%s\t%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\n" % tuple(row))
-    fp.close()
-
-
-def queryBytwo(base_input_dir, filename1, filename2, n):
-    threthold = 50
-    db_instance = db()
-    funcs = loadFuncs(base_input_dir)
-    db_instance.loadHashmap(n, 50000)
-    #pdb.set_trace()
-    featrues = retrieveFeatures(n, base_input_dir, filename1, funcs)
-    queries = retrieveQueries(n, base_input_dir, filename2, funcs)
-    #queries = refactoring(queries, featrues)
-    vul_db = retrieveVuldb(base_input_dir)
-    pdb.set_trace()
-    #dump(base_input_dir, featrues, queries)
-    #start = time.time()
-    #db_instance.batch_appendDBbyDir(base_input_dir)
-    #end = time.time()
-    #total = end - start
-    #print total
-    db_instance.batch_appendDB(filename1, featrues)
-    pdb.set_trace()
-    ranks = []
-    times = []
-    for threthold in xrange(1, 210, 10):
-        hit = []
-        i = 0
-        for name in queries:
-            #print i
-            i += 1
-            '''
-            if i == 1000:
-                print (sum(times)/len(times))
-                pdb.set_trace()
-                print "s"
-            '''
-            #if name not in vul_db['openssl']:
-            #    continue
-            if name not in featrues:
-                continue
-            #pdb.set_trace()
-            query = queries[name]
-            #start = time.time()
-            x = db_instance.engine.neighbours(query)
-            #end = time.time()
-            #total = end - start
-            #times.append(total)
-            #print total
-            #pdb.set_trace()
-            try:
-                rank = [v for v in xrange(len(x)) if name in x[v][1]][0]
-                ranks.append((name, rank))
-                if rank <= threthold:
-                    hit.append(1)
-                else:
-                    hit.append(0)
-            except:
-                #pdb.set_trace()
-                hit.append(0)
-                pass
-        #pdb.set_trace()
-        acc = sum(hit) * 1.0 / len(hit)
-        print acc
-
-def queryAll(base_dir, firmware_name, filename1, n):
-    threthold = 155
-    db_instance = db()
-    db_instance.loadHashmap(n, 50000)
-    queries = retrieveQueriesbyDir(n, base_dir, firmware_name, filename1)
-    start = time.time()
-    pdb.set_trace()
-    db_instance.batch_appendDBbyDir(n, base_dir)
-    end = time.time()
-    dur = end - start
-    print dur
-    pdb.set_trace()
-    hit = []
-    i = 0
-    times = []
-    for name in queries:
-        print i
-        i += 1
-        query = queries[name]
-        start = time.clock()
-        x = db_instance.engine.neighbours(query)
-        end = time.clock()
-        dur = end - start
-        times.append(dur)
-        #pdb.set_trace()
-        try:
-            rank = [v for v in xrange(len(x)) if name in x[v][1]]
-            if len(rank) > 1:
-                pdb.set_trace()
-                print "stop"
-            if rank[0] <= threthold:
-                hit.append(1)
-            else:
-                hit.append(0)
-        except:
-            hit.append(0)
-
-    acc = sum(hit) * 1.0 / len(hit)
-    mean = np.mean(times)
-    std = np.std(times)
-    #pdb.set_trace()
-    print acc
-
-if __name__ == "__main__":
-    args = parse_command()
-    base_dir = args.base_input_dir
-    filename1 = args.filename1
-    filename2 = args.filename2
-    n = args.size
-    pdb.set_trace()
-    queryBytwo(base_dir, filename1, filename2, n)
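The deleted search database wraps nearpy's LSH engine around Redis and MongoDB. Stripped of both stores, the core index-and-query flow looks roughly like this (the dimension 192 and the NearestFilter size come from loadHashmap above; the projection count and names are my own choices for a minimal in-memory sketch):

import numpy as np
from nearpy import Engine
from nearpy.filters import NearestFilter
from nearpy.hashes import RandomBinaryProjections

dim = 192  # feature dimension hard-coded in loadHashmap above
engine = Engine(dim,
                lshashes=[RandomBinaryProjections('rbp', 10)],  # 10 random projections
                vector_filters=[NearestFilter(1000)])
engine.store_vector(np.random.randn(dim), 'binary.funcA')  # index one function vector
neighbours = engine.neighbours(np.random.randn(dim))       # [(vector, name, distance), ...]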
@@ -1,16 +0,0 @@
-@echo off
-setlocal EnableDelayedExpansion
-
-
-set "FOLDER_PATH=D:\bishe\dataset\train_malware"
-
-
-
-for %%f in ("%FOLDER_PATH%\*") do (
-    echo !time! %%f
-    D:\IDA_Pro_v6.8\idaq64.exe -c -A -S"D:\bishe\Gencoding_KE\Genius3\raw-feature-extractor\preprocessing_ida.py 0" -oD:\bishe\dataset\out %%f
-
-)
-
-endlocal
-