Compare commits
10 Commits
f607e57a20
...
df06fd866b
Author | SHA1 | Date | |
---|---|---|---|
df06fd866b | |||
|
7dcb04cd57 | ||
|
cfdfc03685 | ||
|
7aca23f5d2 | ||
|
0c699a829e | ||
|
995e7b7412 | ||
|
aae437a3a1 | ||
|
3206714241 | ||
|
49bae1c6ca | ||
|
7cd5e55e48 |
8
.idea/.gitignore
vendored
Normal file
8
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# 默认忽略的文件
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# 基于编辑器的 HTTP 客户端请求
|
||||||
|
/httpRequests/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
8
.idea/Gencoding.iml
Normal file
8
.idea/Gencoding.iml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
56
.idea/deployment.xml
Normal file
56
.idea/deployment.xml
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
|
||||||
|
<serverData>
|
||||||
|
<paths name="king@localhost:23 password">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
<paths name="king@localhost:23 password (2)">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
<paths name="king@localhost:23 password (3)">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
<paths name="king@localhost:23 password (4)">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
<paths name="king@localhost:23 password (5)">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
<paths name="king@localhost:23 password (6)">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
<paths name="king@localhost:23 password (7)">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
</serverData>
|
||||||
|
</component>
|
||||||
|
</project>
|
39
.idea/inspectionProfiles/Project_Default.xml
Normal file
39
.idea/inspectionProfiles/Project_Default.xml
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<profile version="1.0">
|
||||||
|
<option name="myName" value="Project Default" />
|
||||||
|
<inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||||
|
<Languages>
|
||||||
|
<language minSize="61" name="Python" />
|
||||||
|
</Languages>
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
||||||
|
<option name="ignoredPackages">
|
||||||
|
<value>
|
||||||
|
<list size="5">
|
||||||
|
<item index="0" class="java.lang.String" itemvalue="lief" />
|
||||||
|
<item index="1" class="java.lang.String" itemvalue="pylddwrap" />
|
||||||
|
<item index="2" class="java.lang.String" itemvalue="docopt" />
|
||||||
|
<item index="3" class="java.lang.String" itemvalue="rich" />
|
||||||
|
<item index="4" class="java.lang.String" itemvalue="mysqlclient" />
|
||||||
|
</list>
|
||||||
|
</value>
|
||||||
|
</option>
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||||
|
<option name="ignoredErrors">
|
||||||
|
<list>
|
||||||
|
<option value="N802" />
|
||||||
|
<option value="N801" />
|
||||||
|
<option value="N806" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
||||||
|
<option name="ignoredIdentifiers">
|
||||||
|
<list>
|
||||||
|
<option value="b64_flag" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</inspection_tool>
|
||||||
|
</profile>
|
||||||
|
</component>
|
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
4
.idea/misc.xml
Normal file
4
.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="D:\Program\miniconda3\envs\pyqt" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/Gencoding.iml" filepath="$PROJECT_DIR$/.idea/Gencoding.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
19
README.md
19
README.md
@ -1 +1,18 @@
|
|||||||
This is a test!
|
This project provides two components of Genius, a graph-based bug search framework. The first component is the raw feature extraction. The second is the online bug search engine.
|
||||||
|
|
||||||
|
1. The raw feature extraction is designed to achieve following two goals:
|
||||||
|
|
||||||
|
-> Extract the control flow graph for each binary function
|
||||||
|
|
||||||
|
-> Extract the attributes for each node in the grap
|
||||||
|
|
||||||
|
The feature extraction is built on top of IDA-pro. We wrote the scripts based on ida-python and extract the attributed control flow graph. ``preprocessing_ida.py'' is the main program to extract the ACFG.
|
||||||
|
|
||||||
|
2. The online bug search engine is used for real-time search:
|
||||||
|
|
||||||
|
-> It utilized localality sensitive hashing for indexing
|
||||||
|
|
||||||
|
-> Nearest-neighbor search algorithm for search
|
||||||
|
|
||||||
|
The online search is based on nearpy (https://github.com/pixelogik/NearPy).
|
||||||
|
|
||||||
|
228
raw-feature-extractor/discovRe.py
Executable file
228
raw-feature-extractor/discovRe.py
Executable file
@ -0,0 +1,228 @@
|
|||||||
|
#
|
||||||
|
# Reference Lister
|
||||||
|
#
|
||||||
|
# List all functions and all references to them in the current section.
|
||||||
|
#
|
||||||
|
# Implemented with the idautils module
|
||||||
|
#
|
||||||
|
import networkx as nx
|
||||||
|
import cPickle as pickle
|
||||||
|
import pdb
|
||||||
|
from graph_analysis_ida import *
|
||||||
|
from graph_property import *
|
||||||
|
#import wingdbstub
|
||||||
|
#wingdbstub.Ensure()
|
||||||
|
|
||||||
|
def get_funcs(ea):
|
||||||
|
funcs = {}
|
||||||
|
# Get current ea
|
||||||
|
# Loop from start to end in the current segment
|
||||||
|
for funcea in Functions(SegStart(ea)):
|
||||||
|
funcname = GetFunctionName(funcea)
|
||||||
|
func = get_func(funcea)
|
||||||
|
blocks = FlowChart(func)
|
||||||
|
funcs[funcname] = []
|
||||||
|
for bl in blocks:
|
||||||
|
start = bl.startEA
|
||||||
|
end = bl.endEA
|
||||||
|
funcs[funcname].append((start, end))
|
||||||
|
return funcs
|
||||||
|
|
||||||
|
def get_funcs_for_discoverRe(ea):
|
||||||
|
features = {}
|
||||||
|
for funcea in Functions(SegStart(ea)):
|
||||||
|
funcname = GetFunctionName(funcea)
|
||||||
|
print funcname
|
||||||
|
func = get_func(funcea)
|
||||||
|
feature = get_discoverRe_feature(func)
|
||||||
|
features[funcname] = feature
|
||||||
|
return features
|
||||||
|
|
||||||
|
def get_discoverRe_feature(func, icfg):
|
||||||
|
start = func.startEA
|
||||||
|
end = func.endEA
|
||||||
|
features = []
|
||||||
|
FunctionCalls = getFuncCalls(func)
|
||||||
|
#1
|
||||||
|
features.append(FunctionCalls)
|
||||||
|
LogicInstr = getLogicInsts(func)
|
||||||
|
#2
|
||||||
|
features.append(LogicInstr)
|
||||||
|
Transfer = getTransferInsts(func)
|
||||||
|
#3
|
||||||
|
features.append(Transfer)
|
||||||
|
Locals = getLocalVariables(func)
|
||||||
|
#4
|
||||||
|
features.append(Locals)
|
||||||
|
BB = getBasicBlocks(func)
|
||||||
|
#5
|
||||||
|
features.append(BB)
|
||||||
|
Edges = len(icfg.edges())
|
||||||
|
#6
|
||||||
|
features.append(Edges)
|
||||||
|
Incoming = getIncommingCalls(func)
|
||||||
|
#7
|
||||||
|
features.append(Incoming)
|
||||||
|
#8
|
||||||
|
Instrs = getIntrs(func)
|
||||||
|
features.append(Instrs)
|
||||||
|
between = retrieveGP(icfg)
|
||||||
|
#9
|
||||||
|
features.append(between)
|
||||||
|
|
||||||
|
strings, consts = getfunc_consts(func)
|
||||||
|
features.append(strings)
|
||||||
|
features.append(consts)
|
||||||
|
return features
|
||||||
|
|
||||||
|
def get_func_names(ea):
|
||||||
|
funcs = {}
|
||||||
|
for funcea in Functions(SegStart(ea)):
|
||||||
|
funcname = GetFunctionName(funcea)
|
||||||
|
funcs[funcname] = funcea
|
||||||
|
return funcs
|
||||||
|
|
||||||
|
def get_func_bases(ea):
|
||||||
|
funcs = {}
|
||||||
|
for funcea in Functions(SegStart(ea)):
|
||||||
|
funcname = GetFunctionName(funcea)
|
||||||
|
funcs[funcea] = funcname
|
||||||
|
return funcs
|
||||||
|
|
||||||
|
def get_func_range(ea):
|
||||||
|
funcs = {}
|
||||||
|
for funcea in Functions(SegStart(ea)):
|
||||||
|
funcname = GetFunctionName(funcea)
|
||||||
|
func = get_func(funcea)
|
||||||
|
funcs[funcname] = (func.startEA, func.endEA)
|
||||||
|
return funcs
|
||||||
|
|
||||||
|
def get_func_sequences(ea):
|
||||||
|
funcs_bodylist = {}
|
||||||
|
funcs = get_funcs(ea)
|
||||||
|
for funcname in funcs:
|
||||||
|
if funcname not in funcs_bodylist:
|
||||||
|
funcs_bodylist[funcname] = []
|
||||||
|
for start, end in funcs[funcname]:
|
||||||
|
inst_addr = start
|
||||||
|
while inst_addr <= end:
|
||||||
|
opcode = GetMnem(inst_addr)
|
||||||
|
funcs_bodylist[funcname].append(opcode)
|
||||||
|
inst_addr = NextHead(inst_addr)
|
||||||
|
return funcs_bodylist
|
||||||
|
|
||||||
|
def get_func_cfgs(ea):
|
||||||
|
func_cfglist = {}
|
||||||
|
i = 0
|
||||||
|
start, end = get_section('LOAD')
|
||||||
|
#print start, end
|
||||||
|
for funcea in Functions(SegStart(ea)):
|
||||||
|
if start <= funcea <= end:
|
||||||
|
funcname = GetFunctionName(funcea)
|
||||||
|
func = get_func(funcea)
|
||||||
|
print i
|
||||||
|
i += 1
|
||||||
|
try:
|
||||||
|
icfg = cfg.cfg_construct(func)
|
||||||
|
func_cfglist[funcname] = icfg
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return func_cfglist
|
||||||
|
|
||||||
|
def get_section(t):
|
||||||
|
base = SegByName(t)
|
||||||
|
start = SegByBase(base)
|
||||||
|
end = SegEnd(start)
|
||||||
|
return start, end
|
||||||
|
|
||||||
|
|
||||||
|
def get_func_cfg_sequences(func_cfglist):
|
||||||
|
func_cfg_seqlist = {}
|
||||||
|
for funcname in func_cfglist:
|
||||||
|
func_cfg_seqlist[funcname] = {}
|
||||||
|
cfg = func_cfglist[funcname][0]
|
||||||
|
for start, end in cfg:
|
||||||
|
codesq = get_sequences(start, end)
|
||||||
|
func_cfg_seqlist[funcname][(start,end)] = codesq
|
||||||
|
|
||||||
|
return func_cfg_seqlist
|
||||||
|
|
||||||
|
|
||||||
|
def get_sequences(start, end):
|
||||||
|
seq = []
|
||||||
|
inst_addr = start
|
||||||
|
while inst_addr <= end:
|
||||||
|
opcode = GetMnem(inst_addr)
|
||||||
|
seq.append(opcode)
|
||||||
|
inst_addr = NextHead(inst_addr)
|
||||||
|
return seq
|
||||||
|
|
||||||
|
def get_stack_arg(func_addr):
|
||||||
|
print func_addr
|
||||||
|
args = []
|
||||||
|
stack = GetFrame(func_addr)
|
||||||
|
if not stack:
|
||||||
|
return []
|
||||||
|
firstM = GetFirstMember(stack)
|
||||||
|
lastM = GetLastMember(stack)
|
||||||
|
i = firstM
|
||||||
|
while i <=lastM:
|
||||||
|
mName = GetMemberName(stack,i)
|
||||||
|
mSize = GetMemberSize(stack,i)
|
||||||
|
if mSize:
|
||||||
|
i = i + mSize
|
||||||
|
else:
|
||||||
|
i = i+4
|
||||||
|
if mName not in args and mName and ' s' not in mName and ' r' not in mName:
|
||||||
|
args.append(mName)
|
||||||
|
return args
|
||||||
|
|
||||||
|
#pickle.dump(funcs, open('C:/Documents and Settings/Administrator/Desktop/funcs','w'))
|
||||||
|
|
||||||
|
def processDataSegs():
|
||||||
|
funcdata = {}
|
||||||
|
datafunc = {}
|
||||||
|
for n in xrange(idaapi.get_segm_qty()):
|
||||||
|
seg = idaapi.getnseg(n)
|
||||||
|
ea = seg.startEA
|
||||||
|
segtype = idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE)
|
||||||
|
if segtype in [idc.SEG_DATA, idc.SEG_BSS]:
|
||||||
|
start = idc.SegStart(ea)
|
||||||
|
end = idc.SegEnd(ea)
|
||||||
|
cur = start
|
||||||
|
while cur <= end:
|
||||||
|
refs = [v for v in DataRefsTo(cur)]
|
||||||
|
for fea in refs:
|
||||||
|
name = GetFunctionName(fea)
|
||||||
|
if len(name)== 0:
|
||||||
|
continue
|
||||||
|
if name not in funcdata:
|
||||||
|
funcdata[name] = [cur]
|
||||||
|
else:
|
||||||
|
funcdata[name].append(cur)
|
||||||
|
if cur not in datafunc:
|
||||||
|
datafunc[cur] = [name]
|
||||||
|
else:
|
||||||
|
datafunc[cur].append(name)
|
||||||
|
cur = NextHead(cur)
|
||||||
|
return funcdata, datafunc
|
||||||
|
|
||||||
|
def obtainDataRefs(callgraph):
|
||||||
|
datarefs = {}
|
||||||
|
funcdata, datafunc = processDataSegs()
|
||||||
|
for node in callgraph:
|
||||||
|
if node in funcdata:
|
||||||
|
datas = funcdata[node]
|
||||||
|
for dd in datas:
|
||||||
|
refs = datafunc[dd]
|
||||||
|
refs = list(set(refs))
|
||||||
|
if node in datarefs:
|
||||||
|
print refs
|
||||||
|
datarefs[node] += refs
|
||||||
|
datarefs[node] = list(set(datarefs[node]))
|
||||||
|
else:
|
||||||
|
datarefs[node] = refs
|
||||||
|
return datarefs
|
||||||
|
|
||||||
|
|
@ -13,7 +13,8 @@ import cfg_constructor as cfg
|
|||||||
import cPickle as pickle
|
import cPickle as pickle
|
||||||
import pdb
|
import pdb
|
||||||
from raw_graphs import *
|
from raw_graphs import *
|
||||||
from discovRe_feature.discovRe import *
|
#from discovRe_feature.discovRe import *
|
||||||
|
from discovRe import *
|
||||||
#import wingdbstub
|
#import wingdbstub
|
||||||
#wingdbstub.Ensure()
|
#wingdbstub.Ensure()
|
||||||
def gt_funcNames(ea):
|
def gt_funcNames(ea):
|
||||||
|
@ -2,23 +2,146 @@ from idautils import *
|
|||||||
from idaapi import *
|
from idaapi import *
|
||||||
from idc import *
|
from idc import *
|
||||||
|
|
||||||
def getSequences(start, end):
|
def getfunc_consts(func):
|
||||||
seqs = []
|
strings = []
|
||||||
|
consts = []
|
||||||
|
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
|
||||||
|
for bl in blocks:
|
||||||
|
strs, conts = getBBconsts(bl)
|
||||||
|
strings += strs
|
||||||
|
consts += conts
|
||||||
|
return strings, consts
|
||||||
|
|
||||||
|
def getConst(ea, offset):
|
||||||
|
strings = []
|
||||||
|
consts = []
|
||||||
|
optype1 = GetOpType(ea, offset)
|
||||||
|
if optype1 == idaapi.o_imm:
|
||||||
|
imm_value = GetOperandValue(ea, offset)
|
||||||
|
if 0<= imm_value <= 10:
|
||||||
|
consts.append(imm_value)
|
||||||
|
else:
|
||||||
|
if idaapi.isLoaded(imm_value) and idaapi.getseg(imm_value):
|
||||||
|
str_value = GetString(imm_value)
|
||||||
|
if str_value is None:
|
||||||
|
str_value = GetString(imm_value+0x40000)
|
||||||
|
if str_value is None:
|
||||||
|
consts.append(imm_value)
|
||||||
|
else:
|
||||||
|
re = all(40 <= ord(c) < 128 for c in str_value)
|
||||||
|
if re:
|
||||||
|
strings.append(str_value)
|
||||||
|
else:
|
||||||
|
consts.append(imm_value)
|
||||||
|
else:
|
||||||
|
re = all(40 <= ord(c) < 128 for c in str_value)
|
||||||
|
if re:
|
||||||
|
strings.append(str_value)
|
||||||
|
else:
|
||||||
|
consts.append(imm_value)
|
||||||
|
else:
|
||||||
|
consts.append(imm_value)
|
||||||
|
return strings, consts
|
||||||
|
|
||||||
|
def getBBconsts(bl):
|
||||||
|
strings = []
|
||||||
|
consts = []
|
||||||
|
start = bl[0]
|
||||||
|
end = bl[1]
|
||||||
|
invoke_num = 0
|
||||||
inst_addr = start
|
inst_addr = start
|
||||||
while inst_addr < end:
|
while inst_addr < end:
|
||||||
opcode = GetMnem(inst_addr)
|
opcode = GetMnem(inst_addr)
|
||||||
if opcode == 'move' or opcode == "mov":
|
if opcode in ['la','jalr','call', 'jal']:
|
||||||
opnd1 = GetOpnd(inst_addr,0)
|
inst_addr = NextHead(inst_addr)
|
||||||
if opnd1 == '$v0' or opnd1 == "$eax":
|
continue
|
||||||
opcode = (opcode, opnd1)
|
strings_src, consts_src = getConst(inst_addr, 0)
|
||||||
seqs.append(opcode)
|
strings_dst, consts_dst = getConst(inst_addr, 1)
|
||||||
|
strings += strings_src
|
||||||
|
strings += strings_dst
|
||||||
|
consts += consts_src
|
||||||
|
consts += consts_dst
|
||||||
|
try:
|
||||||
|
strings_dst, consts_dst = getConst(inst_addr, 2)
|
||||||
|
consts += consts_dst
|
||||||
|
strings += strings_dst
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
inst_addr = NextHead(inst_addr)
|
inst_addr = NextHead(inst_addr)
|
||||||
return seqs
|
return strings, consts
|
||||||
|
|
||||||
|
def getFuncCalls(func):
|
||||||
|
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
|
||||||
|
sumcalls = 0
|
||||||
|
for bl in blocks:
|
||||||
|
callnum = calCalls(bl)
|
||||||
|
sumcalls += callnum
|
||||||
|
return sumcalls
|
||||||
|
|
||||||
|
def getLogicInsts(func):
|
||||||
|
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
|
||||||
|
sumcalls = 0
|
||||||
|
for bl in blocks:
|
||||||
|
callnum = calLogicInstructions(bl)
|
||||||
|
sumcalls += callnum
|
||||||
|
return sumcalls
|
||||||
|
|
||||||
|
def getTransferInsts(func):
|
||||||
|
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
|
||||||
|
sumcalls = 0
|
||||||
|
for bl in blocks:
|
||||||
|
callnum = calTransferIns(bl)
|
||||||
|
sumcalls += callnum
|
||||||
|
return sumcalls
|
||||||
|
|
||||||
|
def getIntrs(func):
|
||||||
|
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
|
||||||
|
sumcalls = 0
|
||||||
|
for bl in blocks:
|
||||||
|
callnum = calInsts(bl)
|
||||||
|
sumcalls += callnum
|
||||||
|
return sumcalls
|
||||||
|
|
||||||
|
def getLocalVariables(func):
|
||||||
|
args_num = get_stackVariables(func.startEA)
|
||||||
|
return args_num
|
||||||
|
|
||||||
|
def getBasicBlocks(func):
|
||||||
|
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
|
||||||
|
return len(blocks)
|
||||||
|
|
||||||
|
def getIncommingCalls(func):
|
||||||
|
refs = CodeRefsTo(func.startEA, 0)
|
||||||
|
re = len([v for v in refs])
|
||||||
|
return re
|
||||||
|
|
||||||
|
|
||||||
|
def get_stackVariables(func_addr):
|
||||||
|
#print func_addr
|
||||||
|
args = []
|
||||||
|
stack = GetFrame(func_addr)
|
||||||
|
if not stack:
|
||||||
|
return 0
|
||||||
|
firstM = GetFirstMember(stack)
|
||||||
|
lastM = GetLastMember(stack)
|
||||||
|
i = firstM
|
||||||
|
while i <=lastM:
|
||||||
|
mName = GetMemberName(stack,i)
|
||||||
|
mSize = GetMemberSize(stack,i)
|
||||||
|
if mSize:
|
||||||
|
i = i + mSize
|
||||||
|
else:
|
||||||
|
i = i+4
|
||||||
|
if mName not in args and mName and 'var_' in mName:
|
||||||
|
args.append(mName)
|
||||||
|
return len(args)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def calArithmeticIns(bl):
|
def calArithmeticIns(bl):
|
||||||
x86_AI = {'add':1, 'sub':1, 'div':1, 'imul':1, 'idiv':1, 'mul':1, 'shl':1, 'dec':1, 'inc':1}
|
x86_AI = {'add':1, 'sub':1, 'div':1, 'imul':1, 'idiv':1, 'mul':1, 'shl':1, 'dec':1, 'inc':1}
|
||||||
mips_AI = {'add':1, 'addu':1, 'addi':1, 'addiu':1, 'mult':1, 'multu':1, 'div':1, 'divu':1}
|
mips_AI = {'add':1, 'addu':1, 'addi':1, 'addiu':1, 'mult':1, 'multu':1, 'div':1, 'divu':1}
|
||||||
arm_AI = {"ADD":1, "ADC":1, "SUB":1, "SBC":1, "RSB":1, "RSC":1, "MUL":1, "MLA":1}
|
|
||||||
calls = {}
|
calls = {}
|
||||||
calls.update(x86_AI)
|
calls.update(x86_AI)
|
||||||
calls.update(mips_AI)
|
calls.update(mips_AI)
|
||||||
@ -28,22 +151,20 @@ def calArithmeticIns(bl):
|
|||||||
inst_addr = start
|
inst_addr = start
|
||||||
while inst_addr < end:
|
while inst_addr < end:
|
||||||
opcode = GetMnem(inst_addr)
|
opcode = GetMnem(inst_addr)
|
||||||
re = [v for v in calls if opcode in v]
|
if opcode in calls:
|
||||||
if len(re) > 0:
|
|
||||||
invoke_num += 1
|
invoke_num += 1
|
||||||
inst_addr = NextHead(inst_addr)
|
inst_addr = NextHead(inst_addr)
|
||||||
return invoke_num
|
return invoke_num
|
||||||
|
|
||||||
def calCalls(bl):
|
def calCalls(bl):
|
||||||
calls = {'call':1, 'jal':1, 'jalr':1, "BL":1}
|
calls = {'call':1, 'jal':1, 'jalr':1}
|
||||||
start = bl[0]
|
start = bl[0]
|
||||||
end = bl[1]
|
end = bl[1]
|
||||||
invoke_num = 0
|
invoke_num = 0
|
||||||
inst_addr = start
|
inst_addr = start
|
||||||
while inst_addr < end:
|
while inst_addr < end:
|
||||||
opcode = GetMnem(inst_addr)
|
opcode = GetMnem(inst_addr)
|
||||||
re = [v for v in calls if opcode in v]
|
if opcode in calls:
|
||||||
if len(re) > 0:
|
|
||||||
invoke_num += 1
|
invoke_num += 1
|
||||||
inst_addr = NextHead(inst_addr)
|
inst_addr = NextHead(inst_addr)
|
||||||
return invoke_num
|
return invoke_num
|
||||||
@ -61,19 +182,16 @@ def calInsts(bl):
|
|||||||
def calLogicInstructions(bl):
|
def calLogicInstructions(bl):
|
||||||
x86_LI = {'and':1, 'andn':1, 'andnpd':1, 'andpd':1, 'andps':1, 'andnps':1, 'test':1, 'xor':1, 'xorpd':1, 'pslld':1}
|
x86_LI = {'and':1, 'andn':1, 'andnpd':1, 'andpd':1, 'andps':1, 'andnps':1, 'test':1, 'xor':1, 'xorpd':1, 'pslld':1}
|
||||||
mips_LI = {'and':1, 'andi':1, 'or':1, 'ori':1, 'xor':1, 'nor':1, 'slt':1, 'slti':1, 'sltu':1}
|
mips_LI = {'and':1, 'andi':1, 'or':1, 'ori':1, 'xor':1, 'nor':1, 'slt':1, 'slti':1, 'sltu':1}
|
||||||
arm_LI = {"AND":1, "EOR":1, "ORR":1, "ORN":1, 'BIC':1}
|
|
||||||
calls = {}
|
calls = {}
|
||||||
calls.update(x86_LI)
|
calls.update(x86_LI)
|
||||||
calls.update(mips_LI)
|
calls.update(mips_LI)
|
||||||
calls.update(arm_LI)
|
|
||||||
start = bl[0]
|
start = bl[0]
|
||||||
end = bl[1]
|
end = bl[1]
|
||||||
invoke_num = 0
|
invoke_num = 0
|
||||||
inst_addr = start
|
inst_addr = start
|
||||||
while inst_addr < end:
|
while inst_addr < end:
|
||||||
opcode = GetMnem(inst_addr)
|
opcode = GetMnem(inst_addr)
|
||||||
re = [v for v in calls if opcode in v]
|
if opcode in calls:
|
||||||
if len(re) > 0:
|
|
||||||
invoke_num += 1
|
invoke_num += 1
|
||||||
inst_addr = NextHead(inst_addr)
|
inst_addr = NextHead(inst_addr)
|
||||||
return invoke_num
|
return invoke_num
|
||||||
@ -90,35 +208,19 @@ def calSconstants(bl):
|
|||||||
inst_addr = NextHead(inst_addr)
|
inst_addr = NextHead(inst_addr)
|
||||||
return invoke_num
|
return invoke_num
|
||||||
|
|
||||||
def getConst(ea, offset):
|
|
||||||
strings = []
|
|
||||||
consts = []
|
|
||||||
optype1 = GetOpType(ea, offset)
|
|
||||||
if optype1 == idaapi.o_imm:
|
|
||||||
imm_value = GetOperandValue(ea, offset)
|
|
||||||
if idaapi.isLoaded(imm_value) and idaapi.getseg(imm_value):
|
|
||||||
str_value = GetString(imm_value)
|
|
||||||
strings.append(str_value)
|
|
||||||
else:
|
|
||||||
consts.append(imm_value)
|
|
||||||
return strings, consts
|
|
||||||
|
|
||||||
def getBBconsts(bl):
|
def calNconstants(bl):
|
||||||
strings = []
|
|
||||||
consts = []
|
|
||||||
start = bl[0]
|
start = bl[0]
|
||||||
end = bl[1]
|
end = bl[1]
|
||||||
invoke_num = 0
|
invoke_num = 0
|
||||||
inst_addr = start
|
inst_addr = start
|
||||||
while inst_addr < end:
|
while inst_addr < end:
|
||||||
strings_src, consts_src = getConst(inst_addr, 0)
|
optype1 = GetOpType(inst_addr, 0)
|
||||||
strings_dst, consts_dst = getConst(inst_addr, 1)
|
optype2 = GetOpType(inst_addr, 1)
|
||||||
strings += strings_src
|
if optype1 == 5 or optype2 == 5:
|
||||||
strings += strings_dst
|
invoke_num += 1
|
||||||
consts += consts_src
|
|
||||||
consts += consts_dst
|
|
||||||
inst_addr = NextHead(inst_addr)
|
inst_addr = NextHead(inst_addr)
|
||||||
return strings, consts
|
return invoke_num
|
||||||
|
|
||||||
def retrieveExterns(bl, ea_externs):
|
def retrieveExterns(bl, ea_externs):
|
||||||
externs = []
|
externs = []
|
||||||
@ -142,7 +244,6 @@ def calTransferIns(bl):
|
|||||||
calls = {}
|
calls = {}
|
||||||
calls.update(x86_TI)
|
calls.update(x86_TI)
|
||||||
calls.update(mips_TI)
|
calls.update(mips_TI)
|
||||||
calls.update(arm_TI)
|
|
||||||
start = bl[0]
|
start = bl[0]
|
||||||
end = bl[1]
|
end = bl[1]
|
||||||
invoke_num = 0
|
invoke_num = 0
|
||||||
|
24
raw-feature-extractor/graph_property.py
Normal file
24
raw-feature-extractor/graph_property.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
import networkx as nx
|
||||||
|
import pdb
|
||||||
|
def betweeness(g):
|
||||||
|
#pdb.set_trace()
|
||||||
|
betweenness = nx.betweenness_centrality(g)
|
||||||
|
return betweenness
|
||||||
|
|
||||||
|
def eigenvector(g):
|
||||||
|
centrality = nx.eigenvector_centrality(g)
|
||||||
|
return centrality
|
||||||
|
|
||||||
|
def closeness_centrality(g):
|
||||||
|
closeness = nx.closeness_centrality(g)
|
||||||
|
return closeness
|
||||||
|
|
||||||
|
def retrieveGP(g):
|
||||||
|
bf = betweeness(g)
|
||||||
|
#close = closeness_centrality(g)
|
||||||
|
#bf_sim =
|
||||||
|
#close_sim =
|
||||||
|
x = sorted(bf.values())
|
||||||
|
value = sum(x)/len(x)
|
||||||
|
return round(value,5)
|
||||||
|
|
356
search-engine/db.py
Normal file
356
search-engine/db.py
Normal file
@ -0,0 +1,356 @@
|
|||||||
|
import cPickle as pickle
|
||||||
|
from search import *
|
||||||
|
from nearpy import Engine
|
||||||
|
from nearpy.hashes import RandomDiscretizedProjections
|
||||||
|
from nearpy.filters import NearestFilter, UniqueFilter
|
||||||
|
from nearpy.distances import EuclideanDistance
|
||||||
|
from nearpy.distances import CosineDistance
|
||||||
|
from nearpy.hashes import RandomBinaryProjections
|
||||||
|
from nearpy.experiments import DistanceRatioExperiment
|
||||||
|
from redis import Redis
|
||||||
|
from nearpy.storage import RedisStorage
|
||||||
|
from feature import *
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
import pdb
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
import numpy as np
|
||||||
|
from refactoring import *
|
||||||
|
import pymongo
|
||||||
|
from pymongo import MongoClient
|
||||||
|
|
||||||
|
def initDB():
|
||||||
|
client = MongoClient()
|
||||||
|
client = MongoClient('localhost', 27017)
|
||||||
|
client = MongoClient('mongodb://localhost:27017/')
|
||||||
|
db = client.test_database
|
||||||
|
db = client['iot-encoding']
|
||||||
|
return db
|
||||||
|
|
||||||
|
db = initDB()
|
||||||
|
posts = db.posts
|
||||||
|
|
||||||
|
class db:
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.feature_list = {}
|
||||||
|
self.engine = None
|
||||||
|
|
||||||
|
def loadHashmap(self, feature_size, result_n):
|
||||||
|
# Create redis storage adapter
|
||||||
|
redis_object = Redis(host='localhost', port=6379, db=0)
|
||||||
|
redis_storage = RedisStorage(redis_object)
|
||||||
|
pdb.set_trace()
|
||||||
|
try:
|
||||||
|
# Get hash config from redis
|
||||||
|
config = redis_storage.load_hash_configuration('test')
|
||||||
|
# Config is existing, create hash with None parameters
|
||||||
|
lshash = RandomBinaryProjections(None, None)
|
||||||
|
# Apply configuration loaded from redis
|
||||||
|
lshash.apply_config(config)
|
||||||
|
|
||||||
|
except:
|
||||||
|
# Config is not existing, create hash from scratch, with 10 projections
|
||||||
|
lshash = RandomBinaryProjections('test', 0)
|
||||||
|
|
||||||
|
|
||||||
|
# Create engine for feature space of 100 dimensions and use our hash.
|
||||||
|
# This will set the dimension of the lshash only the first time, not when
|
||||||
|
# using the configuration loaded from redis. Use redis storage to store
|
||||||
|
# buckets.
|
||||||
|
nearest = NearestFilter(1000)
|
||||||
|
#self.engine = Engine(feature_size, lshashes=[], vector_filters=[])
|
||||||
|
pdb.set_trace()
|
||||||
|
self.engine = Engine(192, lshashes=[lshash], vector_filters=[nearest], storage=redis_storage, distance=EuclideanDistance())
|
||||||
|
|
||||||
|
# Do some stuff like indexing or querying with the engine...
|
||||||
|
|
||||||
|
# Finally store hash configuration in redis for later use
|
||||||
|
redis_storage.store_hash_configuration(lshash)
|
||||||
|
|
||||||
|
def appendToDB(self, binary_name, funcname, fvector, firmware_name=""):
|
||||||
|
if fvector is None:
|
||||||
|
return
|
||||||
|
#ftuple = tuple([fvector])
|
||||||
|
self.engine.store_vector(np.asarray(fvector), ".".join((firmware_name,binary_name,funcname)))
|
||||||
|
|
||||||
|
def batch_appendDB(self, binary_name, features, firmware_name=""):
|
||||||
|
for funcname in features:
|
||||||
|
feature = features[funcname]
|
||||||
|
#pdb.set_trace()
|
||||||
|
self.appendToDB(binary_name, funcname, feature, firmware_name)
|
||||||
|
|
||||||
|
def batch_appendDBbyDir(self, base_dir):
|
||||||
|
cursor = posts.find({"firmware_name":"ddwrt-r21676_result"})
|
||||||
|
i = 0
|
||||||
|
for v in cursor:
|
||||||
|
print i
|
||||||
|
i+=1
|
||||||
|
binary_name = v['binary_name']
|
||||||
|
funcname = v['func_name']
|
||||||
|
firmware_name = v['firmware_name']
|
||||||
|
feature = v['fvector']
|
||||||
|
self.appendToDB(binary_name, funcname, feature, firmware_name)
|
||||||
|
|
||||||
|
def batch_appendDBbyDir1(self, base_dir):
|
||||||
|
image_dir = os.path.join(base_dir, "image")
|
||||||
|
firmware_featrues={}
|
||||||
|
bnum = 0
|
||||||
|
fnum = 0
|
||||||
|
i = 0
|
||||||
|
pdb.set_trace()
|
||||||
|
for firmware_name in os.listdir(image_dir):
|
||||||
|
print firmware_name
|
||||||
|
firmware_featrues[firmware_name] = {}
|
||||||
|
firmware_dir = os.path.join(image_dir, firmware_name)
|
||||||
|
for binary_name in os.listdir(firmware_dir):
|
||||||
|
if binary_name.endswith(".features"):
|
||||||
|
bnum += 1
|
||||||
|
featrues_dir = os.path.join(firmware_dir, binary_name)
|
||||||
|
featrues = pickle.load(open(featrues_dir, "r"))
|
||||||
|
for funcname in featrues:
|
||||||
|
fnum +=1
|
||||||
|
#pdb.set_trace()
|
||||||
|
feature = featrues[funcname]
|
||||||
|
self.appendToDB(binary_name, funcname, feature, firmware_name)
|
||||||
|
del featrues
|
||||||
|
print("bnum ", bnum)
|
||||||
|
print("fnum ", fnum)
|
||||||
|
|
||||||
|
def dump(self, base_dir):
|
||||||
|
db_dir = os.path.join(base_dir, "data/db/busybox.feature_mapping")
|
||||||
|
pickle.dump(self.feature_list, open(db_dir, 'w'))
|
||||||
|
db_dir = os.path.join(base_dir, "data/db/busybox.hashmap")
|
||||||
|
pickle.dump(self.engine, open(db_dir, 'w'))
|
||||||
|
|
||||||
|
def loadDB(self, base_dir):
|
||||||
|
db_dir = os.path.join(base_dir, "data/db/busybox.feature_mapping")
|
||||||
|
self.feature_list = pickle.load(open(db_dir, 'r'))
|
||||||
|
db_dir = os.path.join(base_dir, "data/db/busybox.hashmap")
|
||||||
|
self.engine = pickle.load(open(db_dir, 'r'))
|
||||||
|
|
||||||
|
def findF(self, binary_name, funcname):
    """Return the first key in self.feature_list whose entry contains
    ``funcname`` under ``binary_name``.

    Raises IndexError when no entry matches (same as the original).
    """
    matches = []
    for key in self.feature_list:
        binaries = self.feature_list[key]
        if binary_name in binaries and funcname in binaries[binary_name]:
            matches.append(key)
    return matches[0]
||||||
|
def retrieveFeaturesByDir(n, base_dir, db_instance=None):
    """Scan ``base_dir`` for per-firmware ``*.features`` directories and
    append every function feature vector found to ``db_instance``.

    Only the first matching firmware entry is processed (the original's
    ``i``/``break`` logic is preserved). Returns the (per-firmware) dict
    the original built but never used, for inspectability.

    NOTE(review): the original called ``str.endWith`` (AttributeError) and
    referenced an undefined ``self`` (NameError), so it could never have
    run; the DB target is now an explicit, optional parameter and a live
    pdb.set_trace() breakpoint was removed. Confirm intended behaviour
    against callers before relying on this.
    """
    firmware_featrues = {}
    processed = 0
    for firmware_name in os.listdir(base_dir):
        if not firmware_name.endswith(".features"):
            continue
        if processed > 0:  # original stopped after the first firmware
            break
        processed += 1
        firmware_featrues[firmware_name] = {}
        firmware_dir = os.path.join(base_dir, firmware_name)
        for binary_name in os.listdir(firmware_dir):
            featrues_path = os.path.join(
                firmware_dir, binary_name + "_cb" + str(n) + ".features")
            with open(featrues_path, "rb") as fh:
                featrues = pickle.load(fh)
            for funcname in featrues:
                feature = featrues[funcname]
                if db_instance is not None:
                    db_instance.appendToDB(firmware_name, binary_name, funcname, feature)
            del featrues
    return firmware_featrues
||||||
|
def retrieveFeatures(n, base_dir, filename, funcs):
    """Load <base_dir>/5000/<filename>_cb<n>.features and return a dict
    mapping funcname -> numpy feature vector.

    ``funcs`` is accepted for interface compatibility; the original had the
    per-function filter commented out, so every function is returned.

    Fixes vs. original: the pickle file is opened in binary mode and closed
    deterministically (it was leaked before).
    """
    feature_dic = {}
    featrues_path = os.path.join(base_dir, "5000",
                                 filename + "_cb" + str(n) + ".features")
    with open(featrues_path, "rb") as fh:
        featrues = pickle.load(fh)
    for name in featrues:
        feature_dic[name] = np.asarray(featrues[name])
    return feature_dic
|
||||||
|
def retrieveVuldb(base_input_dir):
    """Load and return the pickled vulnerability DB at <base_input_dir>/vul.

    Fixes vs. original: the file is opened in binary mode and closed
    deterministically (it was leaked before).
    """
    vul_path = os.path.join(base_input_dir, "vul")
    with open(vul_path, "rb") as fh:
        return pickle.load(fh)
||||||
|
|
||||||
|
def retrieveFeaturesx(filename, base_dir="./data/"):
    """Load and return the pickled features at <base_dir>/<filename>.features.

    Generalized: ``base_dir`` was hard-coded to "./data/"; it is now a
    defaulted parameter, so existing callers are unaffected.
    Fixes vs. original: binary-mode open, handle closed via ``with``.
    """
    features_path = os.path.join(base_dir, filename + ".features")
    with open(features_path, "rb") as fh:
        return pickle.load(fh)
|
||||||
|
def retrieveQueries(n, base_dir, filename1, featrues_src):
    """Load <base_dir>/5000/<filename1>_cb<n>.features and return a dict
    mapping funcname -> numpy query vector.

    ``featrues_src`` is accepted for interface compatibility; the original
    had the membership filter commented out, so every function is returned.

    Fixes vs. original: binary-mode open, handle closed via ``with``
    (it was leaked before).
    """
    queries = {}
    featrues_path = os.path.join(base_dir, "5000",
                                 filename1 + "_cb" + str(n) + ".features")
    with open(featrues_path, "rb") as fh:
        featrues = pickle.load(fh)
    for name in featrues:
        queries[name] = np.asarray(featrues[name])
    return queries
|
||||||
|
def retrieveQueriesbyDir(n, base_dir, firmware_name, filename1):
    """Load <base_dir>/<firmware_name>/<filename1>_cb<n>.features and return
    a dict mapping funcname -> numpy query vector.

    Fixes vs. original: binary-mode open, handle closed via ``with``
    (it was leaked before).
    """
    queries = {}
    featrues_path = os.path.join(base_dir, firmware_name,
                                 filename1 + "_cb" + str(n) + ".features")
    with open(featrues_path, "rb") as fh:
        featrues = pickle.load(fh)
    for name in featrues:
        queries[name] = np.asarray(featrues[name])
    return queries
|
||||||
|
def retrieveQuery(n, base_dir, filename, funcname):
    """Return (as a numpy array) the feature vector of the first function in
    <base_dir>/<filename>_cb<n>.features whose name contains ``funcname``.

    Raises IndexError when no function name matches (same as the original).
    Fixes vs. original: binary-mode open, handle closed via ``with``.
    """
    featrues_path = os.path.join(base_dir,
                                 filename + "_cb" + str(n) + ".features")
    with open(featrues_path, "rb") as fh:
        featrues = pickle.load(fh)
    matches = [featrues[v] for v in featrues if funcname in v]
    return np.asarray(matches[0])
|
||||||
|
def parse_command():
    """Build and parse the command-line arguments for the search tool.

    Fixes vs. original: the --filename1/--filename2 help strings were
    copy-pasted from --size ("the size of each graphlet"); they now
    describe the arguments.
    """
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument("--base_input_dir", type=str,
                        help="raw binaries to process for training")
    parser.add_argument('--output_dir', type=str, help="output dir")
    parser.add_argument("--filename1", type=str,
                        help="binary whose features are indexed in the DB")
    parser.add_argument("--filename2", type=str,
                        help="binary whose features are used as queries")
    parser.add_argument("--size", type=int, help="the size of each graphlet")
    args = parser.parse_args()
    return args
|
||||||
|
def loadFuncs(path):
    """Read <path>/func_candid (one function name per line) and return a
    dict mapping each name -> 1 (used as a membership set by callers).

    Fixes vs. original: the file handle is now closed via ``with``
    (it was leaked before).
    """
    funcs = {}
    candid_path = os.path.join(path, "func_candid")
    with open(candid_path, "r") as fp:
        for line in fp:
            # Keep everything before the newline, exactly as the original did.
            funcname = line.split("\n")[0]
            funcs[funcname] = 1
    return funcs
|
||||||
|
def dump(path, featrues, queries):
    """Write feature vectors to <path>/matrix as tab-separated rows:
    <arch>\\t<name>\\t<v0>\\t...\\t<vk>, each component with 3 decimals.
    ``featrues`` rows are labelled "x86", ``queries`` rows "mips".

    Generalized vs. original: the format string hard-coded exactly 16
    float columns; the row width now follows each vector's length
    (output is byte-identical for 16-component vectors). The file is
    also closed deterministically via ``with``.
    """
    def _write_rows(fp, arch, table):
        # One row per function: arch label, name, then each component.
        for name in table:
            values = list(table[name])
            fmt = "%s\t%s\t" + "\t".join(["%.3f"] * len(values)) + "\n"
            fp.write(fmt % tuple([arch, name] + values))

    with open(path + "/" + "matrix", 'w') as fp:
        _write_rows(fp, "x86", featrues)
        _write_rows(fp, "mips", queries)
|
||||||
|
def queryBytwo(base_input_dir, filename1, filename2, n):
    """Index filename1's feature vectors in the LSH engine, query it with
    filename2's vectors, and print the top-K accuracy for each threshold
    in 1, 11, ..., 201.

    Fixes vs. original: removed the live pdb.set_trace() breakpoints that
    blocked execution, narrowed the bare ``except:`` to IndexError (the
    only expected failure: the query name absent from the neighbour list),
    guarded the division against an empty hit list, and deleted
    commented-out debris.
    """
    db_instance = db()
    funcs = loadFuncs(base_input_dir)
    db_instance.loadHashmap(n, 50000)
    featrues = retrieveFeatures(n, base_input_dir, filename1, funcs)
    queries = retrieveQueries(n, base_input_dir, filename2, funcs)
    # Loaded but not consulted below — kept for parity with the original;
    # TODO(review): confirm whether the vul DB filter should be re-enabled.
    vul_db = retrieveVuldb(base_input_dir)
    db_instance.batch_appendDB(filename1, featrues)
    ranks = []
    for threthold in range(1, 210, 10):
        hit = []
        for name in queries:
            if name not in featrues:
                continue
            query = queries[name]
            neighbours = db_instance.engine.neighbours(query)
            try:
                # Position of the first neighbour whose label contains name.
                rank = [v for v in range(len(neighbours))
                        if name in neighbours[v][1]][0]
                ranks.append((name, rank))
                hit.append(1 if rank <= threthold else 0)
            except IndexError:
                # Query name not among the returned neighbours: a miss.
                hit.append(0)
        acc = sum(hit) * 1.0 / len(hit) if hit else 0.0
        print(acc)
|
||||||
|
def queryAll(base_dir, firmware_name, filename1, n):
    """Index every binary under ``base_dir`` in the LSH engine, query it
    with ``filename1``'s vectors from ``firmware_name``, and print the
    indexing time and the accuracy at a fixed rank threshold (155).

    Fixes vs. original: removed the live pdb.set_trace() breakpoints,
    narrowed the bare ``except:`` to IndexError, replaced time.clock()
    (removed in Python 3.8, and inconsistent with the time.time() call
    above) with time.time(), and guarded the empty-hit division.
    """
    threthold = 155
    db_instance = db()
    db_instance.loadHashmap(n, 50000)
    queries = retrieveQueriesbyDir(n, base_dir, firmware_name, filename1)
    start = time.time()
    db_instance.batch_appendDBbyDir(n, base_dir)
    end = time.time()
    print(end - start)  # total indexing time
    hit = []
    times = []
    for i, name in enumerate(queries):
        print(i)
        query = queries[name]
        t0 = time.time()
        neighbours = db_instance.engine.neighbours(query)
        times.append(time.time() - t0)
        try:
            rank = [v for v in range(len(neighbours))
                    if name in neighbours[v][1]]
            if len(rank) > 1:
                # Multiple neighbours matched the query name.
                print("stop")
            hit.append(1 if rank[0] <= threthold else 0)
        except IndexError:
            # Query name not among the returned neighbours: a miss.
            hit.append(0)
    acc = sum(hit) * 1.0 / len(hit) if hit else 0.0
    mean = np.mean(times)
    std = np.std(times)
    print(acc)
|
||||||
|
if __name__ == "__main__":
    # Parse CLI arguments and run the two-binary query experiment.
    # (A live pdb.set_trace() breakpoint that blocked every run was removed.)
    args = parse_command()
    base_dir = args.base_input_dir
    filename1 = args.filename1
    filename2 = args.filename2
    n = args.size
    queryBytwo(base_dir, filename1, filename2, n)
Loading…
Reference in New Issue
Block a user