Compare commits
10 Commits
3206714241
...
d9670b2d8b
Author | SHA1 | Date | |
---|---|---|---|
d9670b2d8b | |||
|
8dad81779b | ||
|
17c1ac88b1 | ||
|
e29e36aa32 | ||
|
7dcb04cd57 | ||
|
cfdfc03685 | ||
|
7aca23f5d2 | ||
|
0c699a829e | ||
|
995e7b7412 | ||
|
aae437a3a1 |
8
.idea/.gitignore
vendored
Normal file
8
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
8
.idea/Gencoding_plus.iml
Normal file
8
.idea/Gencoding_plus.iml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
56
.idea/deployment.xml
Normal file
56
.idea/deployment.xml
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
|
||||||
|
<serverData>
|
||||||
|
<paths name="king@localhost:23 password">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
<paths name="king@localhost:23 password (2)">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
<paths name="king@localhost:23 password (3)">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
<paths name="king@localhost:23 password (4)">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
<paths name="king@localhost:23 password (5)">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
<paths name="king@localhost:23 password (6)">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
<paths name="king@localhost:23 password (7)">
|
||||||
|
<serverdata>
|
||||||
|
<mappings>
|
||||||
|
<mapping local="$PROJECT_DIR$" web="/" />
|
||||||
|
</mappings>
|
||||||
|
</serverdata>
|
||||||
|
</paths>
|
||||||
|
</serverData>
|
||||||
|
</component>
|
||||||
|
</project>
|
39
.idea/inspectionProfiles/Project_Default.xml
Normal file
39
.idea/inspectionProfiles/Project_Default.xml
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<profile version="1.0">
|
||||||
|
<option name="myName" value="Project Default" />
|
||||||
|
<inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||||
|
<Languages>
|
||||||
|
<language minSize="61" name="Python" />
|
||||||
|
</Languages>
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
||||||
|
<option name="ignoredPackages">
|
||||||
|
<value>
|
||||||
|
<list size="5">
|
||||||
|
<item index="0" class="java.lang.String" itemvalue="lief" />
|
||||||
|
<item index="1" class="java.lang.String" itemvalue="pylddwrap" />
|
||||||
|
<item index="2" class="java.lang.String" itemvalue="docopt" />
|
||||||
|
<item index="3" class="java.lang.String" itemvalue="rich" />
|
||||||
|
<item index="4" class="java.lang.String" itemvalue="mysqlclient" />
|
||||||
|
</list>
|
||||||
|
</value>
|
||||||
|
</option>
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||||
|
<option name="ignoredErrors">
|
||||||
|
<list>
|
||||||
|
<option value="N802" />
|
||||||
|
<option value="N801" />
|
||||||
|
<option value="N806" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
||||||
|
<option name="ignoredIdentifiers">
|
||||||
|
<list>
|
||||||
|
<option value="b64_flag" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</inspection_tool>
|
||||||
|
</profile>
|
||||||
|
</component>
|
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
4
.idea/misc.xml
Normal file
4
.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="D:\Program\miniconda3\envs\pyqt" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/Gencoding_plus.iml" filepath="$PROJECT_DIR$/.idea/Gencoding_plus.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
3
Genius3/.idea/.gitignore
vendored
Normal file
3
Genius3/.idea/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
11
Genius3/.idea/Genius3.iml
Normal file
11
Genius3/.idea/Genius3.iml
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/python" isTestSource="false" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||||
|
</content>
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
6
Genius3/.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
Genius3/.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
4
Genius3/.idea/misc.xml
Normal file
4
Genius3/.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7 (Genius3)" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
8
Genius3/.idea/modules.xml
Normal file
8
Genius3/.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/Genius3.iml" filepath="$PROJECT_DIR$/.idea/Genius3.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
16
Genius3/main.py
Normal file
16
Genius3/main.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
# -*- coding: UTF-8 -*-
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from func import *
|
||||||
|
from raw_graphs import *
|
||||||
|
from idc import *
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
# Script entry point: currently only prints a greeting when run directly.
if __name__ == '__main__':
    print("hello")
|
||||||
|
|
||||||
|
#
|
||||||
|
# E:\BaiduNetdiskDownload\IDA_Pro_v6.8\IDA_Pro_v6.8\idaq.exe -c -A -S"raw-feature-extractor/preprocessing_ida.py --path C:\Program1\pycharmproject\Genius3\acfgs" hpcenter
|
||||||
|
# -c: delete the old database; -A: automatic analysis, do not show dialog boxes
|
||||||
|
# -B is equivalent to -c -A
|
||||||
|
|
BIN
Genius3/python/PySide/QtCore.pyd
Normal file
BIN
Genius3/python/PySide/QtCore.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtDeclarative.pyd
Normal file
BIN
Genius3/python/PySide/QtDeclarative.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtGui.pyd
Normal file
BIN
Genius3/python/PySide/QtGui.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtHelp.pyd
Normal file
BIN
Genius3/python/PySide/QtHelp.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtMultimedia.pyd
Normal file
BIN
Genius3/python/PySide/QtMultimedia.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtNetwork.pyd
Normal file
BIN
Genius3/python/PySide/QtNetwork.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtOpenGL.pyd
Normal file
BIN
Genius3/python/PySide/QtOpenGL.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtScript.pyd
Normal file
BIN
Genius3/python/PySide/QtScript.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtScriptTools.pyd
Normal file
BIN
Genius3/python/PySide/QtScriptTools.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtSql.pyd
Normal file
BIN
Genius3/python/PySide/QtSql.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtSvg.pyd
Normal file
BIN
Genius3/python/PySide/QtSvg.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtTest.pyd
Normal file
BIN
Genius3/python/PySide/QtTest.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtUiTools.pyd
Normal file
BIN
Genius3/python/PySide/QtUiTools.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtXml.pyd
Normal file
BIN
Genius3/python/PySide/QtXml.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/QtXmlPatterns.pyd
Normal file
BIN
Genius3/python/PySide/QtXmlPatterns.pyd
Normal file
Binary file not shown.
3
Genius3/python/PySide/__init__.py
Normal file
3
Genius3/python/PySide/__init__.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
__all__ = ['QtCore', 'QtGui', 'QtNetwork', 'QtOpenGL', 'QtSql', 'QtSvg', 'QtTest', 'QtWebKit', 'QtScript']
|
||||||
|
__version__ = "1.1.2"
|
||||||
|
__version_info__ = (1, 1, 2, "final", 1)
|
BIN
Genius3/python/PySide/phonon.pyd
Normal file
BIN
Genius3/python/PySide/phonon.pyd
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/pyside-python2.7.dll
Normal file
BIN
Genius3/python/PySide/pyside-python2.7.dll
Normal file
Binary file not shown.
BIN
Genius3/python/PySide/shiboken-python2.7.dll
Normal file
BIN
Genius3/python/PySide/shiboken-python2.7.dll
Normal file
Binary file not shown.
45005
Genius3/python/idaapi.py
Normal file
45005
Genius3/python/idaapi.py
Normal file
File diff suppressed because it is too large
Load Diff
BIN
Genius3/python/idaapi.pyc
Normal file
BIN
Genius3/python/idaapi.pyc
Normal file
Binary file not shown.
830
Genius3/python/idautils.py
Normal file
830
Genius3/python/idautils.py
Normal file
@ -0,0 +1,830 @@
|
|||||||
|
#---------------------------------------------------------------------
|
||||||
|
# IDAPython - Python plugin for Interactive Disassembler
|
||||||
|
#
|
||||||
|
# Copyright (c) 2004-2010 Gergely Erdelyi <gergely.erdelyi@d-dome.net>
|
||||||
|
#
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# For detailed copyright information see the file COPYING in
|
||||||
|
# the root of the distribution archive.
|
||||||
|
#---------------------------------------------------------------------
|
||||||
|
"""
|
||||||
|
idautils.py - High level utility functions for IDA
|
||||||
|
"""
|
||||||
|
import idaapi
|
||||||
|
import idc
|
||||||
|
import types
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def refs(ea, funcfirst, funcnext):
    """
    Generic reference collector - INTERNAL USE ONLY.

    Generator that yields funcfirst(ea) and then every value produced by
    funcnext(ea, previous), stopping as soon as idaapi.BADADDR is returned.

    @param ea: address handed to both callbacks
    @param funcfirst: callable(ea) returning the first reference
    @param funcnext: callable(ea, current) returning the following reference
    """
    current = funcfirst(ea)
    while True:
        if current == idaapi.BADADDR:
            # End-of-list marker reached.
            return
        yield current
        current = funcnext(ea, current)
|
||||||
|
|
||||||
|
|
||||||
|
def CodeRefsTo(ea, flow):
    """
    Get the code references to 'ea'

    @param ea:   Target address
    @param flow: Follow normal code flow or not
    @type  flow: Boolean (0/1, False/True)

    @return: generator of references (may yield nothing)

    @note: only a value equal to 1 selects the get_*_cref_to pair; every
           other value uses the get_*_fcref_to pair.

    Example::

        for ref in CodeRefsTo(ScreenEA(), 1):
            print ref
    """
    if flow == 1:
        first, following = idaapi.get_first_cref_to, idaapi.get_next_cref_to
    else:
        first, following = idaapi.get_first_fcref_to, idaapi.get_next_fcref_to
    return refs(ea, first, following)
|
||||||
|
|
||||||
|
|
||||||
|
def CodeRefsFrom(ea, flow):
    """
    Get the code references from 'ea'

    @param ea:   Target address
    @param flow: Follow normal code flow or not
    @type  flow: Boolean (0/1, False/True)

    @return: generator of references (may yield nothing)

    @note: only a value equal to 1 selects the get_*_cref_from pair; every
           other value uses the get_*_fcref_from pair.

    Example::

        for ref in CodeRefsFrom(ScreenEA(), 1):
            print ref
    """
    if flow == 1:
        first, following = idaapi.get_first_cref_from, idaapi.get_next_cref_from
    else:
        first, following = idaapi.get_first_fcref_from, idaapi.get_next_fcref_from
    return refs(ea, first, following)
|
||||||
|
|
||||||
|
|
||||||
|
def DataRefsTo(ea):
    """
    Get the data references to 'ea'

    @param ea: Target address

    @return: generator of references (may yield nothing)

    Example::

        for ref in DataRefsTo(ScreenEA()):
            print ref
    """
    first = idaapi.get_first_dref_to
    following = idaapi.get_next_dref_to
    return refs(ea, first, following)
|
||||||
|
|
||||||
|
|
||||||
|
def DataRefsFrom(ea):
    """
    Get the data references from 'ea'

    @param ea: Target address

    @return: generator of references (may yield nothing)

    Example::

        for ref in DataRefsFrom(ScreenEA()):
            print ref
    """
    first = idaapi.get_first_dref_from
    following = idaapi.get_next_dref_from
    return refs(ea, first, following)
|
||||||
|
|
||||||
|
|
||||||
|
# Cross-reference type code -> readable name. Built once instead of on
# every XrefTypeName() call.
_XREF_TYPE_NAMES = {
    0: 'Data_Unknown',
    1: 'Data_Offset',
    2: 'Data_Write',
    3: 'Data_Read',
    4: 'Data_Text',
    5: 'Data_Informational',
    16: 'Code_Far_Call',
    17: 'Code_Near_Call',
    18: 'Code_Far_Jump',
    19: 'Code_Near_Jump',
    20: 'Code_User',
    21: 'Ordinary_Flow',
}


def XrefTypeName(typecode):
    """
    Convert cross-reference type codes to readable names

    @param typecode: cross-reference type code

    @return: the readable name of the type code

    @raise AssertionError: if 'typecode' is not one of the known codes
    """
    if typecode not in _XREF_TYPE_NAMES:
        # Raised explicitly (not via 'assert') so the check is not stripped
        # under -O, and formatted with %s so a non-numeric code reports
        # itself instead of failing inside the message formatting.
        raise AssertionError("unknown reference type %s" % (typecode,))
    return _XREF_TYPE_NAMES[typecode]
|
||||||
|
|
||||||
|
|
||||||
|
def _copy_xref(xref):
    """
    Make a private copy of the xref class to preserve its contents

    Only the 'frm', 'to', 'iscode', 'type' and 'user' attributes are copied
    onto a fresh, otherwise empty object.
    """
    class _xref(object):
        pass

    snapshot = _xref()
    for field in ('frm', 'to', 'iscode', 'type', 'user'):
        value = getattr(xref, field)
        setattr(snapshot, field, value)
    return snapshot
|
||||||
|
|
||||||
|
|
||||||
|
def XrefsFrom(ea, flags=0):
    """
    Return all references from address 'ea'

    Generator; every yielded item is a private copy made by _copy_xref(),
    so it stays valid while the underlying xrefblk_t advances.

    @param ea: Reference address
    @param flags: any of idaapi.XREF_* flags

    Example::
           for xref in XrefsFrom(here(), 0):
               print xref.type, XrefTypeName(xref.type), \
                         'from', hex(xref.frm), 'to', hex(xref.to)
    """
    block = idaapi.xrefblk_t()
    has_entry = block.first_from(ea, flags)
    while has_entry:
        yield _copy_xref(block)
        has_entry = block.next_from()
|
||||||
|
|
||||||
|
|
||||||
|
def XrefsTo(ea, flags=0):
    """
    Return all references to address 'ea'

    Generator; every yielded item is a private copy made by _copy_xref(),
    so it stays valid while the underlying xrefblk_t advances.

    @param ea: Reference address
    @param flags: any of idaapi.XREF_* flags

    Example::
           for xref in XrefsTo(here(), 0):
               print xref.type, XrefTypeName(xref.type), \
                         'from', hex(xref.frm), 'to', hex(xref.to)
    """
    block = idaapi.xrefblk_t()
    has_entry = block.first_to(ea, flags)
    while has_entry:
        yield _copy_xref(block)
        has_entry = block.next_to()
|
||||||
|
|
||||||
|
|
||||||
|
def Threads():
    """Yield the ID of every thread, indexed 0 .. idc.GetThreadQty() - 1"""
    thread_count = idc.GetThreadQty()
    for index in xrange(thread_count):
        yield idc.GetThreadId(index)
|
||||||
|
|
||||||
|
|
||||||
|
def Heads(start=None, end=None):
    """
    Get a list of heads (instructions or data)

    @param start: start address (default: inf.minEA)
    @param end:   end address (default: inf.maxEA)

    @return: generator of heads between start and end

    @note: any falsy bound (None or 0) is replaced by its default.
    """
    start = start or idaapi.cvar.inf.minEA
    end = end or idaapi.cvar.inf.maxEA

    # Begin at 'start' itself when it is a head, otherwise at the next head.
    if idc.isHead(idc.GetFlags(start)):
        cursor = start
    else:
        cursor = idaapi.next_head(start, end)

    while cursor != idaapi.BADADDR:
        yield cursor
        cursor = idaapi.next_head(cursor, end)
|
||||||
|
|
||||||
|
|
||||||
|
def Functions(start=None, end=None):
    """
    Get a list of functions

    @param start: start address (default: inf.minEA)
    @param end:   end address (default: inf.maxEA)

    @return: generator of function start addresses between start and end

    @note: The last function that starts before 'end' is included even
    if it extends beyond 'end'. Any function that has its chunks scattered
    in multiple segments will be reported multiple times, once in each segment
    as they are listed.
    @note: any falsy bound (None or 0) is replaced by its default.
    """
    start = start or idaapi.cvar.inf.minEA
    end = end or idaapi.cvar.inf.maxEA

    # Locate the first chunk in range that is not flagged as a function tail.
    candidate = idaapi.get_fchunk(start) or idaapi.get_next_fchunk(start)
    while candidate and candidate.startEA < end:
        if not (candidate.flags & idaapi.FUNC_TAIL):
            break
        candidate = idaapi.get_next_fchunk(candidate.startEA)

    current = candidate
    while current and current.startEA < end:
        entry = current.startEA
        yield entry
        current = idaapi.get_next_func(entry)
|
||||||
|
|
||||||
|
|
||||||
|
def Chunks(start):
    """
    Get a list of function chunks

    @param start: address of the function

    @return: generator of function chunks (tuples of the form
             (start_ea, end_ea)) belonging to the function; nothing is
             yielded when idaapi.get_func() finds no function at 'start'
    """
    func = idaapi.get_func(start)
    if not func:
        # Same guard as FuncItems(): never hand a missing function to the
        # tail iterator.
        return

    func_iter = idaapi.func_tail_iterator_t(func)
    status = func_iter.main()
    while status:
        chunk = func_iter.chunk()
        yield (chunk.startEA, chunk.endEA)
        status = func_iter.next()
|
||||||
|
|
||||||
|
|
||||||
|
def Modules():
    """
    Yield one object per module, carrying the name, size, base and
    rebase_to attributes copied from the module info record.
    """
    info = idaapi.module_info_t()
    found = idaapi.get_first_module(info)
    while found:
        yield idaapi.object_t(
            name=info.name,
            size=info.size,
            base=info.base,
            rebase_to=info.rebase_to)
        found = idaapi.get_next_module(info)
|
||||||
|
|
||||||
|
|
||||||
|
def Names():
    """
    Returns a list of names

    @return: generator of tuples (ea, name), one per name-list entry
    """
    total = idaapi.get_nlist_size()
    for index in xrange(total):
        yield (idaapi.get_nlist_ea(index), idaapi.get_nlist_name(index))
|
||||||
|
|
||||||
|
|
||||||
|
def Segments():
    """
    Get list of segments (sections) in the binary image

    @return: generator of segment start addresses.
    """
    segment_count = idaapi.get_segm_qty()
    for index in xrange(segment_count):
        segment = idaapi.getnseg(index)
        if not segment:
            # Skip indexes for which no segment object is returned.
            continue
        yield segment.startEA
|
||||||
|
|
||||||
|
|
||||||
|
def Entries():
    """
    Returns a list of entry points

    @return: generator of tuples (index, ordinal, ea, name)
    """
    entry_count = idaapi.get_entry_qty()
    for index in xrange(entry_count):
        ordinal = idaapi.get_entry_ordinal(index)
        yield (index,
               ordinal,
               idaapi.get_entry(ordinal),
               idaapi.get_entry_name(ordinal))
|
||||||
|
|
||||||
|
|
||||||
|
def FuncItems(start):
    """
    Get a list of function items

    @param start: address of the function

    @return: generator of the ea of each item in the function; nothing is
             yielded when idaapi.get_func() finds no function at 'start'

    @note: iteration advances with next_code().
    """
    func = idaapi.get_func(start)
    if func:
        iterator = idaapi.func_item_iterator_t()
        more = iterator.set(func)
        while more:
            yield iterator.current()
            more = iterator.next_code()
|
||||||
|
|
||||||
|
|
||||||
|
def Structs():
    """
    Get a list of structures

    @return: generator of tuples (idx, sid, name)
    """
    index = idc.GetFirstStrucIdx()
    while index != idaapi.BADADDR:
        struct_id = idc.GetStrucId(index)
        struct_name = idc.GetStrucName(struct_id)
        yield (index, struct_id, struct_name)
        index = idc.GetNextStrucIdx(index)
|
||||||
|
|
||||||
|
|
||||||
|
def StructMembers(sid):
    """
    Get a list of structure members information (or stack vars if given a frame).

    @param sid: ID of the structure.

    @return: generator of tuples (offset, name, size)

    @note: If 'sid' does not refer to a valid structure,
           an exception will be raised (when iteration starts, since this
           is a generator).
    @note: This will not return 'holes' in structures/stack frames;
           it only returns defined structure members.
    """
    offset = idc.GetFirstMember(sid)
    if offset == -1:
        raise Exception("No structure with ID: 0x%x" % sid)

    while offset != idaapi.BADADDR:
        member_name = idc.GetMemberName(sid, offset)
        if member_name:
            # Offsets without a member name are skipped.
            yield (offset, member_name, idc.GetMemberSize(sid, offset))
        offset = idc.GetStrucNextOff(sid, offset)
|
||||||
|
|
||||||
|
|
||||||
|
def DecodePrecedingInstruction(ea):
    """
    Decode preceding instruction in the execution flow.

    @param ea: address to decode
    @return: (None or the decoded instruction, farref)
             farref will contain 'true' if followed an xref, false otherwise;
             (None, False) is returned when no preceding instruction is found
    """
    previous, farref = idaapi.decode_preceding_insn(ea)
    if previous != idaapi.BADADDR:
        # Copy idaapi.cmd so the result does not change with later decodes.
        return (idaapi.cmd.copy(), farref)
    return (None, False)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def DecodePreviousInstruction(ea):
    """
    Decodes the previous instruction and returns an insn_t like class

    @param ea: address to decode
    @return: None or a new insn_t instance (a copy of idaapi.cmd)
    """
    previous = idaapi.decode_prev_insn(ea)
    if previous != idaapi.BADADDR:
        return idaapi.cmd.copy()
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def DecodeInstruction(ea):
    """
    Decodes an instruction and returns an insn_t like class

    @param ea: address to decode
    @return: None (when the decoded length is 0) or a new insn_t instance
             (a copy of idaapi.cmd)
    """
    length = idaapi.decode_insn(ea)
    if length != 0:
        return idaapi.cmd.copy()
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def GetDataList(ea, count, itemsize=1):
    """
    Get data list - INTERNAL USE ONLY

    Generator yielding 'count' items of 'itemsize' bytes each, read from
    consecutive addresses starting at 'ea'.

    @raise ValueError: if 'itemsize' is not 1, 2, 4 or 8 (raised when
                       iteration starts, since this is a generator)
    """
    if itemsize == 1:
        reader = idaapi.get_byte
    elif itemsize == 2:
        reader = idaapi.get_word
    elif itemsize == 4:
        reader = idaapi.get_long
    elif itemsize == 8:
        reader = idaapi.get_qword
    else:
        raise ValueError("Invalid data size! Must be 1, 2, 4 or 8")

    stop = ea + itemsize * count
    address = ea
    while address < stop:
        yield reader(address)
        address += itemsize
|
||||||
|
|
||||||
|
|
||||||
|
def PutDataList(ea, datalist, itemsize=1):
    """
    Put data list - INTERNAL USE ONLY

    Patches every value of 'datalist' into consecutive addresses starting
    at 'ea', advancing by 'itemsize' bytes per value.

    @note: 'itemsize' must be 1, 2 or 4; anything else fails the assertion.
    """
    if itemsize == 1:
        writer = idaapi.patch_byte
    elif itemsize == 2:
        writer = idaapi.patch_word
    elif itemsize == 4:
        writer = idaapi.patch_long
    else:
        writer = None

    assert writer, "Invalid data size! Must be 1, 2 or 4"

    address = ea
    for value in datalist:
        writer(address, value)
        address = address + itemsize
|
||||||
|
|
||||||
|
|
||||||
|
def MapDataList(ea, length, func, wordsize=1):
    """
    Map through a list of data words in the database

    The words are read with GetDataList(), passed through 'func' and the
    results are written back to the same addresses with PutDataList().

    @param ea:       start address
    @param length:   number of words to map
    @param func:     mapping function
    @param wordsize: size of words to map [default: 1 byte]

    @return: None
    """
    current_words = GetDataList(ea, length, wordsize)
    mapped_words = map(func, current_words)
    PutDataList(ea, mapped_words, wordsize)
|
||||||
|
|
||||||
|
|
||||||
|
def GetInputFileMD5():
    """
    Return the MD5 hash of the input binary file

    Thin wrapper that forwards to idc.GetInputMD5().

    @return: MD5 string or None on error
    """
    digest = idc.GetInputMD5()
    return digest
|
||||||
|
|
||||||
|
|
||||||
|
class Strings(object):
    """
    Allows iterating over the string list. The string list options are not
    modified unless asked explicitly through setup() (or default_setup=True).

    Example:
        s = Strings()

        for i in s:
            print "%x: len=%d type=%d -> '%s'" % (i.ea, i.length, i.type, str(i))

    """
    class StringItem(object):
        """
        Class representing each string item.
        """
        def __init__(self, si):
            # String ea
            self.ea = si.ea
            # String type (ASCSTR_xxxxx)
            self.type = si.type
            # String length
            self.length = si.length

        def is_1_byte_encoding(self):
            """True when the item is neither a 2-byte nor a 4-byte encoded string"""
            return not self.is_2_bytes_encoding() and not self.is_4_bytes_encoding()

        def is_2_bytes_encoding(self):
            """True when the low 3 bits of the type are ASCSTR_UTF16, ASCSTR_ULEN2 or ASCSTR_ULEN4"""
            return (self.type & 7) in [idaapi.ASCSTR_UTF16, idaapi.ASCSTR_ULEN2, idaapi.ASCSTR_ULEN4]

        def is_4_bytes_encoding(self):
            """True when the low 3 bits of the type are ASCSTR_UTF32"""
            return (self.type & 7) == idaapi.ASCSTR_UTF32

        def _toseq(self, as_unicode):
            """
            Fetch the string contents.

            @param as_unicode: when true, decode the bytes (with 'replace'
                               error handling) and return a unicode object;
                               otherwise return the raw bytes
            """
            if self.is_2_bytes_encoding():
                conv = idaapi.ACFOPT_UTF16
                pyenc = "utf-16"
            elif self.is_4_bytes_encoding():
                conv = idaapi.ACFOPT_UTF8
                pyenc = "utf-8"
            else:
                conv = idaapi.ACFOPT_ASCII
                pyenc = 'ascii'
            strbytes = idaapi.get_ascii_contents2(self.ea, self.length, self.type, conv)
            return unicode(strbytes, pyenc, 'replace') if as_unicode else strbytes

        def __str__(self):
            return self._toseq(False)

        def __unicode__(self):
            return self._toseq(True)


    # C-style ASCII string
    STR_C = 0x0001
    # Pascal-style ASCII string (length byte)
    STR_PASCAL = 0x0002
    # Pascal-style, length is 2 bytes
    STR_LEN2 = 0x0004
    # Unicode string
    STR_UNICODE = 0x0008
    # Pascal-style, length is 4 bytes
    STR_LEN4 = 0x0010
    # Pascal-style Unicode, length is 2 bytes
    STR_ULEN2 = 0x0020
    # Pascal-style Unicode, length is 4 bytes
    STR_ULEN4 = 0x0040

    def clear_cache(self):
        """Clears the strings list cache"""
        self.refresh(0, 0) # when ea1=ea2 the kernel will clear the cache

    def __init__(self, default_setup = False):
        """
        Initializes the Strings enumeration helper class

        @param default_setup: Set to True to use default setup (C strings, min len 5, ...)
        """
        self.size = 0
        if default_setup:
            self.setup()
        else:
            self.refresh()

        # Scratch record that _get_item() fills for each requested index.
        self._si = idaapi.string_info_t()

    def refresh(self, ea1=None, ea2=None):
        """
        Refreshes the strings list

        @param ea1: range start (default: inf.minEA)
        @param ea2: range end (default: inf.maxEA)
        """
        if ea1 is None:
            ea1 = idaapi.cvar.inf.minEA
        if ea2 is None:
            ea2 = idaapi.cvar.inf.maxEA

        idaapi.refresh_strlist(ea1, ea2)
        self.size = idaapi.get_strlist_qty()


    def setup(self,
              strtypes = STR_C,
              minlen = 5,
              only_7bit = True,
              ignore_instructions = False,
              ea1 = None,
              ea2 = None,
              display_only_existing_strings = False):
        """
        Apply new string list options and refresh the list.

        @param strtypes: combination of the STR_* constants
        @param minlen: minimal string length
        @param only_7bit: stored into the 'only_7bit' option
        @param ignore_instructions: stored into the 'ignore_heads' option
        @param ea1: range start (default: inf.minEA)
        @param ea2: range end (default: inf.maxEA)
        @param display_only_existing_strings: stored into the option of the
                                              same name
        """
        if ea1 is None:
            ea1 = idaapi.cvar.inf.minEA

        if ea2 is None:
            ea2 = idaapi.cvar.inf.maxEA

        t = idaapi.strwinsetup_t()
        t.strtypes = strtypes
        t.minlen = minlen
        t.only_7bit = only_7bit
        # Previously this argument was accepted but silently dropped.
        # NOTE(review): 'ignore_heads' is the strwinsetup_t field name used
        # by the IDA SDK / later IDAPython releases - confirm for this build.
        t.ignore_heads = ignore_instructions
        t.ea1 = ea1
        t.ea2 = ea2
        t.display_only_existing_strings = display_only_existing_strings
        idaapi.set_strlist_options(t)

        # Automatically refreshes
        self.refresh()


    def _get_item(self, index):
        """Return a StringItem for 'index', or None when the kernel has no such item"""
        if not idaapi.get_strlist_item(index, self._si):
            return None
        else:
            return Strings.StringItem(self._si)


    def __iter__(self):
        return (self._get_item(index) for index in xrange(0, self.size))


    def __getitem__(self, index):
        """
        Returns a string item or None

        @raise KeyError: if 'index' is not smaller than the list size
        """
        if index >= self.size:
            raise KeyError
        else:
            return self._get_item(index)
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
def GetIdbDir():
    """
    Get IDB directory

    This function returns directory path of the current IDB database,
    with a trailing os.sep appended.
    """
    idb_path = idaapi.cvar.database_idb
    directory = os.path.dirname(idb_path)
    return directory + os.sep
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
def GetRegisterList():
    """
    Returns the register list

    @return: whatever idaapi.ph_get_regnames() returns
    """
    register_names = idaapi.ph_get_regnames()
    return register_names
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
def GetInstructionList():
    """
    Returns the instruction list of the current processor module

    @return: list with the first field of every idaapi.ph_get_instruc()
             entry, skipping entries whose first field is empty
    """
    names = []
    for entry in idaapi.ph_get_instruc():
        if entry[0]:
            names.append(entry[0])
    return names
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
def _Assemble(ea, line):
    """
    Please refer to Assemble() - INTERNAL USE ONLY

    @param ea: address at which the first line is assembled; it advances by
               the length of each assembled buffer
    @param line: a single string, or an iterable of strings
    @return: (False, "Error message") or (True, asm_buf) or
             (True, [asm_buf1, asm_buf2, ...])
    """
    # isinstance() rather than an exact type comparison, so that str
    # subclasses count as one line instead of being iterated per character
    if isinstance(line, str):
        lines = [line]
    else:
        lines = line

    ret = []
    for text in lines:
        seg = idaapi.getseg(ea)
        if not seg:
            return (False, "No segment at ea")
        # Offset of ea from the segment base (selector value shifted by 4)
        ip = ea - (idaapi.ask_selector(seg.sel) << 4)
        buf = idaapi.AssembleLine(ea, seg.sel, ip, seg.bitness, text)
        if not buf:
            return (False, "Assembler failed: " + text)
        ea += len(buf)
        ret.append(buf)

    # A single assembled line is returned bare, not wrapped in a list
    if len(ret) == 1:
        ret = ret[0]
    return (True, ret)
|
||||||
|
|
||||||
|
|
||||||
|
def Assemble(ea, line):
    """
    Assembles one or more lines (does not display any message dialogs)
    If line is a list then this function will attempt to assemble all the lines
    This function will turn on batch mode temporarily so that no messages are displayed on the screen

    @param ea:       start address
    @param line:     a string, or a list of strings
    @return: (False, "Error message") or (True, asm_buf) or (True, [asm_buf1, asm_buf2, asm_buf3])
    """
    old_batch = idc.Batch(1)
    try:
        return _Assemble(ea, line)
    finally:
        # Restore the previous batch mode even if assembling raised
        idc.Batch(old_batch)
|
||||||
|
|
||||||
|
def _copy_obj(src, dest, skip_list = None):
    """
    Copy non-dunder, non-callable attributes from one object to another

    @param src: Source object to copy from
    @param dest: If it is a string then a new class object with that name is
                 created via new.classobj() and the attributes are set on it.
                 Otherwise dest should be an instance of another class
    @param skip_list: optional container of attribute names to leave out
    @return: The newly created class object, or "dest"
    """
    if isinstance(dest, str):
        # create a new destination class object with the given name
        dest = new.classobj(dest, (), {})

    for name in dir(src):
        # skip special (double-underscore) fields only; names with a
        # single leading underscore are still copied
        if name.startswith("__") and name.endswith("__"):
            continue
        # skip items in the skip list
        if skip_list and name in skip_list:
            continue
        value = getattr(src, name)
        # skip methods and other callables
        if callable(value):
            continue
        setattr(dest, name, value)
    return dest
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
class _reg_dtyp_t(object):
    """
    INTERNAL
    This class describes a register's number and dtyp.
    The equality operators are overloaded so that two instances (or any
    object exposing 'reg' and 'dtyp' attributes) can be compared
    """
    def __init__(self, reg, dtyp):
        self.reg  = reg
        self.dtyp = dtyp

    def __eq__(self, other):
        try:
            return (self.reg == other.reg) and (self.dtyp == other.dtyp)
        except AttributeError:
            # 'other' has no reg/dtyp: let Python try the reflected
            # comparison instead of failing
            return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
class _procregs(object):
    """Utility class allowing the users to identify registers in a decoded instruction"""
    def __getattr__(self, attr):
        """
        Resolve 'attr' as a register name through idaapi.parse_reg_name().

        @return: a _reg_dtyp_t for the register
        @raise AttributeError: when the name cannot be parsed
        """
        ri = idaapi.reg_info_t()
        if not idaapi.parse_reg_name(attr, ri):
            # Carry the attribute name, as __setattr__ does
            raise AttributeError(attr)
        r = _reg_dtyp_t(ri.reg, ord(idaapi.get_dtyp_by_size(ri.size)))
        # Store through __dict__ because __setattr__ rejects assignments;
        # later lookups of the same name no longer reach __getattr__
        self.__dict__[attr] = r
        return r

    def __setattr__(self, attr, value):
        """Attributes cannot be assigned on this object"""
        raise AttributeError(attr)
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
class _cpu(object):
    """Simple wrapper around GetRegValue/SetRegValue"""

    def __getattr__(self, name):
        """Read register 'name' through idc.GetRegValue()"""
        value = idc.GetRegValue(name)
        return value

    def __setattr__(self, name, value):
        """Write 'value' to register 'name' through idc.SetRegValue()"""
        result = idc.SetRegValue(value, name)
        return result
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
class __process_ui_actions_helper(object):
    """
    Callable that runs one UI action per invocation.

    Each call processes the next action from the list and returns True while
    actions remain to be processed, False once the list is exhausted.
    """
    def __init__(self, actions, flags = 0):
        """Expect a list or a string with a list of actions"""
        if isinstance(actions, str):
            lst = actions.split(";")
        elif isinstance(actions, (list, tuple)):
            lst = actions
        else:
            raise ValueError("Must pass a string, list or a tuple")

        # Remember the action list and the flags
        self.__action_list = lst
        self.__flags = flags

        # Reset action index
        self.__idx = 0

    def __len__(self):
        """Number of actions in the list"""
        return len(self.__action_list)

    def __call__(self):
        if self.__idx >= len(self.__action_list):
            return False

        # Execute one action
        idaapi.process_ui_action(
                self.__action_list[self.__idx],
                self.__flags)

        # Move to next action
        self.__idx += 1

        # Reschedule
        return True
|
||||||
|
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
def ProcessUiActions(actions, flags=0):
    """
    Schedule a list of UI actions for execution.

    @param actions: A string containing a list of actions separated by semicolon, a list or a tuple
    @param flags: flags to be passed to process_ui_action()
    @return: Boolean. Returns False if the action list was empty or execute_ui_requests() failed.
    """
    # Instantiate a helper
    helper = __process_ui_actions_helper(actions, flags)
    if len(helper) < 1:
        return False
    return idaapi.execute_ui_requests((helper,))
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
class peutils_t(object):
    """
    PE utility class. Retrieves PE information from the database.

    Constants from pe.h
    """
    PE_NODE = "$ PE header" # netnode name for PE header
    PE_ALT_DBG_FPOS   = idaapi.BADADDR & -1 # altval() -> translated fpos of debuginfo
    PE_ALT_IMAGEBASE  = idaapi.BADADDR & -2 # altval() -> loading address (usually pe.imagebase)
    PE_ALT_PEHDR_OFF  = idaapi.BADADDR & -3 # altval() -> offset of PE header
    PE_ALT_NEFLAGS    = idaapi.BADADDR & -4 # altval() -> neflags
    PE_ALT_TDS_LOADED = idaapi.BADADDR & -5 # altval() -> tds already loaded(1) or invalid(-1)
    PE_ALT_PSXDLL     = idaapi.BADADDR & -6 # altval() -> if POSIX(x86) imports from PSXDLL netnode

    def __init__(self):
        self.__penode = idaapi.netnode()
        self.__penode.create(peutils_t.PE_NODE)

    # Loading address (usually pe.imagebase)
    imagebase = property(
        lambda self: self.__penode.altval(peutils_t.PE_ALT_IMAGEBASE)
      )

    # Offset of the PE header. This used to be a property named 'header',
    # which the header() method below silently replaced; it now has its own
    # name so that both are reachable.
    header_offset = property(
        lambda self: self.__penode.altval(peutils_t.PE_ALT_PEHDR_OFF)
      )

    def __str__(self):
        # Uses header_offset: self.header is a method, and hex() of a bound
        # method raises TypeError
        return "peutils_t(imagebase=%s, header=%s)" % (hex(self.imagebase), hex(self.header_offset))

    def header(self):
        """
        Returns the complete PE header as an instance of peheader_t (defined in the SDK).
        """
        return self.__penode.valobj()
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
cpu = _cpu()
"""This is a special class instance used to access the registers as if they were attributes of this object.
For example to access the EAX register:
    print "%x" % cpu.Eax
"""

procregs = _procregs()
"""This object is used to access the processor registers. It is useful when decoding instructions and you want to see which operand is which.
For example:
    x = idautils.DecodeInstruction(here())
    if x[0] == procregs.Esp:
        print "This operand is the register ESP"
"""
|
BIN
Genius3/python/idautils.pyc
Normal file
BIN
Genius3/python/idautils.pyc
Normal file
Binary file not shown.
8590
Genius3/python/idc.py
Normal file
8590
Genius3/python/idc.py
Normal file
File diff suppressed because it is too large
Load Diff
BIN
Genius3/python/idc.pyc
Normal file
BIN
Genius3/python/idc.pyc
Normal file
Binary file not shown.
111
Genius3/python/init.py
Normal file
111
Genius3/python/init.py
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
# IDAPython - Python plugin for Interactive Disassembler
|
||||||
|
#
|
||||||
|
# Copyright (c) The IDAPython Team <idapython@googlegroups.com>
|
||||||
|
#
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# For detailed copyright information see the file COPYING in
|
||||||
|
# the root of the distribution archive.
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
# init.py - Essential init routines
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import warnings
|
||||||
|
import _idaapi
|
||||||
|
|
||||||
|
# __EA64__ is set if IDA is running in 64-bit mode
# (detected by BADADDR being equal to the all-ones 64-bit value)
__EA64__ = _idaapi.BADADDR == 0xFFFFFFFFFFFFFFFFL
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
# Take over the standard text outputs
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
class IDAPythonStdOut:
    """
    Dummy file-like class that receives stdout and stderr
    and forwards everything written to it to _idaapi.msg()
    """
    def write(self, text):
        """Send 'text' to the IDA message window"""
        # NB: in case 'text' is Unicode, msg() will decode it
        # and call umsg() to print it
        _idaapi.msg(text)

    def flush(self):
        """Nothing to flush; present for file-object compatibility"""
        return None

    def isatty(self):
        """This stream is never a terminal"""
        return False
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
def runscript(script):
    """
    Executes a script.
    This function is present for backward compatibility. Please use idaapi.IDAPython_ExecScript() instead

    @param script: script path

    @return: Error string or None on success
    """
    import idaapi
    # The script runs with this module's global namespace
    namespace = globals()
    return idaapi.IDAPython_ExecScript(script, namespace)
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
def print_banner():
    """Print the Python and IDAPython version lines framed by dashed separators"""
    bitness = " 64-bit" if __EA64__ else ""
    python_line = "Python %s " % sys.version
    # The % operator applies to the last literal only; IDAPYTHON_VERSION
    # supplies its five fields
    plugin_line = "IDAPython" + bitness + " v%d.%d.%d %s (serial %d) (c) The IDAPython Team <idapython@googlegroups.com>" % IDAPYTHON_VERSION
    banner = [python_line, plugin_line]

    # Separator is one character longer than the longest banner line
    longest = max([len(entry) for entry in banner])
    sepline = '-' * (longest + 1)

    print(sepline)
    print("\n".join(banner))
    print(sepline)
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Redirect stderr and stdout to the IDA message window.
# The original stream objects are kept in _orig_stdout / _orig_stderr.
_orig_stdout = sys.stdout;
_orig_stderr = sys.stderr;
# Both streams share a single IDAPythonStdOut instance
sys.stdout = sys.stderr = IDAPythonStdOut()
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
# Initialize the help, with our own stdin wrapper, that'll query the user
|
||||||
|
# -----------------------------------------------------------------------
|
||||||
|
import pydoc
|
||||||
|
class IDAPythonHelpPrompter:
    """Input object for pydoc.Helper: each readline() asks the user through idaapi.askstr()"""
    def readline(self):
        topic = idaapi.askstr(0, '', 'Help topic?')
        return topic

# help() reads topics from the prompter above and writes to the redirected stdout
help = pydoc.Helper(input = IDAPythonHelpPrompter(), output = sys.stdout)
|
||||||
|
|
||||||
|
# Assign a default sys.argv
sys.argv = [""]

# Have to make sure Python finds our modules
sys.path.append(_idaapi.idadir("python"))

# Remove current directory from the top of the path search
if '' in sys.path: # On non Windows, the empty path is added
    sys.path.remove('')

# list.remove() drops only the first occurrence of the working directory
if os.getcwd() in sys.path:
    sys.path.remove(os.getcwd())

# ...and add it to the end if needed
# (IDAPYTHON_REMOVE_CWD_SYS_PATH is not defined in this file)
if not IDAPYTHON_REMOVE_CWD_SYS_PATH:
    sys.path.append(os.getcwd())
|
||||||
|
|
||||||
|
# Import all the required modules
|
||||||
|
from idaapi import Choose, get_user_idadir, cvar, Choose2, Appcall, Form
|
||||||
|
from idc import *
|
||||||
|
from idautils import *
|
||||||
|
import idaapi
|
||||||
|
|
||||||
|
# Load the user's personal init file: idapythonrc.py in the directory
# returned by get_user_idadir(), executed with this module's globals
userrc = os.path.join(get_user_idadir(), "idapythonrc.py")
if os.path.exists(userrc):
    idaapi.IDAPython_ExecScript(userrc, globals())
|
||||||
|
|
||||||
|
# All done, ready to rock.
|
286
Genius3/raw-feature-extractor/cfg_constructor.py
Normal file
286
Genius3/raw-feature-extractor/cfg_constructor.py
Normal file
@ -0,0 +1,286 @@
|
|||||||
|
import copy
|
||||||
|
import networkx as nx
|
||||||
|
from idautils import *
|
||||||
|
from idaapi import *
|
||||||
|
from idc import *
|
||||||
|
|
||||||
|
import copy
|
||||||
|
import networkx as nx
|
||||||
|
from idautils import *
|
||||||
|
from idaapi import *
|
||||||
|
from idc import *
|
||||||
|
from graph_analysis_ida import *
|
||||||
|
|
||||||
|
|
||||||
|
def getCfg(func, externs_eas, ea_externs):
    """
    Build the attributed control-flow graph of a function.

    Nodes are consecutive integer ids; the (start, end) address pair of the
    block is stored under 'label'. The block starting at func.startEA gets
    c="start" and a block ending at func.endEA gets c="end". For every code
    reference (both CodeRefsTo flow modes) that originates from an address
    known to obtain_block_sequence(), an edge referrer -> block is added.

    @param func: function object exposing startEA / endEA
    @param externs_eas: forwarded to attributingRe()
    @param ea_externs: forwarded to attributingRe()
    @return: tuple (cfg, 0)
    """
    entry_ea = func.startEA
    exit_ea = func.endEA
    cfg = nx.DiGraph()
    control_blocks, main_blocks = obtain_block_sequence(func)
    node_ids = {}

    for key in control_blocks:
        block = control_blocks[key]
        start = block[0]
        end = block[1]
        src_node = (start, end)

        if src_node in node_ids:
            src_id = node_ids[src_node]
        else:
            src_id = len(cfg)
            node_ids[src_node] = src_id
            cfg.add_node(src_id)
            cfg.node[src_id]['label'] = src_node

        if start == entry_ea:
            cfg.node[src_id]['c'] = "start"
        if end == exit_ea:
            cfg.node[src_id]['c'] = "end"

        # First the references with flow flag 0, then those with flag 1
        for flow in (0, 1):
            for ref in CodeRefsTo(start, flow):
                if ref not in control_blocks:
                    continue
                dst_node = control_blocks[ref]
                if dst_node not in node_ids:
                    # add_edge() below creates the node with this id
                    node_ids[dst_node] = len(cfg)
                dst_id = node_ids[dst_node]
                cfg.add_edge(dst_id, src_id)
                cfg.node[dst_id]['label'] = dst_node

    attributingRe(cfg, externs_eas, ea_externs)
    return cfg, 0
|
||||||
|
|
||||||
|
def transform(cfg):
    """Simplify cfg in place: run merging() first, then filtering()"""
    for step in (merging, filtering):
        step(cfg)
|
||||||
|
|
||||||
|
def merging(cfg):
    """
    Merge straight-line chains in place: when a node has exactly one
    successor and that successor has exactly one predecessor, the successor
    is folded into the node via domerge().

    Nodes that fail for any reason (for example because an earlier merge
    already removed them) are skipped.
    """
    # Snapshot the ids: domerge() removes nodes while we walk the graph
    for bb_id in list(cfg.nodes()):
        try:
            # Raises KeyError for nodes that were merged away earlier
            cfg.node[bb_id]['label']
            # list() so len() and indexing work whether the graph returns
            # a list or an iterator
            succs = list(cfg.successors(bb_id))
            if len(succs) == 1:
                preds = list(cfg.predecessors(succs[0]))
                if len(preds) == 1:
                    domerge(cfg, bb_id, succs[0])
        except Exception:
            # Best effort, but no longer swallows KeyboardInterrupt/SystemExit
            pass
|
||||||
|
|
||||||
|
def domerge(cfg, bb_id, suc_node):
    """
    Fold suc_node into bb_id: every successor of suc_node becomes a
    successor of bb_id, then suc_node is removed from cfg.
    """
    inherited = cfg.successors(suc_node)
    for target in inherited:
        cfg.add_edge(bb_id, target)
    cfg.remove_node(suc_node)
|
||||||
|
|
||||||
|
|
||||||
|
def filtering(cfg):
    """
    Remove from cfg every node whose labelled address range is matched by
    remove(). Progress information is printed along the way.
    """
    doomed = []
    for bb_id in cfg:
        label = cfg.node[bb_id]['label']
        first_ea = label[0]
        last_ea = label[1]
        verdict = remove(first_ea, last_ea)
        print("%s %s %s %s" % (bb_id, verdict, first_ea, last_ea))
        if verdict:
            print("%s %s" % (verdict, bb_id))
            doomed.append(bb_id)
    print(doomed)
    # Nodes are removed only after the scan, never while iterating cfg
    for bb_id in doomed:
        cfg.remove_node(bb_id)
|
||||||
|
|
||||||
|
def remove(bb_start, bb_end):
    """
    Tell whether the block [bb_start, bb_end] should be dropped.

    @return: True when matchseq() accepts the sequence returned by
             getSequences() for the range, False otherwise
    """
    seqs = getSequences(bb_start, bb_end)
    return True if matchseq(seqs) else False
|
||||||
|
|
||||||
|
def matchseq(seqs):
    """
    Check a block's item sequence against the known removable patterns.

    @param seqs: iterable of hashable items (mnemonic strings or tuples)
    @return: True when the set of items is a subset of one of the two
             mnemonic groups (an empty sequence therefore matches), or is
             exactly equal to one of the four fixed sets; False otherwise
    """
    observed = set(seqs)

    # Any subset of one of these groups matches
    subset_patterns = (
        set(['lw', "jr", "addiu"]),
        set(['add', 'pop', 'retn']),
    )
    for allowed in subset_patterns:
        if observed.issubset(allowed):
            return True

    # These must be matched exactly
    exact_patterns = (
        set(['b', ('move','$v0')]),
        set(['b', ('mov','$eax')]),
        set([('move','$v0')]),
        set([('mov','$eax')]),
    )
    for pattern in exact_patterns:
        if observed == pattern:
            return True
    return False
|
||||||
|
|
||||||
|
def attributingRe(cfg, externs_eas, ea_externs):
    """
    Attach per-block features to every node of cfg (in place).

    For each node the block stored under 'label' is measured and the
    results are written to the node attributes numIns, numCalls, numLIs,
    numAs, numNc, consts, strings, externs and numTIs.

    @param cfg: graph whose nodes carry a 'label' attribute
    @param externs_eas: not used by this function
    @param ea_externs: passed to retrieveExterns()
    """
    for node_id in cfg:
        attrs = cfg.node[node_id]
        bl = attrs['label']

        attrs['numIns'] = calInsts(bl)
        attrs['numCalls'] = calCalls(bl)
        attrs['numLIs'] = calLogicInstructions(bl)
        attrs['numAs'] = calArithmeticIns(bl)

        strings, consts = getBBconsts(bl)
        # numNc counts string and numeric constants together
        attrs['numNc'] = len(strings) + len(consts)
        attrs['consts'] = consts
        attrs['strings'] = strings

        attrs['externs'] = retrieveExterns(bl, ea_externs)
        attrs['numTIs'] = calTransferIns(bl)
|
||||||
|
|
||||||
|
|
||||||
|
def attributing(cfg):
    """
    Annotate cfg in place: run the offspring analysis, store 'stmt_num' and
    'binary_value' on every node, then run the dominator and loop checks.
    A progress line is printed after each analysis stage.
    """
    ga = graph_analysis()
    ga.gwithoffspring(cfg)
    print("finishing offspring")

    for node in cfg:
        stmt_num = getStmtNum(node)
        binary_value = getBinaryValue(node)
        attrs = cfg.node[node]
        attrs['stmt_num'] = stmt_num
        attrs['binary_value'] = binary_value

    ga.domChecking(cfg)
    print("finishing domChecking")
    ga.loopChecking(cfg)
    print("finishing loopChecking")
|
||||||
|
|
||||||
|
|
||||||
|
def getStmtNum(node):
    """
    Count the NextHead() steps needed to walk from node[0] until the
    address is no longer below node[1].

    @param node: pair (start, end) of addresses
    @return: number of steps taken
    """
    cursor = node[0]
    limit = node[1]
    count = 0
    while cursor < limit:
        cursor = NextHead(cursor)
        count += 1
    return count
|
||||||
|
|
||||||
|
def getBinaryValue(node):
    """
    Pack the original bytes of the first item of a block into one integer.

    The bytes from node[0] up to (but excluding) NextHead(node[0]) are
    concatenated, first byte most significant. Intermediate values are
    printed as the value is built.

    @param node: pair (start, end); only node[0] is used for the computation
    @return: the packed integer
    """
    start = node[0]
    next_ea = NextHead(start)
    value = 0

    # All bytes except the last one: OR the byte in, then make room
    for offset in xrange((next_ea - start) - 1):
        addr = start + offset
        byte = GetOriginalByte(addr)
        print("%s %s %s" % (value, addr, byte))
        value = (value | byte) << 8
        print(value)

    # The last byte is OR-ed in without a trailing shift
    addr = next_ea - 1
    byte = GetOriginalByte(addr)
    print("%s %s %s" % (value, addr, byte))
    value = value | byte
    print(node)
    print(bin(value))
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def cfg_construct(func):
    """
    Build a control-flow graph whose nodes are (start, end) address pairs.

    Edges are added for fall-through into the block keyed by this block's
    end (unless the previous instruction's mnemonic is 'jmp') and for every
    code reference from the block's last instruction to a known block.

    @param func: function object exposing startEA / endEA
    @return: tuple (cfg, start_node); start_node is the block starting at
             func.startEA, or None when no such block was found
    """
    func_start = func.startEA
    func_end = func.endEA
    cfg = nx.DiGraph()
    seq_blocks, main_blocks = obtain_block_sequence(func)
    # Initialised up front so the return below cannot hit an unbound local
    start_node = None

    for bl in seq_blocks:
        start = seq_blocks[bl][0]
        end = seq_blocks[bl][1]
        src_node = (start, end)

        if end in seq_blocks and GetMnem(PrevHead(end)) != 'jmp':
            next_start = seq_blocks[end][0]
            next_end = seq_blocks[end][1]
            next_node = (next_start, next_end)
            cfg.add_edge(src_node, next_node)

        if start == func_start:
            cfg.add_node(src_node, c='start')
            start_node = src_node
        if end == func_end:
            cfg.add_node(src_node, c='end')

        refs = CodeRefsFrom(PrevHead(end), 0)
        for ref in refs:
            if ref in seq_blocks:
                dst_node = (seq_blocks[ref][0], seq_blocks[ref][1])
                cfg.add_edge(src_node, dst_node)

    return cfg, start_node
|
||||||
|
|
||||||
|
|
||||||
|
def obtain_allpaths( cfg, node, path, allpaths):
    """
    Depth-first enumeration of cycle-free paths that reach an end node.

    @param cfg: graph whose end nodes carry the attribute c == 'end'
    @param node: node to visit next
    @param path: list of nodes visited so far; 'node' is appended to it
    @param allpaths: output list, receives every completed path
    """
    path.append(node)
    attrs = cfg.node[node]
    if 'c' in attrs and attrs['c'] == 'end':
        allpaths.append(path)
        return

    for suc in cfg.successors(node):
        if suc in path:
            # Already on this path: following it would loop
            continue
        # Each branch continues on its own copy of the path
        branch = copy.copy(path)
        obtain_allpaths(cfg, suc, branch, allpaths)
|
||||||
|
|
||||||
|
|
||||||
|
def obtain_block_sequence(func):
    """
    Index the basic blocks of a function by their control addresses.

    Every (startEA, endEA) block of FlowChart(func) is registered under two
    keys: the address returned by checkCB() and the address returned by
    PrevHead() for the block end.

    @return: tuple (control_blocks, sorted_starts) where control_blocks maps
             address -> block and sorted_starts is the sorted list of block
             start addresses lying within [func.startEA, func.endEA]
    """
    control_blocks = {}
    main_blocks = {}
    blocks = [(item.startEA, item.endEA) for item in FlowChart(func)]

    for block in blocks:
        base = block[0]
        last_head = PrevHead(block[1])
        control_ea = checkCB(block)
        control_blocks[control_ea] = block
        control_blocks[last_head] = block
        if func.startEA <= base <= func.endEA:
            main_blocks[base] = block

    # Only the sorted start addresses are returned, not the mapping
    return control_blocks, sorted(main_blocks)
|
||||||
|
|
||||||
|
def checkCB(bl):
    """
    Find the control address of a block.

    @param bl: pair (start, end) of addresses
    @return: the first address in [start, end) accepted by checkCondition(),
             or PrevHead(end) when none is
    """
    block_start = bl[0]
    block_end = bl[1]
    cursor = block_start
    while cursor < block_end:
        if checkCondition(cursor):
            return cursor
        cursor = NextHead(cursor)
    return PrevHead(block_end)
|
||||||
|
|
||||||
|
def checkCondition(ea):
    """
    Tell whether the instruction at ea is one of the known branch mnemonics.

    Only the MIPS and x86 mnemonics listed below are checked. ARM branch
    mnemonics are not consulted: the previous version built an ARM table
    but never used it, and that behaviour is kept.

    @param ea: instruction address
    @return: True when GetMnem(ea) is a listed mnemonic, False otherwise
    """
    # Constant tuples instead of dictionaries rebuilt on every call
    mips_branch = ("beqz", "beq", "bne", "bgez", "b", "bnez", "bgtz", "bltz",
                   "blez", "bgt", "bge", "blt", "ble", "bgtu", "bgeu", "bltu",
                   "bleu")
    x86_branch = ("jz", "jnb", "jne", "je", "jg", "jle", "jl", "jge", "ja",
                  "jae", "jb", "jbe", "jo", "jno", "js", "jns")
    opcode = GetMnem(ea)
    return opcode in mips_branch or opcode in x86_branch
|
BIN
Genius3/raw-feature-extractor/cfg_constructor.pyc
Normal file
BIN
Genius3/raw-feature-extractor/cfg_constructor.pyc
Normal file
Binary file not shown.
228
Genius3/raw-feature-extractor/discovRe.py
Normal file
228
Genius3/raw-feature-extractor/discovRe.py
Normal file
@ -0,0 +1,228 @@
|
|||||||
|
#
|
||||||
|
# Reference Lister
|
||||||
|
#
|
||||||
|
# List all functions and all references to them in the current section.
|
||||||
|
#
|
||||||
|
# Implemented with the idautils module
|
||||||
|
#
|
||||||
|
import networkx as nx
|
||||||
|
import cPickle as pickle
|
||||||
|
import pdb
|
||||||
|
from graph_analysis_ida import *
|
||||||
|
from graph_property import *
|
||||||
|
#import wingdbstub
|
||||||
|
#wingdbstub.Ensure()
|
||||||
|
|
||||||
|
def get_funcs(ea):
    """
    Map every function returned by Functions(SegStart(ea)) to its blocks.

    @param ea: an address; its segment start is where enumeration begins
    @return: dict function name -> list of (startEA, endEA) block pairs
    """
    funcs = {}
    for funcea in Functions(SegStart(ea)):
        name = GetFunctionName(funcea)
        func = get_func(funcea)
        flow = FlowChart(func)
        funcs[name] = []
        for block in flow:
            funcs[name].append((block.startEA, block.endEA))
    return funcs
|
||||||
|
|
||||||
|
def get_funcs_for_discoverRe(ea):
    """
    Compute the discovRe feature vector of every function returned by
    Functions(SegStart(ea)). Each function name is printed as it is
    processed.

    @param ea: an address; its segment start is where enumeration begins
    @return: dict function name -> feature list
    """
    # Imported locally: this module has no top-level import of the CFG
    # builder. NOTE(review): assumes cfg_constructor.py is importable from
    # here (it sits next to this file) - confirm.
    import cfg_constructor

    features = {}
    for funcea in Functions(SegStart(ea)):
        funcname = GetFunctionName(funcea)
        print(funcname)
        func = get_func(funcea)
        # get_discoverRe_feature() requires the function's CFG as its second
        # argument; the previous one-argument call always raised TypeError
        icfg = cfg_constructor.cfg_construct(func)[0]
        features[funcname] = get_discoverRe_feature(func, icfg)
    return features
|
||||||
|
|
||||||
|
def get_discoverRe_feature(func, icfg):
    """
    Collect the feature list of one function.

    The returned list holds, in order:
      1. getFuncCalls(func)
      2. getLogicInsts(func)
      3. getTransferInsts(func)
      4. getLocalVariables(func)
      5. getBasicBlocks(func)
      6. number of edges of icfg
      7. getIncommingCalls(func)
      8. getIntrs(func)
      9. retrieveGP(icfg)
     10. strings returned by getfunc_consts(func)
     11. constants returned by getfunc_consts(func)

    @param func: function object handed to the feature helpers
    @param icfg: control-flow graph of the function
    @return: list of the eleven features above
    """
    features = []
    features.append(getFuncCalls(func))
    features.append(getLogicInsts(func))
    features.append(getTransferInsts(func))
    features.append(getLocalVariables(func))
    features.append(getBasicBlocks(func))
    features.append(len(icfg.edges()))
    features.append(getIncommingCalls(func))
    features.append(getIntrs(func))
    features.append(retrieveGP(icfg))

    strings, consts = getfunc_consts(func)
    features.append(strings)
    features.append(consts)
    return features
|
||||||
|
|
||||||
|
def get_func_names(ea):
    """
    @param ea: an address; its segment start is where enumeration begins
    @return: dict function name -> function start address
    """
    by_name = {}
    for start_ea in Functions(SegStart(ea)):
        by_name[GetFunctionName(start_ea)] = start_ea
    return by_name
|
||||||
|
|
||||||
|
def get_func_bases(ea):
    """
    @param ea: an address; its segment start is where enumeration begins
    @return: dict function start address -> function name
    """
    by_address = {}
    for start_ea in Functions(SegStart(ea)):
        by_address[start_ea] = GetFunctionName(start_ea)
    return by_address
|
||||||
|
|
||||||
|
def get_func_range(ea):
    """
    @param ea: an address; its segment start is where enumeration begins
    @return: dict function name -> (startEA, endEA) of the function
    """
    ranges = {}
    for start_ea in Functions(SegStart(ea)):
        name = GetFunctionName(start_ea)
        func = get_func(start_ea)
        ranges[name] = (func.startEA, func.endEA)
    return ranges
|
||||||
|
|
||||||
|
def get_func_sequences(ea):
    """
    Collect the mnemonic sequence of every function from get_funcs(ea).

    @param ea: an address; forwarded to get_funcs()
    @return: dict function name -> list of mnemonics, block after block
    """
    funcs_bodylist = {}
    funcs = get_funcs(ea)
    for funcname in funcs:
        if funcname not in funcs_bodylist:
            funcs_bodylist[funcname] = []
        for start, end in funcs[funcname]:
            inst_addr = start
            # The block end is exclusive, so stop before it; '<=' also
            # collected the first instruction after each block
            while inst_addr < end:
                opcode = GetMnem(inst_addr)
                funcs_bodylist[funcname].append(opcode)
                inst_addr = NextHead(inst_addr)
    return funcs_bodylist
|
||||||
|
|
||||||
|
def get_func_cfgs(ea):
    """
    Build the CFG of every function whose start lies in the 'LOAD' section.

    A running counter is printed for each processed function. Functions
    whose CFG cannot be built are skipped.

    @param ea: an address; its segment start is where enumeration begins
    @return: dict function name -> result of cfg_construct(func)
    """
    # Imported locally: this module has no top-level import that binds
    # 'cfg', and the old bare except hid the resulting NameError.
    # NOTE(review): assumes cfg_constructor.py is importable from here
    # (it sits next to this file) - confirm.
    import cfg_constructor as cfg

    func_cfglist = {}
    i = 0
    start, end = get_section('LOAD')
    for funcea in Functions(SegStart(ea)):
        if start <= funcea <= end:
            funcname = GetFunctionName(funcea)
            func = get_func(funcea)
            print(i)
            i += 1
            try:
                icfg = cfg.cfg_construct(func)
            except Exception:
                # Best effort: leave this function out of the result
                continue
            func_cfglist[funcname] = icfg
    return func_cfglist
|
||||||
|
|
||||||
|
def get_section(t):
    """
    Look up the address range of a segment by name.

    @param t: segment name
    @return: tuple (start, end) obtained from SegByName(), SegByBase() and
             SegEnd()
    """
    start = SegByBase(SegByName(t))
    return start, SegEnd(start)
|
||||||
|
|
||||||
|
|
||||||
|
def get_func_cfg_sequences(func_cfglist):
    """
    Collect the mnemonic sequence of every CFG node of every function.

    @param func_cfglist: dict function name -> sequence whose first element
           is a graph with (start, end) pairs as nodes
    @return: dict function name -> dict (start, end) -> mnemonic list
    """
    result = {}
    for funcname in func_cfglist:
        per_block = {}
        result[funcname] = per_block
        graph = func_cfglist[funcname][0]
        for start, end in graph:
            per_block[(start, end)] = get_sequences(start, end)
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def get_sequences(start, end):
    """
    List the mnemonics found while walking heads from start to end.

    @param start: first address
    @param end: last address; the item at 'end' itself is included
    @return: list of GetMnem() results
    """
    # NOTE(review): 'end' is treated as inclusive here; if callers pass an
    # exclusive block end, one extra instruction is collected - confirm.
    mnemonics = []
    cursor = start
    while cursor <= end:
        mnemonics.append(GetMnem(cursor))
        cursor = NextHead(cursor)
    return mnemonics
|
||||||
|
|
||||||
|
def get_stack_arg(func_addr):
    """
    List the distinct member names of a function's stack frame.

    Members whose name is empty or contains ' s' or ' r' are left out.
    The function address is printed first.

    @param func_addr: function address handed to GetFrame()
    @return: list of member names in frame order; [] when there is no frame
    """
    print(func_addr)
    names = []
    frame = GetFrame(func_addr)
    if not frame:
        return []

    offset = GetFirstMember(frame)
    last_offset = GetLastMember(frame)
    while offset <= last_offset:
        member_name = GetMemberName(frame, offset)
        member_size = GetMemberSize(frame, offset)
        # Step over the member; advance by 4 when no size is reported
        offset = offset + (member_size if member_size else 4)
        if member_name not in names and member_name and ' s' not in member_name and ' r' not in member_name:
            names.append(member_name)
    return names
|
||||||
|
|
||||||
|
#pickle.dump(funcs, open('C:/Documents and Settings/Administrator/Desktop/funcs','w'))
|
||||||
|
|
||||||
|
def processDataSegs():
    """
    Cross-index data addresses and the functions that reference them.

    Walks every head of every segment of type SEG_DATA or SEG_BSS and
    records, for each data reference, the name of the referencing function.
    References whose function name is empty are ignored.

    @return: tuple (funcdata, datafunc) where funcdata maps function name ->
             list of data addresses and datafunc maps data address -> list
             of function names
    """
    funcdata = {}
    datafunc = {}
    for index in xrange(idaapi.get_segm_qty()):
        seg = idaapi.getnseg(index)
        ea = seg.startEA
        segtype = idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE)
        if segtype not in [idc.SEG_DATA, idc.SEG_BSS]:
            continue

        start = idc.SegStart(ea)
        end = idc.SegEnd(ea)
        cur = start
        while cur <= end:
            for fea in list(DataRefsTo(cur)):
                name = GetFunctionName(fea)
                if len(name) == 0:
                    continue
                funcdata.setdefault(name, []).append(cur)
                datafunc.setdefault(cur, []).append(name)
            cur = NextHead(cur)
    return funcdata, datafunc
|
||||||
|
|
||||||
|
def obtainDataRefs(callgraph):
    """
    For every call graph node, list the functions sharing data with it.

    @param callgraph: iterable of function names
    @return: dict node -> list of function names that reference at least
             one data address also referenced by node (duplicates removed
             once a second data address is merged in)
    """
    datarefs = {}
    funcdata, datafunc = processDataSegs()
    for node in callgraph:
        if node not in funcdata:
            continue
        for data_ea in funcdata[node]:
            refs = list(set(datafunc[data_ea]))
            if node not in datarefs:
                datarefs[node] = refs
                continue
            print(refs)
            datarefs[node] += refs
            datarefs[node] = list(set(datarefs[node]))
    return datarefs
|
||||||
|
|
||||||
|
|
BIN
Genius3/raw-feature-extractor/discovRe.pyc
Normal file
BIN
Genius3/raw-feature-extractor/discovRe.pyc
Normal file
Binary file not shown.
293
Genius3/raw-feature-extractor/func.py
Normal file
293
Genius3/raw-feature-extractor/func.py
Normal file
@ -0,0 +1,293 @@
|
|||||||
|
# -*- coding: UTF-8 -*-
|
||||||
|
#
|
||||||
|
# Reference Lister
|
||||||
|
#
|
||||||
|
# List all functions and all references to them in the current section.
|
||||||
|
#
|
||||||
|
# Implemented with the idautils module
|
||||||
|
#
|
||||||
|
from idautils import *
|
||||||
|
from idaapi import *
|
||||||
|
from idc import *
|
||||||
|
import networkx as nx
|
||||||
|
import cfg_constructor as cfg
|
||||||
|
import cPickle as pickle
|
||||||
|
import pdb
|
||||||
|
from raw_graphs import *
|
||||||
|
#from discovRe_feature.discovRe import *
|
||||||
|
from discovRe import *
|
||||||
|
#import wingdbstub
|
||||||
|
#wingdbstub.Ensure()
|
||||||
|
|
||||||
|
def print_obj(obj):
    """Print all attributes of ``obj`` (its ``__dict__``)."""
    attributes = obj.__dict__
    print(attributes)
|
||||||
|
|
||||||
|
def gt_funcNames(ea):
    """List unified function names, leaving out PLT/extern stubs.

    Iterates ``Functions(SegStart(ea))``; names present in the first
    mapping returned by ``processpltSegs()`` are printed and skipped.

    Returns:
        list of the remaining function names, in iteration order.
    """
    plt_func, plt_data = processpltSegs()
    names = []
    for funcea in Functions(SegStart(ea)):
        funcname = get_unified_funcname(funcea)
        if funcname not in plt_func:
            names.append(funcname)
        else:
            print(funcname)
    return names
|
||||||
|
|
||||||
|
def get_funcs(ea):
    """Map each non-stub function name to its basic-block address ranges.

    Iterates ``Functions(SegStart(ea))`` and skips names known to
    ``processpltSegs()``.

    Returns:
        dict: function name -> list of ``(startEA, endEA)`` tuples, one per
        block of the function's ``FlowChart``.
    """
    plt_func, plt_data = processpltSegs()
    funcs = {}
    for funcea in Functions(SegStart(ea)):
        funcname = get_unified_funcname(funcea)
        if funcname in plt_func:
            continue
        func = get_func(funcea)
        funcs[funcname] = [(bl.startEA, bl.endEA) for bl in FlowChart(func)]
    return funcs
|
||||||
|
|
||||||
|
# used for the callgraph generation.
|
||||||
|
def get_func_namesWithoutE(ea):
    """Map unified function names to their addresses, excluding stubs.

    Names found in the first mapping of ``processpltSegs()`` are printed
    and left out. The address of any function whose name contains
    ``'close'`` is printed as well (debug output).

    Returns:
        dict: function name -> function start address.
    """
    plt_func, plt_data = processpltSegs()
    name_to_ea = {}
    for funcea in Functions(SegStart(ea)):
        funcname = get_unified_funcname(funcea)
        if 'close' in funcname:
            print(funcea)
        if funcname in plt_func:
            print(funcname)
        else:
            name_to_ea[funcname] = funcea
    return name_to_ea
|
||||||
|
|
||||||
|
# used for the callgraph generation.
|
||||||
|
def get_func_names(ea):
    """Return a dict of unified function name -> function address.

    Covers every function yielded by ``Functions(SegStart(ea))``; when two
    functions share a unified name, the later address wins.
    """
    return {get_unified_funcname(funcea): funcea
            for funcea in Functions(SegStart(ea))}
|
||||||
|
|
||||||
|
def get_func_bases(ea):
    """Return a dict of function address -> unified name, excluding stubs.

    Functions whose name appears in the first mapping returned by
    ``processpltSegs()`` are omitted.
    """
    plt_func, plt_data = processpltSegs()
    bases = {}
    for funcea in Functions(SegStart(ea)):
        funcname = get_unified_funcname(funcea)
        if funcname not in plt_func:
            bases[funcea] = funcname
    return bases
|
||||||
|
|
||||||
|
def get_func_range(ea):
    """Return a dict of unified function name -> ``(startEA, endEA)``.

    Covers every function yielded by ``Functions(SegStart(ea))``.
    """
    ranges = {}
    for funcea in Functions(SegStart(ea)):
        funcname = get_unified_funcname(funcea)
        bounds = get_func(funcea)
        ranges[funcname] = (bounds.startEA, bounds.endEA)
    return ranges
|
||||||
|
|
||||||
|
def get_unified_funcname(ea):
    """Return the function name at ``ea`` without a leading ``'.'``.

    The name comes from ``GetFunctionName(ea)``; only a single leading dot
    is stripped, anything else is returned unchanged.
    """
    funcname = GetFunctionName(ea)
    if funcname[:1] == '.':
        return funcname[1:]
    return funcname
|
||||||
|
|
||||||
|
def get_func_sequences(ea):
    """Return the mnemonic sequence of every function from ``get_funcs(ea)``.

    Returns:
        dict: function name -> list of mnemonics, gathered block by block
        in the order the blocks are listed by ``get_funcs``.
    """
    funcs_bodylist = {}
    funcs = get_funcs(ea)
    for funcname in funcs:
        opcodes = funcs_bodylist.setdefault(funcname, [])
        for start, end in funcs[funcname]:
            inst_addr = start
            # Block end addresses are exclusive; the previous ``<=`` bound
            # also picked up the first instruction after the block.
            while inst_addr < end:
                opcodes.append(GetMnem(inst_addr))
                inst_addr = NextHead(inst_addr)
    return funcs_bodylist
|
||||||
|
|
||||||
|
def get_func_cfgs_c(ea):
    # type: (object) -> object
    """Build a ``raw_graphs`` collection for the functions starting at ``ea``.

    For each function yielded by ``Functions(SegStart(ea))`` a CFG is built
    with ``cfg.getCfg``, function-level features are computed with
    ``get_discoverRe_feature`` and both are wrapped in a ``raw_graph``.
    A running function index is printed as progress output.

    Returns:
        the ``raw_graphs`` container named after the input file.
    """
    raw_cfgs = raw_graphs(idc.GetInputFile())
    externs_eas, ea_externs = processpltSegs()
    for i, funcea in enumerate(Functions(SegStart(ea))):
        funcname = get_unified_funcname(funcea)
        func = get_func(funcea)
        print(i)
        icfg = cfg.getCfg(func, externs_eas, ea_externs)
        func_f = get_discoverRe_feature(func, icfg[0])
        # raw_graph (defined in raw_graphs.py) bundles the attributed graph,
        # the original graph and the function-level features.
        raw_g = raw_graph(funcname, icfg, func_f)
        # raw_graphs (also in raw_graphs.py) stores the list of raw_graph objects.
        raw_cfgs.append(raw_g)
        # Printing these objects directly only shows the instance address;
        # use print_obj(obj) / print(obj.__dict__) to inspect attributes.
    return raw_cfgs
|
||||||
|
|
||||||
|
def get_func_cfgs_ctest(ea):
    """Return both graphs produced by ``cfg.getCfg`` for every function.

    A running function index is printed as progress output.

    Returns:
        dict: unified function name -> ``(icfg, old_cfg)`` as unpacked from
        the ``cfg.getCfg`` result.
    """
    binary_name = idc.GetInputFile()
    # Constructed as in the original code; the container itself is not used.
    raw_cfgs = raw_graphs(binary_name)
    externs_eas, ea_externs = processpltSegs()
    diffs = {}
    for i, funcea in enumerate(Functions(SegStart(ea))):
        funcname = get_unified_funcname(funcea)
        func = get_func(funcea)
        print(i)
        icfg, old_cfg = cfg.getCfg(func, externs_eas, ea_externs)
        diffs[funcname] = (icfg, old_cfg)
    return diffs
|
||||||
|
|
||||||
|
def get_func_cfgs(ea):
    """Return a dict of unified function name -> ``cfg.getCfg(func)`` result.

    Functions whose CFG construction fails are skipped, but the failure is
    now reported instead of being swallowed silently. A running function
    index is printed as progress output.
    """
    func_cfglist = {}
    for i, funcea in enumerate(Functions(SegStart(ea))):
        funcname = get_unified_funcname(funcea)
        func = get_func(funcea)
        print(i)
        try:
            # NOTE(review): other callers in this file pass three arguments
            # to cfg.getCfg; confirm the one-argument form is still valid.
            icfg = cfg.getCfg(func)
        except Exception as err:
            # Best effort: keep going, but leave a trace of what was dropped.
            # KeyboardInterrupt/SystemExit are no longer intercepted.
            print("get_func_cfgs: skipping %s: %r" % (funcname, err))
            continue
        func_cfglist[funcname] = icfg
    return func_cfglist
|
||||||
|
|
||||||
|
def get_func_cfg_sequences(func_cfglist):
    """Return the mnemonic sequence of every CFG node of every function.

    Args:
        func_cfglist: mapping of function name to a sequence whose first
            item is iterable as ``(start, end)`` pairs.

    Returns:
        dict: function name -> dict of ``(start, end)`` -> mnemonic list
        from ``get_sequences``.
    """
    func_cfg_seqlist = {}
    for funcname in func_cfglist:
        graph = func_cfglist[funcname][0]
        per_block = {}
        for start, end in graph:
            per_block[(start, end)] = get_sequences(start, end)
        func_cfg_seqlist[funcname] = per_block
    return func_cfg_seqlist
|
||||||
|
|
||||||
|
|
||||||
|
def get_sequences(start, end):
    """Return the mnemonics of the heads in the half-open range [start, end).

    Args:
        start: address of the first instruction.
        end: exclusive end address. The previous ``<=`` bound also included
            the instruction located at ``end``.

    Returns:
        list of mnemonics in address order; empty when ``start >= end``.
    """
    seq = []
    inst_addr = start
    while inst_addr < end:
        seq.append(GetMnem(inst_addr))
        inst_addr = NextHead(inst_addr)
    return seq
|
||||||
|
|
||||||
|
def get_stack_arg(func_addr):
    """Return the distinct stack-frame member names of a function.

    Prints ``func_addr`` (debug output), then walks the frame returned by
    ``GetFrame`` from its first to its last member offset. Names containing
    ``' s'`` or ``' r'`` are left out.

    Returns:
        list of member names in discovery order; ``[]`` without a frame.
    """
    print(func_addr)
    stack = GetFrame(func_addr)
    if not stack:
        return []
    args = []
    offset = GetFirstMember(stack)
    last = GetLastMember(stack)
    while offset <= last:
        mName = GetMemberName(stack, offset)
        mSize = GetMemberSize(stack, offset)
        # Advance by the member size, or by 4 when no size is reported.
        offset += mSize if mSize else 4
        if mName and mName not in args and ' s' not in mName and ' r' not in mName:
            args.append(mName)
    return args
|
||||||
|
|
||||||
|
#pickle.dump(funcs, open('C:/Documents and Settings/Administrator/Desktop/funcs','w'))
|
||||||
|
|
||||||
|
def processExternalSegs():
    """Map names found in external (``idc.SEG_XTRN``) segments to addresses.

    Returns:
        dict: unified name -> ``hex()`` string of the head address.
    """
    funcdata = {}
    for n in xrange(idaapi.get_segm_qty()):
        seg = idaapi.getnseg(n)
        ea = seg.startEA
        if idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE) != idc.SEG_XTRN:
            continue
        cur = idc.SegStart(ea)
        end = idc.SegEnd(ea)
        # The segment end is exclusive; ``<=`` used to read one head of the
        # segment that follows.
        while cur < end:
            funcdata[get_unified_funcname(cur)] = hex(cur)
            cur = NextHead(cur)
    return funcdata
|
||||||
|
|
||||||
|
def processpltSegs():
    """Index the heads of the ``.plt``, ``extern`` and ``.MIPS.stubs`` segments.

    Returns:
        ``(funcdata, datafunc)``: ``funcdata`` maps a unified name to the
        ``hex()`` string of its address, ``datafunc`` maps the address to
        the unified name.
    """
    stub_segments = ('.plt', 'extern', '.MIPS.stubs')
    funcdata = {}
    datafunc = {}
    for n in xrange(idaapi.get_segm_qty()):
        seg = idaapi.getnseg(n)
        if SegName(seg.startEA) not in stub_segments:
            continue
        cur = seg.startEA
        end = seg.endEA
        while cur < end:
            name = get_unified_funcname(cur)
            funcdata[name] = hex(cur)
            datafunc[cur] = name
            cur = NextHead(cur)
    return funcdata, datafunc
|
||||||
|
|
||||||
|
|
||||||
|
def processDataSegs():
    """Cross-reference functions with the data/BSS addresses they touch.

    Every head of each segment whose type is ``idc.SEG_DATA`` or
    ``idc.SEG_BSS`` is visited; for each data reference to that head the
    unified name of the referencing function is recorded.

    Returns:
        ``(funcdata, datafunc)``: ``funcdata`` maps a function name to the
        list of data addresses it references, ``datafunc`` maps a data
        address to the list of function names referencing it.
    """
    funcdata = {}
    datafunc = {}
    for n in xrange(idaapi.get_segm_qty()):
        seg = idaapi.getnseg(n)
        ea = seg.startEA
        segtype = idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE)
        if segtype not in (idc.SEG_DATA, idc.SEG_BSS):
            continue
        cur = idc.SegStart(ea)
        end = idc.SegEnd(ea)
        # The segment end address is exclusive, so stop before it; the old
        # ``<=`` bound also scanned the first head of the following segment.
        while cur < end:
            for fea in DataRefsTo(cur):
                name = get_unified_funcname(fea)
                if not name:
                    # Reference does not come from inside a function.
                    continue
                funcdata.setdefault(name, []).append(cur)
                datafunc.setdefault(cur, []).append(name)
            cur = NextHead(cur)
    return funcdata, datafunc
|
||||||
|
|
||||||
|
def obtainDataRefs(callgraph):
    """Collect, per call-graph node, the functions sharing data with it.

    For every node of ``callgraph`` that appears in the function-to-data
    map from ``processDataSegs()``, gathers the de-duplicated names of all
    functions that reference the same data addresses.

    Returns:
        dict mapping a node to a list of function names (no duplicates).
    """
    datarefs = {}
    funcdata, datafunc = processDataSegs()
    for node in callgraph:
        if node not in funcdata:
            continue
        for dd in funcdata[node]:
            refs = list(set(datafunc[dd]))
            if node not in datarefs:
                datarefs[node] = refs
                continue
            # Debug output kept from the original implementation.
            print(refs)
            datarefs[node] = list(set(datarefs[node] + refs))
    return datarefs
|
||||||
|
|
||||||
|
|
BIN
Genius3/raw-feature-extractor/func.pyc
Normal file
BIN
Genius3/raw-feature-extractor/func.pyc
Normal file
Binary file not shown.
257
Genius3/raw-feature-extractor/graph_analysis_ida.py
Normal file
257
Genius3/raw-feature-extractor/graph_analysis_ida.py
Normal file
@ -0,0 +1,257 @@
|
|||||||
|
from idautils import *
|
||||||
|
from idaapi import *
|
||||||
|
from idc import *
|
||||||
|
|
||||||
|
def getfunc_consts(func):
    """Gather the string and numeric constants of all blocks of ``func``.

    Returns:
        ``(strings, consts)``: concatenation of the per-block results of
        ``getBBconsts`` in ``FlowChart`` order.
    """
    strings, consts = [], []
    blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
    for bl in blocks:
        block_strings, block_consts = getBBconsts(bl)
        strings.extend(block_strings)
        consts.extend(block_consts)
    return strings, consts
|
||||||
|
|
||||||
|
def getConst(ea, offset):
    """Classify the immediate operand ``offset`` of the instruction at ``ea``.

    Args:
        ea: instruction address.
        offset: operand index passed to ``GetOpType``/``GetOperandValue``.

    Returns:
        ``(strings, consts)``; at most one of the lists holds one element.
        Nothing is recorded when the operand is not an immediate. Values in
        0..10 are numeric constants. Other values are recorded as a string
        when they point into loaded, segment-backed memory where
        ``GetString`` (at the value, or at value + 0x40000) yields text
        whose characters all have codes in [40, 128); otherwise the value
        is a numeric constant.
    """
    strings = []
    consts = []
    if GetOpType(ea, offset) != idaapi.o_imm:
        return strings, consts

    imm_value = GetOperandValue(ea, offset)
    if 0 <= imm_value <= 10:
        consts.append(imm_value)
        return strings, consts

    str_value = None
    if idaapi.isLoaded(imm_value) and idaapi.getseg(imm_value):
        str_value = GetString(imm_value)
        if str_value is None:
            # Second attempt at a fixed displacement, as in the original.
            str_value = GetString(imm_value + 0x40000)

    if str_value is not None and all(40 <= ord(c) < 128 for c in str_value):
        strings.append(str_value)
    else:
        consts.append(imm_value)
    return strings, consts
|
||||||
|
|
||||||
|
def getBBconsts(bl):
    """Collect string and numeric constants of one basic block.

    Args:
        bl: ``(start, end)`` pair; ``end`` is exclusive.

    Returns:
        ``(strings, consts)`` gathered with ``getConst`` from operands 0, 1
        and (best effort) 2 of every instruction, except those whose
        mnemonic is ``la``, ``jalr``, ``call`` or ``jal``.
    """
    skipped = ('la', 'jalr', 'call', 'jal')
    strings = []
    consts = []
    inst_addr, end = bl[0], bl[1]
    while inst_addr < end:
        if GetMnem(inst_addr) not in skipped:
            strings_src, consts_src = getConst(inst_addr, 0)
            strings_dst, consts_dst = getConst(inst_addr, 1)
            strings.extend(strings_src + strings_dst)
            consts.extend(consts_src + consts_dst)
            try:
                # The third operand is optional; any failure is ignored.
                strings_third, consts_third = getConst(inst_addr, 2)
                consts.extend(consts_third)
                strings.extend(strings_third)
            except:
                pass
        inst_addr = NextHead(inst_addr)
    return strings, consts
|
||||||
|
|
||||||
|
def getFuncCalls(func):
    """Return the total ``calCalls`` count over all basic blocks of ``func``."""
    blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
    return sum(calCalls(bl) for bl in blocks)
|
||||||
|
|
||||||
|
def getLogicInsts(func):
    """Return the total ``calLogicInstructions`` count over all blocks of ``func``."""
    blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
    return sum(calLogicInstructions(bl) for bl in blocks)
|
||||||
|
|
||||||
|
def getTransferInsts(func):
    """Return the total ``calTransferIns`` count over all blocks of ``func``."""
    blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
    return sum(calTransferIns(bl) for bl in blocks)
|
||||||
|
|
||||||
|
def getIntrs(func):
    """Return the total ``calInsts`` count over all basic blocks of ``func``."""
    blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
    return sum(calInsts(bl) for bl in blocks)
|
||||||
|
|
||||||
|
def getLocalVariables(func):
    """Return ``get_stackVariables`` for the start address of ``func``."""
    return get_stackVariables(func.startEA)
|
||||||
|
|
||||||
|
def getBasicBlocks(func):
    """Return the number of basic blocks in the ``FlowChart`` of ``func``."""
    bounds = [(block.startEA, block.endEA) for block in FlowChart(func)]
    count = len(bounds)
    return count
|
||||||
|
|
||||||
|
def getIncommingCalls(func):
    """Return how many code references ``CodeRefsTo(func.startEA, 0)`` yields."""
    return len(list(CodeRefsTo(func.startEA, 0)))
|
||||||
|
|
||||||
|
|
||||||
|
def get_stackVariables(func_addr):
    """Count distinct stack-frame members whose name contains ``'var_'``.

    Walks the frame returned by ``GetFrame`` from its first to its last
    member offset.

    Returns:
        number of matching member names; 0 when there is no frame.
    """
    stack = GetFrame(func_addr)
    if not stack:
        return 0
    seen = []
    offset = GetFirstMember(stack)
    last = GetLastMember(stack)
    while offset <= last:
        mName = GetMemberName(stack, offset)
        mSize = GetMemberSize(stack, offset)
        # Advance by the member size, or by 4 when no size is reported.
        offset += mSize if mSize else 4
        if mName and mName not in seen and 'var_' in mName:
            seen.append(mName)
    return len(seen)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def calArithmeticIns(bl):
    """Count arithmetic instructions in the block ``bl = (start, end)``.

    An instruction counts when its mnemonic is in the x86 or MIPS list
    below; ``end`` is exclusive.
    """
    x86_AI = ('add', 'sub', 'div', 'imul', 'idiv', 'mul', 'shl', 'dec', 'inc')
    mips_AI = ('add', 'addu', 'addi', 'addiu', 'mult', 'multu', 'div', 'divu')
    arithmetic = set(x86_AI + mips_AI)
    inst_addr, end = bl[0], bl[1]
    invoke_num = 0
    while inst_addr < end:
        if GetMnem(inst_addr) in arithmetic:
            invoke_num += 1
        inst_addr = NextHead(inst_addr)
    return invoke_num
|
||||||
|
|
||||||
|
def calCalls(bl):
    """Count ``call``/``jal``/``jalr`` instructions in ``bl = (start, end)``.

    ``end`` is exclusive.
    """
    call_mnemonics = ('call', 'jal', 'jalr')
    inst_addr, end = bl[0], bl[1]
    invoke_num = 0
    while inst_addr < end:
        if GetMnem(inst_addr) in call_mnemonics:
            invoke_num += 1
        inst_addr = NextHead(inst_addr)
    return invoke_num
|
||||||
|
|
||||||
|
def calInsts(bl):
    """Count the heads between ``bl[0]`` (inclusive) and ``bl[1]`` (exclusive)."""
    ea, end = bl[0], bl[1]
    num = 0
    while ea < end:
        ea = NextHead(ea)
        num += 1
    return num
|
||||||
|
|
||||||
|
def calLogicInstructions(bl):
    """Count logic instructions in the block ``bl = (start, end)``.

    An instruction counts when its mnemonic is in the x86 or MIPS list
    below; ``end`` is exclusive.
    """
    x86_LI = ('and', 'andn', 'andnpd', 'andpd', 'andps', 'andnps', 'test',
              'xor', 'xorpd', 'pslld')
    mips_LI = ('and', 'andi', 'or', 'ori', 'xor', 'nor', 'slt', 'slti', 'sltu')
    logic = set(x86_LI + mips_LI)
    inst_addr, end = bl[0], bl[1]
    invoke_num = 0
    while inst_addr < end:
        if GetMnem(inst_addr) in logic:
            invoke_num += 1
        inst_addr = NextHead(inst_addr)
    return invoke_num
|
||||||
|
|
||||||
|
def calSconstants(bl):
    """Count the string constants referenced in the block ``bl = (start, end)``.

    The previous body tested mnemonics against a name ``calls`` that is not
    defined in this function or module, so it raised ``NameError`` for any
    non-empty block.

    NOTE(review): counting the strings reported by ``getBBconsts`` is
    inferred from the function name; confirm this is the intended feature.
    """
    strings, _consts = getBBconsts(bl)
    return len(strings)
|
||||||
|
|
||||||
|
|
||||||
|
def calNconstants(bl):
    """Count instructions whose operand 0 or operand 1 has operand type 5.

    ``bl`` is a ``(start, end)`` pair with ``end`` exclusive.
    NOTE(review): 5 presumably equals ``idaapi.o_imm`` (immediate) — confirm.
    """
    inst_addr, end = bl[0], bl[1]
    invoke_num = 0
    while inst_addr < end:
        operand_types = (GetOpType(inst_addr, 0), GetOpType(inst_addr, 1))
        if 5 in operand_types:
            invoke_num += 1
        inst_addr = NextHead(inst_addr)
    return invoke_num
|
||||||
|
|
||||||
|
def retrieveExterns(bl, ea_externs):
    """List the external names referenced from the block ``bl``.

    Args:
        bl: ``(start, end)`` pair; ``end`` is exclusive.
        ea_externs: mapping of address -> external name.

    Returns:
        list with, per instruction, the name of the first code reference
        target present in ``ea_externs`` (instructions with none add
        nothing).
    """
    externs = []
    inst_addr, end = bl[0], bl[1]
    while inst_addr < end:
        # Explicit search instead of indexing a possibly empty list inside
        # a bare ``except``, which also hid unrelated errors.
        for ref in CodeRefsFrom(inst_addr, 1):
            if ref in ea_externs:
                externs.append(ea_externs[ref])
                break
        inst_addr = NextHead(inst_addr)
    return externs
|
||||||
|
|
||||||
|
def calTransferIns(bl):
    """Count control-transfer instructions in the block ``bl = (start, end)``.

    An instruction counts when its mnemonic is a substring of any x86 or
    MIPS transfer mnemonic listed below; ``end`` is exclusive.

    NOTE(review): the substring test (rather than exact membership) also
    matches partial and empty mnemonics — confirm this is intended.
    """
    x86_TI = {'jmp':1, 'jz':1, 'jnz':1, 'js':1, 'je':1, 'jne':1, 'jg':1, 'jle':1, 'jge':1, 'ja':1, 'jnc':1, 'call':1}
    mips_TI = {'beq':1, 'bne':1, 'bgtz':1, "bltz":1, "bgez":1, "blez":1, 'j':1, 'jal':1, 'jr':1, 'jalr':1}
    # Declared by the original code but not part of the lookup table.
    arm_TI = {'MVN':1, "MOV":1}
    calls = dict(x86_TI)
    calls.update(mips_TI)
    inst_addr, end = bl[0], bl[1]
    invoke_num = 0
    while inst_addr < end:
        opcode = GetMnem(inst_addr)
        if any(opcode in known for known in calls):
            invoke_num += 1
        inst_addr = NextHead(inst_addr)
    return invoke_num
|
BIN
Genius3/raw-feature-extractor/graph_analysis_ida.pyc
Normal file
BIN
Genius3/raw-feature-extractor/graph_analysis_ida.pyc
Normal file
Binary file not shown.
26
Genius3/raw-feature-extractor/graph_property.py
Normal file
26
Genius3/raw-feature-extractor/graph_property.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# -*- coding: UTF-8 -*-
|
||||||
|
import networkx as nx
|
||||||
|
import pdb
|
||||||
|
def betweeness(g):
    """Return ``nx.betweenness_centrality(g)`` for the graph ``g``."""
    return nx.betweenness_centrality(g)
|
||||||
|
|
||||||
|
def eigenvector(g):
    """Return ``nx.eigenvector_centrality(g)`` for the graph ``g``."""
    return nx.eigenvector_centrality(g)
|
||||||
|
|
||||||
|
def closeness_centrality(g):
    """Return ``nx.closeness_centrality(g)`` for the graph ``g``."""
    return nx.closeness_centrality(g)
|
||||||
|
|
||||||
|
def retrieveGP(g):
    """Reduce the per-node betweenness of ``g`` to a single number.

    Turns the basic-block-level betweenness values into one function-level
    value: their mean, rounded to 5 decimal places.
    """
    scores = sorted(betweeness(g).values())
    return round(sum(scores) / len(scores), 5)
|
||||||
|
|
BIN
Genius3/raw-feature-extractor/graph_property.pyc
Normal file
BIN
Genius3/raw-feature-extractor/graph_property.pyc
Normal file
Binary file not shown.
56
Genius3/raw-feature-extractor/preprocessing_ida.py
Normal file
56
Genius3/raw-feature-extractor/preprocessing_ida.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
# -*- coding: UTF-8 -*-
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from func import *
|
||||||
|
from raw_graphs import *
|
||||||
|
from idc import *
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
import raw_graphs
|
||||||
|
|
||||||
|
def print_obj(obj):
    """Print all attributes of ``obj`` (its ``__dict__``)."""
    attributes = obj.__dict__
    print(attributes)
|
||||||
|
|
||||||
|
def parse_command():
    """Parse the command line and return the resulting namespace.

    Recognises a single optional ``--path`` string argument.
    """
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument("--path", type=str, help="The directory where to store the generated .ida file")
    return parser.parse_args()
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Imported here so the timing below does not rely on ``time`` leaking in
    # through one of the star imports at the top of the file.
    import time

    # Example invocations (IDA 6.8):
    # E:\BaiduNetdiskDownload\IDA_Pro_v6.8\IDA_Pro_v6.8\idaq.exe -c -S"raw-feature-extractor/preprocessing_ida.py --path C:\Program1\pycharmproject\Genius3\acfgs" hpcenter
    # E:\BaiduNetdiskDownload\IDA_Pro_v6.8\IDA_Pro_v6.8\idaq.exe -c -A -S"raw-feature-extractor/preprocessing_ida.py --path C:\Program1\pycharmproject\Genius4\acfgs" hpcenter
    # Observed inside IDA: sys.argv is ['raw-feature-extractor/preprocessing_ida.py']
    # while idc.ARGV is ['raw-feature-extractor/preprocessing_ida.py', '--path',
    # 'C:\\Program1\\pycharmproject\\Genius3\\acfgs'], hence the path is read
    # from idc.ARGV[2] below.

    # Measure how long generating the raw features takes.
    start_t = time.clock()

    args = parse_command()
    #path = args.path
    path = idc.ARGV[2]
    analysis_flags = idc.GetShortPrm(idc.INF_START_AF)
    analysis_flags &= ~idc.AF_IMMOFF
    # turn off "automatically make offset" heuristic
    idc.SetShortPrm(idc.INF_START_AF, analysis_flags)
    idaapi.autoWait()
    cfgs = get_func_cfgs_c(FirstSeg())

    end_t = time.clock()
    # Recorded timings: 1.5934438s hpcenter (83.4 KB), 35.6745299s SCGDW698
    # (5.5 MB), 14.1480888s SCMQTTIot (762 KB). The figure covers both IDA's
    # analysis of the binary and the raw feature generation of this script.
    # It presumably grows linearly with the number of functions and basic
    # blocks; IDA's own analysis probably accounts for a large share.
    print(end_t - start_t)

    binary_name = idc.GetInputFile() + '.ida'
    print(path)
    print(binary_name)
    fullpath = os.path.join(path, binary_name)
    # Close the output file deterministically so the pickle is flushed even
    # if IDA is terminated right after the script ends.
    with open(fullpath, 'w') as out_file:
        pickle.dump(cfgs, out_file)

    # Uncomment to make IDA exit as soon as the script has finished.
    #idc.Exit(0)
|
BIN
Genius3/raw-feature-extractor/preprocessing_ida.pyc
Normal file
BIN
Genius3/raw-feature-extractor/preprocessing_ida.pyc
Normal file
Binary file not shown.
291
Genius3/raw-feature-extractor/raw_graphs.py
Normal file
291
Genius3/raw-feature-extractor/raw_graphs.py
Normal file
@ -0,0 +1,291 @@
|
|||||||
|
# -*- coding: UTF-8 -*-
|
||||||
|
import itertools
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, '/usr/local/lib/python2.7/dist-packages/')
|
||||||
|
sys.path.insert(1, 'C:/Python27/Lib/site-packages')
|
||||||
|
|
||||||
|
import networkx as nx
|
||||||
|
#import numpy as np
|
||||||
|
from subprocess import Popen, PIPE
|
||||||
|
import pdb
|
||||||
|
import os
|
||||||
|
import re,mmap
|
||||||
|
#from graph_edit_new import *
|
||||||
|
|
||||||
|
class raw_graph:
    """Attributed control-flow graph of a single function.

    Attributes:
        funcname: name of the function.
        old_g: the graph passed in as ``g[0]``; its nodes carry the raw
            per-block attributes read by ``retrieveVec``.
        g: new ``nx.DiGraph`` with the same nodes and edges, each node
            carrying its feature vector under the key ``'v'``.
        entry: ``g[1]`` as supplied by the caller.
        fun_features: function-level features supplied by the caller.
    """

    # Directory and tool used by enumerating_efficient().
    _FANMOD_DIR = '/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/'
    _FANMOD_BIN = "/home/qian/workspace/FANMOD-command_line-source/executables/./fanmod_command_line_linux"

    def __init__(self, funcname, g, func_f):
        self.funcname = funcname
        self.old_g = g[0]
        self.g = nx.DiGraph()
        self.entry = g[1]
        self.fun_features = func_f
        self.attributing()

    def __len__(self):
        return len(self.g)

    def attributing(self):
        """Fill ``self.g`` with the nodes, feature vectors and edges of ``old_g``."""
        self.obtainOffsprings(self.old_g)
        for node in self.old_g:
            fvector = self.retrieveVec(node, self.old_g)
            self.g.add_node(node)
            self.g.node[node]['v'] = fvector
        for edge in self.old_g.edges():
            self.g.add_edge(edge[0], edge[1])

    def obtainOffsprings(self, g):
        """Store the number of reachable nodes of every node under ``'offs'``."""
        for node in g.nodes():
            offsprings = {}
            self.getOffsprings(g, node, offsprings)
            g.node[node]['offs'] = len(offsprings)
        return g

    def getOffsprings(self, g, node, offsprings):
        """Add every node reachable from ``node`` to the dict ``offsprings``.

        Implemented iteratively so long successor chains cannot exhaust the
        interpreter's recursion limit.
        """
        pending = [node]
        while pending:
            current = pending.pop()
            for suc in g.successors(current):
                if suc not in offsprings:
                    offsprings[suc] = 1
                    pending.append(suc)

    def retrieveVec(self, id_, g):
        """Return the feature vector of node ``id_`` of ``g``.

        Order: constants, strings, offspring count, arithmetic
        instructions, calls, instructions, logic instructions, transfer
        instructions.
        """
        keys = ('consts', 'strings', 'offs', 'numAs', 'numCalls',
                'numIns', 'numLIs', 'numTIs')
        attrs = g.node[id_]
        return [attrs[key] for key in keys]

    def enumerating(self, n):
        """Return all weakly connected ``n``-node subgraphs of ``self.g``."""
        subgs = []
        for sub_nodes in itertools.combinations(self.g.nodes(), n):
            subg = self.g.subgraph(sub_nodes)
            if nx.is_connected(subg.to_undirected()):
                subgs.append(subg)
        return subgs

    def genMotifs(self, n):
        """Group the ``n``-node subgraphs into isomorphism classes.

        Returns:
            dict: representative subgraph -> list of isomorphic subgraphs.
        """
        motifs = {}
        # Was a bare ``enumerating(n)``, which raised NameError.
        for subg in self.enumerating(n):
            for mt in motifs:
                if nx.is_isomorphic(mt, subg):
                    motifs[mt].append(subg)
                    break
            else:
                motifs[subg] = [subg]
        return motifs

    def enumerating_efficient(self, n):
        """Enumerate ``n``-node subgraphs with the external FANMOD tool.

        Graphs with 200 or more nodes are skipped and yield ``[]``.
        """
        if len(self.g) >= 200:
            return []
        edge_file = self._FANMOD_DIR + 'OUTPUT.txt'
        motif_file = self._FANMOD_DIR + 'MotifCount.txt'
        dump_file = motif_file + '.dump'
        with open(edge_file, 'wb') as f:
            nx.write_edgelist(self.g, f, data=False)
        process = Popen([self._FANMOD_BIN, str(n), "100000", "1", edge_file, "1", "0", "0", "2", "0", "0", "0", "1000", "3", "3", motif_file, "0", "1"], stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
        # NOTE(review): ``>= 0`` accepts non-zero exit codes as success;
        # kept as in the original — confirm against FANMOD's exit codes.
        if process.returncode >= 0:
            subgs = self.parseOutput(dump_file, n)
            os.remove(dump_file)
            return subgs
        return []

    def _matched_index_lists(self, path):
        """Return the node-index lists found in the dump file at ``path``.

        Every ``a,b,c,d`` match contributes ``[b, c, d]`` as integers.
        """
        pattern = re.compile(r'[0-9]+,[0-9]+,[0-9]+,[0-9]+')
        with open(path, 'r') as f:
            data = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            try:
                mo = re.findall(pattern, data)
            finally:
                # The mapping used to be left open.
                data.close()
        return [[int(x) for x in v.split(',')[1:]] for v in mo]

    def parseOutput(self, path, n):
        """Build template graphs from the dump file at ``path``."""
        results = self._matched_index_lists(path)
        if results:
            return self.createGraphDirectly(results)
        return []

    def parseOutputByconditions(self, path, n):
        """Same as ``parseOutput`` but via ``create_Graphbycondition_Directly``."""
        results = self._matched_index_lists(path)
        if results:
            return self.create_Graphbycondition_Directly(results)
        return []

    def create_Graphbycondition_Directly(self, results):
        """Wrap the subgraph of each index list in a ``template_graph``."""
        return self.createGraphDirectly(results)

    def createGraphDirectly(self, results):
        """Wrap the subgraph of each index list in a ``template_graph``."""
        subgs = []
        for indexes in results:
            tg = template_graph()
            tg.updateG(self.g.subgraph(indexes))
            subgs.append(tg)
        return subgs

    def createGraph(self, results, n):
        """Build a ``template_graph`` from an adjacency bit string.

        ``results[0]`` is the ``n*n`` bit string. Returns ``None`` (after
        printing a message) when the resulting graph is empty.
        """
        binary_value = int(results[0], 2)
        fang = self.createG(results[0], n)
        if fang:
            tg = template_graph(binary_value)
            # NOTE(review): this used to pass ``(fang, indexes, self.g)``,
            # but template_graph.updateG accepts a single graph — confirm
            # that dropping the index mapping is acceptable.
            tg.updateG(fang)
            return tg
        # The interactive pdb breakpoint that used to sit here would hang
        # unattended runs; only the message is kept.
        print("there is g which is none")

    def createG(self, binary_str, n):
        """Return the directed graph described by an ``n*n`` bit string.

        Raises:
            ValueError: if ``binary_str`` does not hold ``n*n`` digits.
        """
        bits = [int(v) for v in binary_str]
        if len(bits) != n * n:
            raise ValueError("expected %d bits, got %d" % (n * n, len(bits)))
        g = nx.DiGraph()
        # Row-major adjacency matrix; plain indexing replaces the numpy
        # reshape whose import is commented out at the top of the file.
        for i in range(n):
            for j in range(n):
                if bits[i * n + j] == 1:
                    g.add_edge(i, j)
        return g
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class raw_graphs:
    """Container for the ``raw_graph`` objects extracted from one binary.

    Starts with an empty list that is then filled through ``append``.
    """

    def __init__(self, binary_name):
        self.raw_graph_list = []
        self.binary_name = binary_name

    def append(self, raw_g):
        """Add one graph to the collection."""
        self.raw_graph_list.append(raw_g)

    def __len__(self):
        """Return the number of stored graphs."""
        return len(self.raw_graph_list)
|
||||||
|
|
||||||
|
|
||||||
|
class graphlets:
    """List of subgraphs (graphlets) belonging to one function."""

    def __init__(self, funcname):
        self.binary_name = None
        self.graphlets_list = []
        self.funcname = funcname

    def updateBN(self, binary_name):
        """Record the name of the binary the function comes from."""
        self.binary_name = binary_name

    def append(self, subg):
        """Add a single subgraph."""
        self.graphlets_list.append(subg)

    def appendSet(self, subgs):
        """Add every subgraph of the iterable ``subgs``."""
        self.graphlets_list.extend(subgs)

    def __len__(self):
        """Return the number of stored subgraphs."""
        return len(self.graphlets_list)
|
||||||
|
|
||||||
|
class template_graph:
    """Holder pairing a graph with an optional numeric value."""

    def __init__(self, value=None):
        self.g = None
        self.value = value

    def updateG(self, g):
        """Replace the stored graph with ``g``."""
        self.g = g
|
||||||
|
#def updateIndexes(self, indexes):
|
||||||
|
# self.indexes = indexes
|
||||||
|
|
||||||
|
#def updateAttributes(self, pg, indexes, maing):
|
||||||
|
# for id_ in xrange(len(indexes)):
|
||||||
|
# index = indexes[id_]
|
||||||
|
# gnode = self.findNode(index, maing)
|
||||||
|
# self.g.node[gnode] = pg.node[index]
|
||||||
|
|
||||||
|
|
||||||
|
class template_graphs:
    """All connected directed graphs over ``size`` nodes.

    Attributes:
        size: number of nodes of every template.
        gs: ``template_graph`` objects produced by ``enumeratingAll``.
        bit_len: number of adjacency-matrix bits (``size * size``); set by
            ``genBinValue``.
    """

    def __init__(self, size):
        self.size = size
        self.gs = []
        self.bit_len = None

    def enumeratingAll(self):
        """Fill ``self.gs`` with one template per valid adjacency matrix."""
        # Value 0 (no edges) is skipped, as before.
        for i in xrange(1, self.genBinValue()):
            g = self.createG(i)
            if g:
                tg = template_graph(i)
                tg.updateG(g)
                self.gs.append(tg)

    def genBinValue(self):
        """Set ``bit_len`` and return the number of adjacency matrices."""
        self.bit_len = self.size * self.size
        return 2 ** self.bit_len

    def createG(self, i):
        """Return the graph encoded by integer ``i``, or ``False``.

        The bits of ``i`` are read as a row-major adjacency matrix. The
        graph is returned only if it has ``size`` nodes and is connected
        when edge directions are ignored.
        """
        bits = self.genArray(i)
        size = self.size
        g = nx.DiGraph()
        # Plain indexing replaces the numpy reshape; numpy is not imported
        # in this module, so the old code raised NameError.
        for row in range(size):
            for col in range(size):
                if bits[row * size + col] == 1:
                    g.add_edge(row, col)
        if len(g) == size and nx.is_connected(g.to_undirected()):
            return g
        return False

    def genArray(self, i):
        """Return the bits of ``i`` as a list left-padded to ``bit_len``."""
        if self.bit_len is None:
            # Allow direct use without a prior genBinValue() call.
            self.genBinValue()
        digits = [int(x) for x in bin(i)[2:]]
        return [0] * (self.bit_len - len(digits)) + digits
|
BIN
Genius3/raw-feature-extractor/raw_graphs.pyc
Normal file
BIN
Genius3/raw-feature-extractor/raw_graphs.pyc
Normal file
Binary file not shown.
100
Genius3/raw-feature-extractor/read_idaFILE.py
Normal file
100
Genius3/raw-feature-extractor/read_idaFILE.py
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
# -*- coding: UTF-8 -*-
|
||||||
|
import sys
|
||||||
|
import sys
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
sys.path.insert(0, '/usr/local/lib/python2.7/dist-packages/')
|
||||||
|
sys.path.insert(1, 'C:/Python27/Lib/site-packages')
|
||||||
|
import networkx as nx
|
||||||
|
def print_obj(obj):
    """Print all attributes of the object (its instance dictionary)."""
    attributes = obj.__dict__
    print(attributes)
|
||||||
|
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
#sub_10F20 308 反编译代码有字符串,但是这个特征提取里没有字符串 constant,可能是间接引用的,不识别。看了下所有函数的特征,几乎都没有字符串常量,可能都是写在别的地方然后引用的。
|
||||||
|
#sub_166C4 393
|
||||||
|
if __name__ == '__main__':
    # Demo: load the pickled ACFGs of one binary, print the raw features of
    # one function and draw its graphs.

    # Raw string so the backslashes of the Windows path are never treated as
    # escape sequences.
    testpath = r"C:\Program1\pycharmproject\Genius3/acfgs/hpcenter.ida"
    # Index of the function to inspect in raw_graph_list (393 is sub_166C4).
    func_index = 393

    # The open mode is left as 'r'.
    # NOTE(review): pickle files are normally opened in binary mode; confirm
    # how the .ida file was written before switching to 'rb'.
    with open(testpath, 'r') as fr:
        data1 = pickle.load(fr)  # ACFGs of one binary file

    print("一个二进制文件的所有函数的原始特征,list。")
    print_obj(data1)  # ACFG list
    print("\n")

    raw_graph = data1.raw_graph_list[func_index]  # ACFG of one function

    print("一个函数的原始特征,由old_g(discovRe方法的ACFG),g(Genius方法的ACFG),fun_feature(表示函数级别的特征的向量)三部分构成")
    print_obj(raw_graph)
    print("\n")

    feature = raw_graph.fun_features
    print("函数级别特征: # 1 function calls # 2 logic instructions # 3 TransferIns # 4 LocalVariables # 5 BB basicblocks# 6 Edges # 7 IncommingCalls# 8 Intrs# 9 between # 10 strings # 11 consts")
    print(feature)
    print("\n")

    # Features of one basic block: 'consts' numeric constants, 'strings'
    # string constants, 'offs' offspring (number of child nodes?), 'numAs'
    # arithmetic instructions such as INC, 'numCalls' call instructions,
    # 'numIns' instruction count, 'numLIs' logic instructions such as AND,
    # 'numTIs' transfer instructions.
    G = raw_graph.g
    print("# 一个基本块的特征 #1'consts' 数字常量 #2'strings'字符串常量 #3'offs' offspring 字节点数量? #4'numAs' 算数指令如INC #5'numCalls' 调用指令 #6'numIns' 指令数量 #7'numLIs' LogicInstructions 如AND #8'numTIs' 转移指令数量")
    print(G.node[0])
    print("\n")

    # old_g is read straight from IDA's CFG, so node count, edge direction
    # etc. are the same; g is generated from old_g and matches it as well.

    # old_g
    G = raw_graph.old_g
    nx.draw(G, with_labels=True)
    plt.show()

    # g
    G = raw_graph.g
    nx.draw(G, with_labels=True)
    plt.show()

    # Draw g again, labelling each node with its attribute 'v' (the raw
    # feature vector). The attributes of old_g are described in
    # cfg_constructor.py.
    pos = nx.spring_layout(G)
    nx.draw(G, pos)
    node_labels = nx.get_node_attributes(G, 'v')
    nx.draw_networkx_labels(G, pos, labels=node_labels)
    plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
# 1 function calls(本函数的函数调用指令(call jal jalr)数量)。。注意arm中没有这些指令
|
||||||
|
|
||||||
|
# 2 logic instructions ,本函数的逻辑运算指令数量。如and、or的数量
|
||||||
|
|
||||||
|
# 3 TransferIns 转移指令(如jmp arm中为mov)数量
|
||||||
|
|
||||||
|
# 4 LocalVariables 局部变量数量
|
||||||
|
|
||||||
|
# 5 BB basicblocks数量
|
||||||
|
|
||||||
|
# 6 Edges icfg edges数量。icfg是另一篇论文dicovRe中的特征,这里暂时不管
|
||||||
|
|
||||||
|
# 7 IncommingCalls,调用本函数的指令数量
|
||||||
|
|
||||||
|
# 8 Intrs 指令数量
|
||||||
|
|
||||||
|
# 9 between 结构特征中的betweeness。
|
||||||
|
|
||||||
|
# 10 strings 字符串
|
||||||
|
|
||||||
|
# 11 consts 数字常量
|
8
Genius3/raw-feature-extractor/test.py
Normal file
8
Genius3/raw-feature-extractor/test.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
|
||||||
|
import pickle
|
||||||
|
# Smoke test: load the pickled ACFG file and print the type of the result.
# Raw string so the backslashes of the Windows path stay literal.
testpath = r"C:\Program1\pycharmproject\Genius3/acfgs/hpcenter.ida"
# The file is closed as soon as it has been unpickled; open mode unchanged.
with open(testpath, 'r') as fr:
    data1 = pickle.load(fr)
print(type(data1))
|
||||||
|
# # print_obj(data1)
|
||||||
|
# print cfgs.raw_graph_list[0]
|
356
Genius3/search-engine/db.py
Normal file
356
Genius3/search-engine/db.py
Normal file
@ -0,0 +1,356 @@
|
|||||||
|
import cPickle as pickle
|
||||||
|
from search import *
|
||||||
|
from nearpy import Engine
|
||||||
|
from nearpy.hashes import RandomDiscretizedProjections
|
||||||
|
from nearpy.filters import NearestFilter, UniqueFilter
|
||||||
|
from nearpy.distances import EuclideanDistance
|
||||||
|
from nearpy.distances import CosineDistance
|
||||||
|
from nearpy.hashes import RandomBinaryProjections
|
||||||
|
from nearpy.experiments import DistanceRatioExperiment
|
||||||
|
from redis import Redis
|
||||||
|
from nearpy.storage import RedisStorage
|
||||||
|
from feature import *
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
import pdb
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
import numpy as np
|
||||||
|
from refactoring import *
|
||||||
|
import pymongo
|
||||||
|
from pymongo import MongoClient
|
||||||
|
|
||||||
|
def initDB():
    """Return the 'iot-encoding' database of the local MongoDB server.

    The original created three MongoClient objects and two database handles
    and used only the last of each; a single client is created now.
    """
    client = MongoClient('mongodb://localhost:27017/')
    return client['iot-encoding']
|
||||||
|
|
||||||
|
# Module-level database handle and its `posts` collection, created at import.
# NOTE(review): `class db` defined below reuses the name `db`, so once the
# class statement has run `db` refers to the class; `posts` keeps pointing at
# the collection obtained here.
db = initDB()
posts = db.posts
|
||||||
|
|
||||||
|
class db:
    """Feature-vector store backed by a NearPy engine with Redis storage.

    Attributes:
        feature_list: mapping persisted by dump()/loadDB() and searched by
            findF().
        engine: the NearPy Engine; None until loadHashmap() or loadDB().
    """

    def __init__(self):
        self.feature_list = {}
        self.engine = None

    def loadHashmap(self, feature_size, result_n):
        """Create self.engine on top of the Redis server at localhost:6379.

        feature_size and result_n are accepted but not used: the engine is
        created for 192 dimensions with NearestFilter(1000).
        """
        # Create redis storage adapter
        redis_object = Redis(host='localhost', port=6379, db=0)
        redis_storage = RedisStorage(redis_object)
        try:
            # Reuse the hash configuration stored in redis under 'test'.
            config = redis_storage.load_hash_configuration('test')
            lshash = RandomBinaryProjections(None, None)
            lshash.apply_config(config)
        except Exception:
            # No usable configuration: create the hash from scratch.
            # NOTE(review): the original comment said "10 projections" but
            # the code passes 0 - confirm which one is intended.
            lshash = RandomBinaryProjections('test', 0)

        nearest = NearestFilter(1000)
        self.engine = Engine(192, lshashes=[lshash], vector_filters=[nearest], storage=redis_storage, distance=EuclideanDistance())

        # Store the hash configuration in redis for later use.
        redis_storage.store_hash_configuration(lshash)

    def appendToDB(self, binary_name, funcname, fvector, firmware_name=""):
        """Store fvector under the key 'firmware.binary.function'.

        Does nothing when fvector is None.
        """
        if fvector is None:
            return
        key = ".".join((firmware_name, binary_name, funcname))
        self.engine.store_vector(np.asarray(fvector), key)

    def batch_appendDB(self, binary_name, features, firmware_name=""):
        """Store every vector of the mapping function name -> vector."""
        for funcname in features:
            self.appendToDB(binary_name, funcname, features[funcname], firmware_name)

    def batch_appendDBbyDir(self, base_dir):
        """Store every MongoDB post of firmware 'ddwrt-r21676_result'.

        base_dir is not used. The running index of each post is printed.
        """
        cursor = posts.find({"firmware_name": "ddwrt-r21676_result"})
        for i, v in enumerate(cursor):
            print(i)
            self.appendToDB(v['binary_name'], v['func_name'], v['fvector'], v['firmware_name'])

    def batch_appendDBbyDir1(self, base_dir):
        """Store the vectors of every '*.features' pickle under base_dir/image.

        The layout walked is base_dir/image/<firmware>/<binary>.features,
        where each file holds a mapping function name -> vector. The number
        of files and functions processed is printed at the end.
        """
        image_dir = os.path.join(base_dir, "image")
        bnum = 0
        fnum = 0
        for firmware_name in os.listdir(image_dir):
            print(firmware_name)
            firmware_dir = os.path.join(image_dir, firmware_name)
            for binary_name in os.listdir(firmware_dir):
                if not binary_name.endswith(".features"):
                    continue
                bnum += 1
                featrues_dir = os.path.join(firmware_dir, binary_name)
                with open(featrues_dir, "r") as fp:
                    featrues = pickle.load(fp)
                for funcname in featrues:
                    fnum += 1
                    self.appendToDB(binary_name, funcname, featrues[funcname], firmware_name)
        print("bnum ", bnum)
        print("fnum ", fnum)

    def dump(self, base_dir):
        """Pickle feature_list and engine below base_dir/data/db."""
        db_dir = os.path.join(base_dir, "data/db/busybox.feature_mapping")
        with open(db_dir, 'w') as fp:
            pickle.dump(self.feature_list, fp)
        db_dir = os.path.join(base_dir, "data/db/busybox.hashmap")
        with open(db_dir, 'w') as fp:
            pickle.dump(self.engine, fp)

    def loadDB(self, base_dir):
        """Load feature_list and engine from the files written by dump()."""
        db_dir = os.path.join(base_dir, "data/db/busybox.feature_mapping")
        with open(db_dir, 'r') as fp:
            self.feature_list = pickle.load(fp)
        db_dir = os.path.join(base_dir, "data/db/busybox.hashmap")
        with open(db_dir, 'r') as fp:
            self.engine = pickle.load(fp)

    def findF(self, binary_name, funcname):
        """Return the first key of feature_list that holds binary_name/funcname.

        Raises:
            IndexError: when no entry matches.
        """
        x = [v for v in self.feature_list
             if binary_name in self.feature_list[v]
             and funcname in self.feature_list[v][binary_name]]
        return x[0]
|
||||||
|
|
||||||
|
def retrieveFeaturesByDir(n, base_dir):
    # Scans base_dir for entries whose name ends in ".features" and, for the
    # first such entry only, tries to load "<binary>_cb<n>.features" pickles
    # and hand every function's feature vector to appendToDB.
    # NOTE(review): this function cannot run to completion as written; see
    # the notes on the individual lines below.
    firmware_featrues={}
    i = 0
    for firmware_name in os.listdir(base_dir):
        # NOTE(review): str has no method `endWith` (the builtin is
        # `endswith`), so this raises AttributeError on the first entry.
        if firmware_name.endWith(".features"):
            firmware_featrues[firmware_name] = {}
            firmware_dir = os.path.join(base_dir, firmware_name)
            # Only the first matching entry is processed.
            if i > 0:
                break
            i += 1
            pdb.set_trace()  # interactive breakpoint
            for binary_name in os.listdir(firmware_dir):
                featrues_dir = os.path.join(firmware_dir, binary_name + "_cb" + str(n) + ".features")
                featrues = pickle.load(open(featrues_dir, "r"))
                for funcname in featrues:
                    feature = featrues[funcname]
                    # NOTE(review): `self` is not a parameter of this
                    # function, and the argument order differs from
                    # db.appendToDB(binary_name, funcname, fvector,
                    # firmware_name) - confirm the intended call.
                    self.appendToDB(firmware_name, binary_name, funcname, feature)
                del featrues
|
||||||
|
|
||||||
|
def retrieveFeatures(n, base_dir, filename, funcs):
    """Load base_dir/5000/<filename>_cb<n>.features as numpy arrays.

    Args:
        n: number used in the "_cb<n>" part of the file name.
        base_dir: directory that contains the "5000" sub-directory.
        filename: binary name used as file-name prefix.
        funcs: not used (the filter on it is disabled).

    Returns:
        dict mapping every name in the pickle to np.asarray of its value.
    """
    featrues_dir = os.path.join(base_dir, "5000", filename + "_cb" + str(n) + ".features")
    # Close the file as soon as the pickle has been read.
    with open(featrues_dir, "r") as fp:
        featrues = pickle.load(fp)
    feature_dic = {}
    for name in featrues:
        feature_dic[name] = np.asarray(featrues[name])
    return feature_dic
|
||||||
|
|
||||||
|
def retrieveVuldb(base_input_dir):
    """Unpickle and return the content of the file base_input_dir/vul."""
    vul_path = os.path.join(base_input_dir, "vul")
    # Close the file as soon as the pickle has been read.
    with open(vul_path, "r") as fp:
        return pickle.load(fp)
|
||||||
|
|
||||||
|
|
||||||
|
def retrieveFeaturesx(filename):
    """Unpickle and return the content of ./data/<filename>.features."""
    ida_input_dir = os.path.join("./data/", filename + ".features")
    # Close the file as soon as the pickle has been read.
    with open(ida_input_dir, "r") as fp:
        return pickle.load(fp)
|
||||||
|
|
||||||
|
def retrieveQueries(n, base_dir, filename1, featrues_src):
    """Load base_dir/5000/<filename1>_cb<n>.features as query vectors.

    Args:
        n: number used in the "_cb<n>" part of the file name.
        base_dir: directory that contains the "5000" sub-directory.
        filename1: binary name used as file-name prefix.
        featrues_src: not used (the filter on it is disabled).

    Returns:
        dict mapping every name in the pickle to np.asarray of its value.
    """
    featrues_dir = os.path.join(base_dir, "5000", filename1 + "_cb" + str(n) + ".features")
    # Close the file as soon as the pickle has been read.
    with open(featrues_dir, "r") as fp:
        featrues = pickle.load(fp)
    queries = {}
    for name in featrues:
        queries[name] = np.asarray(featrues[name])
    return queries
|
||||||
|
|
||||||
|
def retrieveQueriesbyDir(n, base_dir, firmware_name, filename1):
    """Load base_dir/<firmware_name>/<filename1>_cb<n>.features as query vectors.

    Returns:
        dict mapping every name in the pickle to np.asarray of its value.
    """
    featrues_dir = os.path.join(base_dir, firmware_name, filename1 + "_cb" + str(n) + ".features")
    # Close the file as soon as the pickle has been read.
    with open(featrues_dir, "r") as fp:
        featrues = pickle.load(fp)
    queries = {}
    for name in featrues:
        queries[name] = np.asarray(featrues[name])
    return queries
|
||||||
|
|
||||||
|
def retrieveQuery(n, base_dir, filename, funcname):
    """Return the vector of the first entry whose name contains funcname.

    The entries are read from base_dir/<filename>_cb<n>.features.

    Raises:
        IndexError: when no entry name contains funcname.
    """
    featrues_dir = os.path.join(base_dir, filename + "_cb" + str(n) + ".features")
    # Close the file as soon as the pickle has been read.
    with open(featrues_dir, "r") as fp:
        featrues = pickle.load(fp)
    matches = [featrues[v] for v in featrues if funcname in v]
    return np.asarray(matches[0])
|
||||||
|
|
||||||
|
def parse_command():
    """Parse the command-line options of this script.

    Returns:
        argparse.Namespace with base_input_dir, output_dir, filename1,
        filename2 (strings or None) and size (int or None).
    """
    parser = argparse.ArgumentParser(
        description='Index the features of one binary and query them with the features of another.')
    parser.add_argument("--base_input_dir", type=str, help="raw binaries to process for training")
    parser.add_argument('--output_dir', type=str, help="output dir")
    # The help texts of the two file names were copy-pasted from --size.
    parser.add_argument("--filename1", type=str, help="name of the binary whose features are stored in the database")
    parser.add_argument("--filename2", type=str, help="name of the binary whose features are used as queries")
    parser.add_argument("--size", type=int, help="the size of each graphlet")
    return parser.parse_args()
|
||||||
|
|
||||||
|
def loadFuncs(path):
    """Read the function names listed one per line in path/func_candid.

    Returns:
        dict mapping every line (without its trailing newline) to 1.
    """
    funcs = {}
    x86_dir = os.path.join(path, "func_candid")
    # The file is closed even if reading fails.
    with open(x86_dir, "r") as fp:
        for line in fp:
            funcname = line.split("\n")[0]
            funcs[funcname] = 1
    return funcs
|
||||||
|
|
||||||
|
def dump(path, featrues, queries):
    """Write both vector sets as tab-separated rows to the file path/matrix.

    Each row is "<arch>\t<name>\t<v1>\t<v2>...", with every value formatted
    as %.3f. Rows of featrues are tagged "x86", rows of queries "mips".
    Vectors of any length are accepted (the format used to be fixed to
    exactly 16 values).

    Args:
        path: directory in which the file "matrix" is created.
        featrues: mapping name -> sequence of numbers.
        queries: mapping name -> sequence of numbers.
    """
    # The file is closed (and flushed) even if formatting a row fails.
    with open(path + "/" + "matrix", 'w') as fp:
        for arch, vectors in (("x86", featrues), ("mips", queries)):
            for name in vectors:
                values = list(vectors[name])
                row_format = "%s\t%s" + "\t%.3f" * len(values) + "\n"
                fp.write(row_format % tuple([arch, name] + values))
|
||||||
|
|
||||||
|
|
||||||
|
def queryBytwo(base_input_dir, filename1, filename2, n):
    """Index the features of filename1, query with filename2, print accuracies.

    For every threshold in 1, 11, ..., 201 the fraction of queries whose
    own name is found at a rank <= threshold among the returned neighbours
    is printed. Queries whose name is not among the indexed features are
    skipped.

    Args:
        base_input_dir: directory holding func_candid, vul and the "5000"
            feature directory.
        filename1: binary whose features are stored in the database.
        filename2: binary whose features are used as queries.
        n: number used in the "_cb<n>" feature file names.
    """
    db_instance = db()
    funcs = loadFuncs(base_input_dir)
    db_instance.loadHashmap(n, 50000)
    featrues = retrieveFeatures(n, base_input_dir, filename1, funcs)
    queries = retrieveQueries(n, base_input_dir, filename2, funcs)
    # Loaded as before although the active code path does not read it.
    vul_db = retrieveVuldb(base_input_dir)
    db_instance.batch_appendDB(filename1, featrues)

    # The rank of a query does not depend on the threshold, so every query
    # is sent to the engine once instead of once per threshold.
    ranks = []
    for name in queries:
        if name not in featrues:
            continue
        x = db_instance.engine.neighbours(queries[name])
        try:
            rank = [pos for pos, found in enumerate(x) if name in found[1]][0]
        except Exception:
            # Not among the neighbours (or unusable result): counts as a miss.
            rank = None
        ranks.append((name, rank))

    for threthold in range(1, 210, 10):
        hit = [1 if rank is not None and rank <= threthold else 0
               for _, rank in ranks]
        # No usable query at all: report 0.0 instead of dividing by zero.
        acc = sum(hit) * 1.0 / len(hit) if hit else 0.0
        print(acc)
|
||||||
|
|
||||||
|
def queryAll(base_dir, firmware_name, filename1, n):
    """Fill the database from MongoDB, run all queries and print the accuracy.

    Prints the time needed to fill the database, the running index of every
    query and finally the fraction of queries whose own name is found at a
    rank <= 155 among the returned neighbours.

    Args:
        base_dir: base directory of the feature files.
        firmware_name: sub-directory of base_dir holding the query file.
        filename1: binary whose features are used as queries.
        n: number used in the "_cb<n>" feature file name.
    """
    threthold = 155
    db_instance = db()
    db_instance.loadHashmap(n, 50000)
    queries = retrieveQueriesbyDir(n, base_dir, firmware_name, filename1)

    start = time.time()
    # batch_appendDBbyDir takes only base_dir; passing n as well raised a
    # TypeError.
    db_instance.batch_appendDBbyDir(base_dir)
    dur = time.time() - start
    print(dur)

    hit = []
    for i, name in enumerate(queries):
        print(i)
        x = db_instance.engine.neighbours(queries[name])
        try:
            rank = [pos for pos, found in enumerate(x) if name in found[1]]
            if len(rank) > 1:
                print("stop")
            hit.append(1 if rank[0] <= threthold else 0)
        except Exception:
            # Not among the neighbours (or unusable result): counts as a miss.
            hit.append(0)

    # No query at all: report 0.0 instead of dividing by zero.
    acc = sum(hit) * 1.0 / len(hit) if hit else 0.0
    print(acc)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Entry point: parse the options and run the two-binary query.
    args = parse_command()
    base_dir = args.base_input_dir
    filename1 = args.filename1
    filename2 = args.filename2
    n = args.size
    queryBytwo(base_dir, filename1, filename2, n)
|
15
README.md
15
README.md
@ -1,9 +1,22 @@
|
|||||||
|
Forked from qian-feng/Gencoding. Genius3 is my pycharm project.
|
||||||
|
|
||||||
|
----------------------------------------------------------------------------------------
|
||||||
|
|
||||||
This project provides two components of Genius, a graph-based bug search framework. The first component is the raw feature extraction. The second is the online bug search engine.
|
This project provides two components of Genius, a graph-based bug search framework. The first component is the raw feature extraction. The second is the online bug search engine.
|
||||||
|
|
||||||
1. The raw feature extraction is designed to achieve following two goals:
|
1. The raw feature extraction is designed to achieve following two goals:
|
||||||
|
|
||||||
-> Extract the control flow graph for each binary function
|
-> Extract the control flow graph for each binary function
|
||||||
-> Extract the attributes for each node in the graph
|
|
||||||
|
-> Extract the attributes for each node in the graph
|
||||||
|
|
||||||
|
The feature extraction is built on top of IDA-pro. We wrote the scripts based on ida-python and extract the attributed control flow graph. ``preprocessing_ida.py'' is the main program to extract the ACFG.
|
||||||
|
|
||||||
2. The online bug search engine is used for real-time search:
|
2. The online bug search engine is used for real-time search:
|
||||||
|
|
||||||
-> It utilizes locality-sensitive hashing for indexing
|
-> It utilizes locality-sensitive hashing for indexing
|
||||||
|
|
||||||
-> Nearest-neighbor search algorithm for search
|
-> Nearest-neighbor search algorithm for search
|
||||||
|
|
||||||
|
The online search is based on nearpy (https://github.com/pixelogik/NearPy).
|
||||||
|
|
||||||
|
24
raw-feature-extractor/graph_property.py
Normal file
24
raw-feature-extractor/graph_property.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
import networkx as nx
|
||||||
|
import pdb
|
||||||
|
def betweeness(g):
    """Return the result of networkx's betweenness_centrality for graph g."""
    return nx.betweenness_centrality(g)
|
||||||
|
|
||||||
|
def eigenvector(g):
    """Return the result of networkx's eigenvector_centrality for graph g."""
    return nx.eigenvector_centrality(g)
|
||||||
|
|
||||||
|
def closeness_centrality(g):
    """Return the result of networkx's closeness_centrality for graph g."""
    return nx.closeness_centrality(g)
|
||||||
|
|
||||||
|
def retrieveGP(g):
    """Return the mean betweenness value of graph g, rounded to 5 decimals.

    Returns 0.0 when betweeness(g) yields no values (this used to raise
    ZeroDivisionError).
    """
    bf = betweeness(g)
    # Summed in ascending order, as before.
    x = sorted(bf.values())
    if not x:
        return 0.0
    value = sum(x) / len(x)
    return round(value, 5)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user