完善jsonl文件打印

This commit is contained in:
huihun 2023-12-02 21:53:57 +08:00
parent df06fd866b
commit 5de6c4568c
33 changed files with 66029 additions and 630 deletions

View File

@ -1,8 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4"> <module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager"> <component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" /> <content url="file://$MODULE_DIR$">
<orderEntry type="inheritedJdk" /> <sourceFolder url="file://$MODULE_DIR$/python_lib" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 2.7" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>
</module> </module>

View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="D:\Program\miniconda3\envs\pyqt" project-jdk-type="Python SDK" /> <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7" project-jdk-type="Python SDK" />
</project> </project>

View File

@ -16,3 +16,9 @@ This project provides two components of Genius, a graph-based bug search framewo
The online search is based on nearpy (https://github.com/pixelogik/NearPy). The online search is based on nearpy (https://github.com/pixelogik/NearPy).
```
D:\IDA_Pro_v6.8\idaq.exe -c -S"raw-feature-extractor/preprocessing_ida.py" file_name
D:\IDA_Pro_v6.8\idaq.exe -c -S"raw-feature-extractor/preprocessing_ida.py --path XXX" file_name
```

Binary file not shown.

Binary file not shown.

BIN
python_lib/PySide/QtGui.pyd Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
python_lib/PySide/QtSql.pyd Normal file

Binary file not shown.

BIN
python_lib/PySide/QtSvg.pyd Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
python_lib/PySide/QtXml.pyd Normal file

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,3 @@
__all__ = ['QtCore', 'QtGui', 'QtNetwork', 'QtOpenGL', 'QtSql', 'QtSvg', 'QtTest', 'QtWebKit', 'QtScript']
__version__ = "1.1.2"
__version_info__ = (1, 1, 2, "final", 1)

Binary file not shown.

Binary file not shown.

Binary file not shown.

55604
python_lib/idaapi.py Normal file

File diff suppressed because it is too large Load Diff

830
python_lib/idautils.py Normal file
View File

@ -0,0 +1,830 @@
#---------------------------------------------------------------------
# IDAPython - Python plugin for Interactive Disassembler
#
# Copyright (c) 2004-2010 Gergely Erdelyi <gergely.erdelyi@d-dome.net>
#
# All rights reserved.
#
# For detailed copyright information see the file COPYING in
# the root of the distribution archive.
#---------------------------------------------------------------------
"""
idautils.py - High level utility functions for IDA
"""
import idaapi
import idc
import types
import os
def refs(ea, funcfirst, funcnext):
    """
    Generic reference collector - INTERNAL USE ONLY.

    Yields funcfirst(ea) and then every funcnext(ea, previous) value until
    one of them equals idaapi.BADADDR.

    @param ea: address handed to both callbacks
    @param funcfirst: callable(ea) producing the first reference
    @param funcnext: callable(ea, ref) producing the reference after 'ref'
    """
    stop = idaapi.BADADDR
    current = funcfirst(ea)
    while current != stop:
        yield current
        current = funcnext(ea, current)
def CodeRefsTo(ea, flow):
    """
    Get a list of code references to 'ea'

    @param ea:   Target address
    @param flow: Follow normal code flow or not
    @type  flow: Boolean (0/1, False/True)

    @return: list of references (may be empty list)

    Example::

        for ref in CodeRefsTo(ScreenEA(), 1):
            print ref
    """
    # flow == 1 selects the cref pair, anything else the fcref pair.
    if flow == 1:
        first, following = idaapi.get_first_cref_to, idaapi.get_next_cref_to
    else:
        first, following = idaapi.get_first_fcref_to, idaapi.get_next_fcref_to
    return refs(ea, first, following)
def CodeRefsFrom(ea, flow):
    """
    Get a list of code references from 'ea'

    @param ea:   Target address
    @param flow: Follow normal code flow or not
    @type  flow: Boolean (0/1, False/True)

    @return: list of references (may be empty list)

    Example::

        for ref in CodeRefsFrom(ScreenEA(), 1):
            print ref
    """
    # flow == 1 selects the cref pair, anything else the fcref pair.
    if flow == 1:
        first, following = idaapi.get_first_cref_from, idaapi.get_next_cref_from
    else:
        first, following = idaapi.get_first_fcref_from, idaapi.get_next_fcref_from
    return refs(ea, first, following)
def DataRefsTo(ea):
    """
    Get a list of data references to 'ea'

    @param ea:   Target address

    @return: list of references (may be empty list)

    Example::

        for ref in DataRefsTo(ScreenEA()):
            print ref
    """
    first = idaapi.get_first_dref_to
    following = idaapi.get_next_dref_to
    return refs(ea, first, following)
def DataRefsFrom(ea):
    """
    Get a list of data references from 'ea'

    @param ea:   Target address

    @return: list of references (may be empty list)

    Example::

        for ref in DataRefsFrom(ScreenEA()):
            print ref
    """
    first = idaapi.get_first_dref_from
    following = idaapi.get_next_dref_from
    return refs(ea, first, following)
def XrefTypeName(typecode):
    """
    Convert cross-reference type codes to readable names

    @param typecode: cross-reference type code
    @return: the name registered for 'typecode'; an unknown code trips the
             assertion below
    """
    # Data reference names occupy codes 0..5, code reference names 16..21.
    data_names = ['Data_Unknown', 'Data_Offset', 'Data_Write',
                  'Data_Read', 'Data_Text', 'Data_Informational']
    code_names = ['Code_Far_Call', 'Code_Near_Call', 'Code_Far_Jump',
                  'Code_Near_Jump', 'Code_User', 'Ordinary_Flow']
    ref_types = dict(enumerate(data_names))
    ref_types.update(enumerate(code_names, 16))
    assert typecode in ref_types, "unknown reference type %d" % typecode
    return ref_types[typecode]
def _copy_xref(xref):
""" Make a private copy of the xref class to preserve its contents """
class _xref(object):
pass
xr = _xref()
for attr in [ 'frm', 'to', 'iscode', 'type', 'user' ]:
setattr(xr, attr, getattr(xref, attr))
return xr
def XrefsFrom(ea, flags=0):
    """
    Return all references from address 'ea'

    @param ea: Reference address
    @param flags: any of idaapi.XREF_* flags

    Example::
           for xref in XrefsFrom(here(), 0):
               print xref.type, XrefTypeName(xref.type), \
                         'from', hex(xref.frm), 'to', hex(xref.to)
    """
    block = idaapi.xrefblk_t()
    more = block.first_from(ea, flags)
    while more:
        # The block object is reused by next_from(), so hand out a copy.
        yield _copy_xref(block)
        more = block.next_from()
def XrefsTo(ea, flags=0):
    """
    Return all references to address 'ea'

    @param ea: Reference address
    @param flags: any of idaapi.XREF_* flags

    Example::
           for xref in XrefsTo(here(), 0):
               print xref.type, XrefTypeName(xref.type), \
                         'from', hex(xref.frm), 'to', hex(xref.to)
    """
    block = idaapi.xrefblk_t()
    more = block.first_to(ea, flags)
    while more:
        # The block object is reused by next_to(), so hand out a copy.
        yield _copy_xref(block)
        more = block.next_to()
def Threads():
    """Returns all thread IDs"""
    count = idc.GetThreadQty()
    for index in xrange(count):
        yield idc.GetThreadId(index)
def Heads(start=None, end=None):
    """
    Get a list of heads (instructions or data)

    @param start: start address (default: inf.minEA)
    @param end:   end address (default: inf.maxEA)

    @return: list of heads between start and end
    """
    # Test against None so that an explicit address of 0 is honoured instead
    # of being silently replaced by the database bounds.
    if start is None:
        start = idaapi.cvar.inf.minEA
    if end is None:
        end = idaapi.cvar.inf.maxEA

    ea = start
    # 'start' itself is reported only when it is a head; otherwise begin with
    # the next head after it.
    if not idc.isHead(idc.GetFlags(ea)):
        ea = idaapi.next_head(ea, end)
    while ea != idaapi.BADADDR:
        yield ea
        ea = idaapi.next_head(ea, end)
def Functions(start=None, end=None):
    """
    Get a list of functions

    @param start: start address (default: inf.minEA)
    @param end:   end address (default: inf.maxEA)

    @return: list of function start addresses between start and end

    @note: The last function that starts before 'end' is included even
    if it extends beyond 'end'. Any function that has its chunks scattered
    in multiple segments will be reported multiple times, once in each segment
    as they are listed.
    """
    # Test against None so that an explicit address of 0 is honoured instead
    # of being silently replaced by the database bounds.
    if start is None:
        start = idaapi.cvar.inf.minEA
    if end is None:
        end = idaapi.cvar.inf.maxEA

    # find first function head chunk in the range
    chunk = idaapi.get_fchunk(start)
    if not chunk:
        chunk = idaapi.get_next_fchunk(start)
    # skip tail chunks: only a non-tail chunk marks a function start
    while chunk and chunk.startEA < end and (chunk.flags & idaapi.FUNC_TAIL) != 0:
        chunk = idaapi.get_next_fchunk(chunk.startEA)
    func = chunk

    while func and func.startEA < end:
        startea = func.startEA
        yield startea
        func = idaapi.get_next_func(startea)
def Chunks(start):
    """
    Get a list of function chunks

    @param start: address of the function

    @return: list of function chunks (tuples of the form (start_ea, end_ea))
             belonging to the function
    """
    tails = idaapi.func_tail_iterator_t(idaapi.get_func(start))
    has_chunk = tails.main()
    while has_chunk:
        area = tails.chunk()
        yield (area.startEA, area.endEA)
        has_chunk = tails.next()
def Modules():
    """
    Returns a list of module objects with name,size,base and the rebase_to attributes
    """
    info = idaapi.module_info_t()
    found = idaapi.get_first_module(info)
    while found:
        # 'info' is overwritten by the next call, so copy its fields out now.
        yield idaapi.object_t(name=info.name,
                              size=info.size,
                              base=info.base,
                              rebase_to=info.rebase_to)
        found = idaapi.get_next_module(info)
def Names():
    """
    Returns a list of names

    @return: List of tuples (ea, name)
    """
    total = idaapi.get_nlist_size()
    for index in xrange(total):
        yield (idaapi.get_nlist_ea(index), idaapi.get_nlist_name(index))
def Segments():
    """
    Get list of segments (sections) in the binary image

    @return: List of segment start addresses.
    """
    for index in xrange(idaapi.get_segm_qty()):
        segment = idaapi.getnseg(index)
        if not segment:
            # no segment object for this index: nothing to report
            continue
        yield segment.startEA
def Entries():
    """
    Returns a list of entry points

    @return: List of tuples (index, ordinal, ea, name)
    """
    for index in xrange(idaapi.get_entry_qty()):
        ordinal = idaapi.get_entry_ordinal(index)
        yield (index,
               ordinal,
               idaapi.get_entry(ordinal),
               idaapi.get_entry_name(ordinal))
def FuncItems(start):
    """
    Get a list of function items

    @param start: address of the function

    @return: ea of each item in the function (nothing is produced when no
             function contains 'start')
    """
    func = idaapi.get_func(start)
    if func:
        items = idaapi.func_item_iterator_t()
        more = items.set(func)
        while more:
            yield items.current()
            more = items.next_code()
def Structs():
    """
    Get a list of structures

    @return: List of tuples (idx, sid, name)
    """
    index = idc.GetFirstStrucIdx()
    while index != idaapi.BADADDR:
        struct_id = idc.GetStrucId(index)
        struct_name = idc.GetStrucName(struct_id)
        yield (index, struct_id, struct_name)
        index = idc.GetNextStrucIdx(index)
def StructMembers(sid):
    """
    Get a list of structure members information (or stack vars if given a frame).

    @param sid: ID of the structure.

    @return: List of tuples (offset, name, size)

    @note: If 'sid' does not refer to a valid structure,
           an exception will be raised.
    @note: This will not return 'holes' in structures/stack frames;
           it only returns defined structure members.
    """
    offset = idc.GetFirstMember(sid)
    if offset == -1:
        raise Exception("No structure with ID: 0x%x" % sid)
    while offset != idaapi.BADADDR:
        member_name = idc.GetMemberName(sid, offset)
        # offsets without a member name are skipped
        if member_name:
            yield (offset, member_name, idc.GetMemberSize(sid, offset))
        offset = idc.GetStrucNextOff(sid, offset)
def DecodePrecedingInstruction(ea):
    """
    Decode preceding instruction in the execution flow.

    @param ea: address to decode
    @return: (None or the decode instruction, farref)
             farref will contain 'true' if followed an xref, false otherwise
    """
    prev_addr, farref = idaapi.decode_preceding_insn(ea)
    if prev_addr != idaapi.BADADDR:
        # idaapi.cmd holds the decoded instruction; return a private copy
        return (idaapi.cmd.copy(), farref)
    return (None, False)
def DecodePreviousInstruction(ea):
    """
    Decodes the previous instruction and returns an insn_t like class

    @param ea: address to decode
    @return: None or a new insn_t instance
    """
    decoded_at = idaapi.decode_prev_insn(ea)
    return None if decoded_at == idaapi.BADADDR else idaapi.cmd.copy()
def DecodeInstruction(ea):
    """
    Decodes an instruction and returns an insn_t like class

    @param ea: address to decode
    @return: None or a new insn_t instance
    """
    length = idaapi.decode_insn(ea)
    # a zero length means nothing was decoded at 'ea'
    return None if length == 0 else idaapi.cmd.copy()
def GetDataList(ea, count, itemsize=1):
    """
    Get data list - INTERNAL USE ONLY

    Generator producing 'count' items of 'itemsize' bytes each, read from
    consecutive addresses starting at 'ea'.

    @param ea: address of the first item
    @param count: number of items to read
    @param itemsize: item width in bytes (1, 2, 4 or 8)
    @raise ValueError: on any other 'itemsize'; because this is a generator
                       the error surfaces on the first iteration
    """
    if itemsize == 1:
        getdata = idaapi.get_byte
    elif itemsize == 2:
        getdata = idaapi.get_word
    elif itemsize == 4:
        getdata = idaapi.get_long
    elif itemsize == 8:
        getdata = idaapi.get_qword
    else:
        # Call-form raise: valid on Python 2 and 3 alike, unlike the
        # statement form 'raise ValueError, "..."'.
        raise ValueError("Invalid data size! Must be 1, 2, 4 or 8")

    endea = ea + itemsize * count
    curea = ea
    while curea < endea:
        yield getdata(curea)
        curea += itemsize
def PutDataList(ea, datalist, itemsize=1):
    """
    Put data list - INTERNAL USE ONLY

    Patches each value of 'datalist' into the database at consecutive
    addresses starting at 'ea', advancing by 'itemsize' bytes per value.
    Only item sizes 1, 2 and 4 are accepted.
    """
    if itemsize == 1:
        putdata = idaapi.patch_byte
    elif itemsize == 2:
        putdata = idaapi.patch_word
    elif itemsize == 4:
        putdata = idaapi.patch_long
    else:
        putdata = None

    assert putdata, "Invalid data size! Must be 1, 2 or 4"

    target = ea
    for value in datalist:
        putdata(target, value)
        target += itemsize
def MapDataList(ea, length, func, wordsize=1):
    """
    Map through a list of data words in the database

    @param ea:       start address
    @param length:   number of words to map
    @param func:     mapping function
    @param wordsize: size of words to map [default: 1 byte]

    @return: None
    """
    current_words = GetDataList(ea, length, wordsize)
    mapped_words = map(func, current_words)
    PutDataList(ea, mapped_words, wordsize)
def GetInputFileMD5():
    """
    Return the MD5 hash of the input binary file

    @return: MD5 string or None on error
    """
    digest = idc.GetInputMD5()
    return digest
class Strings(object):
    """
    Allows iterating over the string list. The set of strings will not be
    modified, unless asked explicitly at setup()-time.

    Example:
        s = Strings()

        for i in s:
            print "%x: len=%d type=%d -> '%s'" % (i.ea, i.length, i.type, str(i))

    """
    class StringItem(object):
        """
        Class representing each string item.
        """
        def __init__(self, si):
            self.ea = si.ea
            """String ea"""
            self.type = si.type
            """string type (ASCSTR_xxxxx)"""
            self.length = si.length
            """string length"""

        def is_1_byte_encoding(self):
            return not self.is_2_bytes_encoding() and not self.is_4_bytes_encoding()

        def is_2_bytes_encoding(self):
            return (self.type & 7) in [idaapi.ASCSTR_UTF16, idaapi.ASCSTR_ULEN2, idaapi.ASCSTR_ULEN4]

        def is_4_bytes_encoding(self):
            return (self.type & 7) == idaapi.ASCSTR_UTF32

        def _toseq(self, as_unicode):
            # Pick the conversion flag handed to get_ascii_contents2() and the
            # codec used to decode the bytes it returns.
            if self.is_2_bytes_encoding():
                conv = idaapi.ACFOPT_UTF16
                pyenc = "utf-16"
            elif self.is_4_bytes_encoding():
                conv = idaapi.ACFOPT_UTF8
                pyenc = "utf-8"
            else:
                conv = idaapi.ACFOPT_ASCII
                pyenc = 'ascii'
            strbytes = idaapi.get_ascii_contents2(self.ea, self.length, self.type, conv)
            return unicode(strbytes, pyenc, 'replace') if as_unicode else strbytes

        def __str__(self):
            return self._toseq(False)

        def __unicode__(self):
            return self._toseq(True)

    STR_C = 0x0001
    """C-style ASCII string"""
    STR_PASCAL = 0x0002
    """Pascal-style ASCII string (length byte)"""
    STR_LEN2 = 0x0004
    """Pascal-style, length is 2 bytes"""
    STR_UNICODE = 0x0008
    """Unicode string"""
    STR_LEN4 = 0x0010
    """Pascal-style, length is 4 bytes"""
    STR_ULEN2 = 0x0020
    """Pascal-style Unicode, length is 2 bytes"""
    STR_ULEN4 = 0x0040
    """Pascal-style Unicode, length is 4 bytes"""

    def clear_cache(self):
        """Clears the strings list cache"""
        self.refresh(0, 0) # when ea1=ea2 the kernel will clear the cache

    def __init__(self, default_setup = False):
        """
        Initializes the Strings enumeration helper class

        @param default_setup: Set to True to use default setup (C strings, min len 5, ...)
        """
        self.size = 0
        if default_setup:
            self.setup()
        else:
            self.refresh()

        # Scratch record that _get_item() fills for each requested index.
        self._si = idaapi.string_info_t()

    def refresh(self, ea1=None, ea2=None):
        """
        Refreshes the strings list

        @param ea1: range start (default: inf.minEA)
        @param ea2: range end (default: inf.maxEA)
        """
        if ea1 is None:
            ea1 = idaapi.cvar.inf.minEA
        if ea2 is None:
            ea2 = idaapi.cvar.inf.maxEA

        idaapi.refresh_strlist(ea1, ea2)
        self.size = idaapi.get_strlist_qty()

    def setup(self,
              strtypes = STR_C,
              minlen = 5,
              only_7bit = True,
              ignore_instructions = False,
              ea1 = None,
              ea2 = None,
              display_only_existing_strings = False):
        """
        Applies new string list options and refreshes the list.

        @param strtypes: combination of the STR_* constants of this class
        @param minlen: minimal string length
        @param only_7bit: value stored in the 'only_7bit' option
        @param ignore_instructions: value stored in the 'ignore_heads' option
        @param ea1: range start (default: inf.minEA)
        @param ea2: range end (default: inf.maxEA)
        @param display_only_existing_strings: value stored in the option of
               the same name
        """
        if ea1 is None:
            ea1 = idaapi.cvar.inf.minEA
        if ea2 is None:
            ea2 = idaapi.cvar.inf.maxEA

        t = idaapi.strwinsetup_t()
        t.strtypes = strtypes
        t.minlen = minlen
        t.only_7bit = only_7bit
        # Previously this argument was accepted but never applied.
        # NOTE(review): 'ignore_heads' is the strwinsetup_t field name used by
        # later IDAPython releases - confirm it against the SDK in use.
        t.ignore_heads = ignore_instructions
        t.ea1 = ea1
        t.ea2 = ea2
        t.display_only_existing_strings = display_only_existing_strings
        idaapi.set_strlist_options(t)

        # Automatically refreshes
        self.refresh()

    def _get_item(self, index):
        if not idaapi.get_strlist_item(index, self._si):
            return None
        else:
            return Strings.StringItem(self._si)

    def __iter__(self):
        return (self._get_item(index) for index in xrange(0, self.size))

    def __getitem__(self, index):
        """Returns a string item or None"""
        if index >= self.size:
            raise KeyError
        else:
            return self._get_item(index)
# -----------------------------------------------------------------------
def GetIdbDir():
    """
    Get IDB directory

    This function returns directory path of the current IDB database,
    with a trailing path separator appended.
    """
    idb_path = idaapi.cvar.database_idb
    return os.path.dirname(idb_path) + os.sep
# -----------------------------------------------------------------------
def GetRegisterList():
    """Returns the register list"""
    register_names = idaapi.ph_get_regnames()
    return register_names
# -----------------------------------------------------------------------
def GetInstructionList():
    """Returns the instruction list of the current processor module"""
    mnemonics = []
    for entry in idaapi.ph_get_instruc():
        # entries whose first element is empty are left out
        if entry[0]:
            mnemonics.append(entry[0])
    return mnemonics
# -----------------------------------------------------------------------
def _Assemble(ea, line):
    """
    Please refer to Assemble() - INTERNAL USE ONLY
    """
    # A single string is treated as a one-element list of lines.
    lines = [line] if type(line) == types.StringType else line

    assembled = []
    for text in lines:
        seg = idaapi.getseg(ea)
        if not seg:
            return (False, "No segment at ea")
        ip = ea - (idaapi.ask_selector(seg.sel) << 4)
        buf = idaapi.AssembleLine(ea, seg.sel, ip, seg.bitness, text)
        if not buf:
            return (False, "Assembler failed: " + text)
        # the next line is assembled right after the bytes just produced
        ea += len(buf)
        assembled.append(buf)

    # A single result is returned bare rather than as a one-element list.
    if len(assembled) == 1:
        return (True, assembled[0])
    return (True, assembled)
def Assemble(ea, line):
    """
    Assembles one or more lines (does not display any message dialogs)
    If line is a list then this function will attempt to assemble all the lines
    This function will turn on batch mode temporarily so that no messages are displayed on the screen

    @param ea:       start address
    @param line:     a single line (string) or a list of lines to assemble
    @return: (False, "Error message") or (True, asm_buf) or (True, [asm_buf1, asm_buf2, asm_buf3])
    """
    old_batch = idc.Batch(1)
    try:
        return _Assemble(ea, line)
    finally:
        # Restore the previous batch mode even when assembling raises, so an
        # error cannot leave IDA stuck in batch mode.
        idc.Batch(old_batch)
def _copy_obj(src, dest, skip_list = None):
"""
Copy non private/non callable attributes from a class instance to another
@param src: Source class to copy from
@param dest: If it is a string then it designates the new class type that will be created and copied to.
Otherwise dest should be an instance of another class
@return: A new instance or "dest"
"""
if type(dest) == types.StringType:
# instantiate a new destination class of the specified type name?
dest = new.classobj(dest, (), {})
for x in dir(src):
# skip special and private fields
if x.startswith("__") and x.endswith("__"):
continue
# skip items in the skip list
if skip_list and x in skip_list:
continue
t = getattr(src, x)
# skip callable
if callable(t):
continue
setattr(dest, x, t)
return dest
# -----------------------------------------------------------------------
class _reg_dtyp_t(object):
"""
INTERNAL
This class describes a register's number and dtyp.
The equal operator is overloaded so that two instances can be tested for equality
"""
def __init__(self, reg, dtyp):
self.reg = reg
self.dtyp = dtyp
def __eq__(self, other):
return (self.reg == other.reg) and (self.dtyp == other.dtyp)
# -----------------------------------------------------------------------
class _procregs(object):
"""Utility class allowing the users to identify registers in a decoded instruction"""
def __getattr__(self, attr):
ri = idaapi.reg_info_t()
if not idaapi.parse_reg_name(attr, ri):
raise AttributeError()
r = _reg_dtyp_t(ri.reg, ord(idaapi.get_dtyp_by_size(ri.size)))
self.__dict__[attr] = r
return r
def __setattr__(self, attr, value):
raise AttributeError(attr)
# -----------------------------------------------------------------------
class _cpu(object):
    "Simple wrapper around GetRegValue/SetRegValue"
    def __getattr__(self, name):
        # attribute reads are forwarded to idc.GetRegValue
        register_value = idc.GetRegValue(name)
        return register_value

    def __setattr__(self, name, value):
        # attribute writes are forwarded to idc.SetRegValue (value first)
        outcome = idc.SetRegValue(value, name)
        return outcome
# --------------------------------------------------------------------------
class __process_ui_actions_helper(object):
def __init__(self, actions, flags = 0):
"""Expect a list or a string with a list of actions"""
if isinstance(actions, str):
lst = actions.split(";")
elif isinstance(actions, (list, tuple)):
lst = actions
else:
raise ValueError, "Must pass a string, list or a tuple"
# Remember the action list and the flags
self.__action_list = lst
self.__flags = flags
# Reset action index
self.__idx = 0
def __len__(self):
return len(self.__action_list)
def __call__(self):
if self.__idx >= len(self.__action_list):
return False
# Execute one action
idaapi.process_ui_action(
self.__action_list[self.__idx],
self.__flags)
# Move to next action
self.__idx += 1
# Reschedule
return True
# --------------------------------------------------------------------------
def ProcessUiActions(actions, flags=0):
    """
    @param actions: A string containing a list of actions separated by semicolon, a list or a tuple
    @param flags: flags to be passed to process_ui_action()
    @return: Boolean. Returns False if the action list was empty or execute_ui_requests() failed.
    """
    # Instantiate a helper that runs one action per call
    helper = __process_ui_actions_helper(actions, flags)
    if len(helper) < 1:
        return False
    return idaapi.execute_ui_requests((helper,))
# -----------------------------------------------------------------------
class peutils_t(object):
    """
    PE utility class. Retrieves PE information from the database.

    Constants from pe.h
    """
    PE_NODE = "$ PE header" # netnode name for PE header
    PE_ALT_DBG_FPOS   = idaapi.BADADDR & -1 # altval() -> translated fpos of debuginfo
    PE_ALT_IMAGEBASE  = idaapi.BADADDR & -2 # altval() -> loading address (usually pe.imagebase)
    PE_ALT_PEHDR_OFF  = idaapi.BADADDR & -3 # altval() -> offset of PE header
    PE_ALT_NEFLAGS    = idaapi.BADADDR & -4 # altval() -> neflags
    PE_ALT_TDS_LOADED = idaapi.BADADDR & -5 # altval() -> tds already loaded(1) or invalid(-1)
    PE_ALT_PSXDLL     = idaapi.BADADDR & -6 # altval() -> if POSIX(x86) imports from PSXDLL netnode

    def __init__(self):
        self.__penode = idaapi.netnode()
        self.__penode.create(peutils_t.PE_NODE)

    # Loading address read from the PE netnode.
    imagebase = property(
        lambda self: self.__penode.altval(peutils_t.PE_ALT_IMAGEBASE)
    )

    # Offset of the PE header read from the PE netnode. This property used to
    # be named 'header' and was silently replaced by the header() method
    # defined further down, which made __str__ fail with a TypeError.
    header_offset = property(
        lambda self: self.__penode.altval(peutils_t.PE_ALT_PEHDR_OFF)
    )

    def __str__(self):
        return "peutils_t(imagebase=%s, header=%s)" % (hex(self.imagebase), hex(self.header_offset))

    def header(self):
        """
        Returns the complete PE header as an instance of peheader_t (defined in the SDK).
        """
        return self.__penode.valobj()
# -----------------------------------------------------------------------
cpu = _cpu()
"""This is a special class instance used to access the registers as if they were attributes of this object.
For example to access the EAX register:
    print "%x" % cpu.Eax
"""
procregs = _procregs()
"""This object is used to access the processor registers. It is useful when decoding instructions and you want to see which instruction is which.
For example:
    x = idautils.DecodeInstruction(here())
    if x[0] == procregs.Esp:
        print "This operand is the register ESP"
"""

8590
python_lib/idc.py Normal file

File diff suppressed because it is too large Load Diff

111
python_lib/init.py Normal file
View File

@ -0,0 +1,111 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------
# IDAPython - Python plugin for Interactive Disassembler
#
# Copyright (c) The IDAPython Team <idapython@googlegroups.com>
#
# All rights reserved.
#
# For detailed copyright information see the file COPYING in
# the root of the distribution archive.
# -----------------------------------------------------------------------
# init.py - Essential init routines
# -----------------------------------------------------------------------
import os
import sys
import time
import warnings
import _idaapi
# __EA64__ is set if IDA is running in 64-bit mode
__EA64__ = _idaapi.BADADDR == 0xFFFFFFFFFFFFFFFFL
# -----------------------------------------------------------------------
# Take over the standard text outputs
# -----------------------------------------------------------------------
class IDAPythonStdOut:
    """
    Minimal file-like object installed as sys.stdout and sys.stderr; every
    write is forwarded to the IDA message window through _idaapi.msg().
    """
    def write(self, text):
        # NB: in case 'text' is Unicode, msg() will decode it
        # and call umsg() to print it
        _idaapi.msg(text)

    def flush(self):
        # nothing is buffered here, so there is nothing to flush
        return None

    def isatty(self):
        # never reported as a terminal
        return False
# -----------------------------------------------------------------------
def runscript(script):
    """
    Executes a script.
    This function is present for backward compatibility. Please use idaapi.IDAPython_ExecScript() instead

    @param script: script path

    @return: Error string or None on success
    """
    import idaapi
    # the script runs with this module's globals as its namespace
    namespace = globals()
    return idaapi.IDAPython_ExecScript(script, namespace)
# -----------------------------------------------------------------------
def print_banner():
    """
    Print the Python and IDAPython version lines framed by two dashed
    separator lines that are one character longer than the widest line.
    """
    bitness = " 64-bit" if __EA64__ else ""
    python_line = "Python %s " % sys.version
    # Only the trailing template is %-formatted with the version tuple.
    idapython_line = "IDAPython" + bitness + (
        " v%d.%d.%d %s (serial %d) (c) The IDAPython Team <idapython@googlegroups.com>" % IDAPYTHON_VERSION)
    banner = [python_line, idapython_line]
    widest = max([len(entry) for entry in banner])
    sepline = '-' * (widest + 1)
    print(sepline)
    print("\n".join(banner))
    print(sepline)
# -----------------------------------------------------------------------
# Redirect stderr and stdout to the IDA message window.
# The original stream objects are kept in _orig_stdout/_orig_stderr before
# both names are rebound to one shared IDAPythonStdOut instance.
_orig_stdout = sys.stdout;
_orig_stderr = sys.stderr;
sys.stdout = sys.stderr = IDAPythonStdOut()
# -----------------------------------------------------------------------
# Initialize the help, with our own stdin wrapper, that'll query the user
# -----------------------------------------------------------------------
import pydoc
class IDAPythonHelpPrompter:
    """
    Input object for pydoc.Helper: each readline() asks the user for a help
    topic through idaapi.askstr().
    """
    def readline(self):
        topic = idaapi.askstr(0, '', 'Help topic?')
        return topic
help = pydoc.Helper(input = IDAPythonHelpPrompter(), output = sys.stdout)
# Assign a default sys.argv
sys.argv = [""]
# Have to make sure Python finds our modules
sys.path.append(_idaapi.idadir("python"))
# Remove current directory from the top of the path search
if '' in sys.path: # On non Windows, the empty path is added
    sys.path.remove('')
if os.getcwd() in sys.path:
    sys.path.remove(os.getcwd())
# ...and add it to the end if needed
# NOTE(review): IDAPYTHON_REMOVE_CWD_SYS_PATH is not defined in this file;
# presumably it is injected by the embedding plugin - confirm.
if not IDAPYTHON_REMOVE_CWD_SYS_PATH:
    sys.path.append(os.getcwd())
# Import all the required modules
from idaapi import Choose, get_user_idadir, cvar, Choose2, Appcall, Form
from idc import *
from idautils import *
import idaapi
# Load the users personal init file
userrc = os.path.join(get_user_idadir(), "idapythonrc.py")
if os.path.exists(userrc):
idaapi.IDAPython_ExecScript(userrc, globals())
# All done, ready to rock.

View File

@ -2,8 +2,9 @@ import copy
import networkx as nx import networkx as nx
from idautils import * from idautils import *
from idaapi import * from idaapi import *
from idaapi import *
from idc import * from idc import *
#from idc_bc695 import *
import copy import copy
import networkx as nx import networkx as nx
from idautils import * from idautils import *
@ -12,6 +13,8 @@ from idc import *
from graph_analysis_ida import * from graph_analysis_ida import *
def getCfg(func, externs_eas, ea_externs): def getCfg(func, externs_eas, ea_externs):
func_start = func.startEA func_start = func.startEA
func_end = func.endEA func_end = func.endEA
@ -155,6 +158,15 @@ def attributingRe(cfg, externs_eas, ea_externs):
cfg.node[node_id]['externs'] = externs cfg.node[node_id]['externs'] = externs
numTIs = calTransferIns(bl) numTIs = calTransferIns(bl)
cfg.node[node_id]['numTIs'] = numTIs cfg.node[node_id]['numTIs'] = numTIs
numCompare = calCompareIns(bl)
cfg.node[node_id]['numCom'] = numCompare
numMove = calMoveIns(bl)
cfg.node[node_id]['numMov'] = numMove
numTerm = calTerminationIns(bl)
cfg.node[node_id]['numTerm'] = numTerm
numDD = calDateDecIns(bl)
cfg.node[node_id]['numDD'] = numDD
def attributing(cfg): def attributing(cfg):

View File

@ -10,6 +10,8 @@ import cPickle as pickle
import pdb import pdb
from graph_analysis_ida import * from graph_analysis_ida import *
from graph_property import * from graph_property import *
from cfg_constructor import cfg_construct
#import wingdbstub #import wingdbstub
#wingdbstub.Ensure() #wingdbstub.Ensure()
@ -17,6 +19,7 @@ def get_funcs(ea):
funcs = {} funcs = {}
# Get current ea # Get current ea
# Loop from start to end in the current segment # Loop from start to end in the current segment
for funcea in Functions(SegStart(ea)): for funcea in Functions(SegStart(ea)):
funcname = GetFunctionName(funcea) funcname = GetFunctionName(funcea)
func = get_func(funcea) func = get_func(funcea)
@ -123,7 +126,7 @@ def get_func_cfgs(ea):
print i print i
i += 1 i += 1
try: try:
icfg = cfg.cfg_construct(func) icfg = cfg_construct(func)
func_cfglist[funcname] = icfg func_cfglist[funcname] = icfg
except: except:
pass pass

View File

@ -204,8 +204,8 @@ def get_stack_arg(func_addr):
def processExternalSegs(): def processExternalSegs():
funcdata = {} funcdata = {}
datafunc = {} datafunc = {}
for n in xrange(idaapi.get_segm_qty()): for n in xrange(get_segm_qty()):
seg = idaapi.getnseg(n) seg = getnseg(n)
ea = seg.startEA ea = seg.startEA
segtype = idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE) segtype = idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE)
if segtype in [idc.SEG_XTRN]: if segtype in [idc.SEG_XTRN]:
@ -221,8 +221,8 @@ def processExternalSegs():
def processpltSegs(): def processpltSegs():
funcdata = {} funcdata = {}
datafunc = {} datafunc = {}
for n in xrange(idaapi.get_segm_qty()): for n in xrange(get_segm_qty()):
seg = idaapi.getnseg(n) seg = getnseg(n)
ea = seg.startEA ea = seg.startEA
segname = SegName(ea) segname = SegName(ea)
if segname in ['.plt', 'extern', '.MIPS.stubs']: if segname in ['.plt', 'extern', '.MIPS.stubs']:
@ -240,8 +240,8 @@ def processpltSegs():
def processDataSegs(): def processDataSegs():
funcdata = {} funcdata = {}
datafunc = {} datafunc = {}
for n in xrange(idaapi.get_segm_qty()): for n in xrange(get_segm_qty()):
seg = idaapi.getnseg(n) seg = getnseg(n)
ea = seg.startEA ea = seg.startEA
segtype = idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE) segtype = idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE)
if segtype in [idc.SEG_DATA, idc.SEG_BSS]: if segtype in [idc.SEG_DATA, idc.SEG_BSS]:

View File

@ -1,7 +1,10 @@
# coding=utf-8
from idautils import * from idautils import *
from idaapi import * from idaapi import *
from idc import * from idc import *
def getfunc_consts(func): def getfunc_consts(func):
strings = [] strings = []
consts = [] consts = []
@ -12,19 +15,20 @@ def getfunc_consts(func):
consts += conts consts += conts
return strings, consts return strings, consts
def getConst(ea, offset): def getConst(ea, offset):
strings = [] strings = []
consts = [] consts = []
optype1 = GetOpType(ea, offset) optype1 = GetOpType(ea, offset)
if optype1 == idaapi.o_imm: if optype1 == o_imm:
imm_value = GetOperandValue(ea, offset) imm_value = GetOperandValue(ea, offset)
if 0<= imm_value <= 10: if 0 <= imm_value <= 10:
consts.append(imm_value) consts.append(imm_value)
else: else:
if idaapi.isLoaded(imm_value) and idaapi.getseg(imm_value): if isLoaded(imm_value) and getseg(imm_value):
str_value = GetString(imm_value) str_value = GetString(imm_value)
if str_value is None: if str_value is None:
str_value = GetString(imm_value+0x40000) str_value = GetString(imm_value + 0x40000)
if str_value is None: if str_value is None:
consts.append(imm_value) consts.append(imm_value)
else: else:
@ -43,6 +47,7 @@ def getConst(ea, offset):
consts.append(imm_value) consts.append(imm_value)
return strings, consts return strings, consts
def getBBconsts(bl): def getBBconsts(bl):
strings = [] strings = []
consts = [] consts = []
@ -52,7 +57,7 @@ def getBBconsts(bl):
inst_addr = start inst_addr = start
while inst_addr < end: while inst_addr < end:
opcode = GetMnem(inst_addr) opcode = GetMnem(inst_addr)
if opcode in ['la','jalr','call', 'jal']: if opcode in ['la', 'jalr', 'call', 'jal']:
inst_addr = NextHead(inst_addr) inst_addr = NextHead(inst_addr)
continue continue
strings_src, consts_src = getConst(inst_addr, 0) strings_src, consts_src = getConst(inst_addr, 0)
@ -71,6 +76,7 @@ def getBBconsts(bl):
inst_addr = NextHead(inst_addr) inst_addr = NextHead(inst_addr)
return strings, consts return strings, consts
def getFuncCalls(func): def getFuncCalls(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)] blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
sumcalls = 0 sumcalls = 0
@ -79,6 +85,7 @@ def getFuncCalls(func):
sumcalls += callnum sumcalls += callnum
return sumcalls return sumcalls
def getLogicInsts(func): def getLogicInsts(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)] blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
sumcalls = 0 sumcalls = 0
@ -87,6 +94,7 @@ def getLogicInsts(func):
sumcalls += callnum sumcalls += callnum
return sumcalls return sumcalls
def getTransferInsts(func): def getTransferInsts(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)] blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
sumcalls = 0 sumcalls = 0
@ -95,6 +103,7 @@ def getTransferInsts(func):
sumcalls += callnum sumcalls += callnum
return sumcalls return sumcalls
def getIntrs(func): def getIntrs(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)] blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
sumcalls = 0 sumcalls = 0
@ -103,14 +112,17 @@ def getIntrs(func):
sumcalls += callnum sumcalls += callnum
return sumcalls return sumcalls
def getLocalVariables(func):
    """
    Return the stack-variable count for a function.

    :param func: object exposing a ``startEA`` attribute (the function's start address)
    :return: whatever ``get_stackVariables`` returns for that start address
    """
    return get_stackVariables(func.startEA)
def getBasicBlocks(func):
    """
    Count the basic blocks of a function.

    :param func: function object accepted by ``FlowChart``
    :return: number of blocks yielded by ``FlowChart(func)``
    """
    spans = []
    for block in FlowChart(func):
        # Keep the (start, end) pair per block, exactly as the original list did.
        spans.append((block.startEA, block.endEA))
    return len(spans)
def getIncommingCalls(func): def getIncommingCalls(func):
refs = CodeRefsTo(func.startEA, 0) refs = CodeRefsTo(func.startEA, 0)
re = len([v for v in refs]) re = len([v for v in refs])
@ -118,7 +130,7 @@ def getIncommingCalls(func):
def get_stackVariables(func_addr): def get_stackVariables(func_addr):
#print func_addr # print func_addr
args = [] args = []
stack = GetFrame(func_addr) stack = GetFrame(func_addr)
if not stack: if not stack:
@ -126,22 +138,24 @@ def get_stackVariables(func_addr):
firstM = GetFirstMember(stack) firstM = GetFirstMember(stack)
lastM = GetLastMember(stack) lastM = GetLastMember(stack)
i = firstM i = firstM
while i <=lastM: while i <= lastM:
mName = GetMemberName(stack,i) mName = GetMemberName(stack, i)
mSize = GetMemberSize(stack,i) mSize = GetMemberSize(stack, i)
if mSize: if mSize:
i = i + mSize i = i + mSize
else: else:
i = i+4 i = i + 4
if mName not in args and mName and 'var_' in mName: if mName not in args and mName and 'var_' in mName:
args.append(mName) args.append(mName)
return len(args) return len(args)
def calArithmeticIns(bl): def calArithmeticIns(bl):
x86_AI = {'add':1, 'sub':1, 'div':1, 'imul':1, 'idiv':1, 'mul':1, 'shl':1, 'dec':1, 'inc':1} """
mips_AI = {'add':1, 'addu':1, 'addi':1, 'addiu':1, 'mult':1, 'multu':1, 'div':1, 'divu':1} 基本块算术指令统计
"""
x86_AI = {'add': 1, 'sub': 1, 'div': 1, 'imul': 1, 'idiv': 1, 'mul': 1, 'shl': 1, 'dec': 1, 'inc': 1}
mips_AI = {'add': 1, 'addu': 1, 'addi': 1, 'addiu': 1, 'mult': 1, 'multu': 1, 'div': 1, 'divu': 1}
calls = {} calls = {}
calls.update(x86_AI) calls.update(x86_AI)
calls.update(mips_AI) calls.update(mips_AI)
@ -156,8 +170,14 @@ def calArithmeticIns(bl):
inst_addr = NextHead(inst_addr) inst_addr = NextHead(inst_addr)
return invoke_num return invoke_num
def calCalls(bl): def calCalls(bl):
calls = {'call':1, 'jal':1, 'jalr':1} """
基本快转移指令
:param bl:
:return:
"""
calls = {'call': 1, 'jal': 1, 'jalr': 1}
start = bl[0] start = bl[0]
end = bl[1] end = bl[1]
invoke_num = 0 invoke_num = 0
@ -169,7 +189,13 @@ def calCalls(bl):
inst_addr = NextHead(inst_addr) inst_addr = NextHead(inst_addr)
return invoke_num return invoke_num
def calInsts(bl): def calInsts(bl):
"""
基本快指令数
:param bl:
:return:
"""
start = bl[0] start = bl[0]
end = bl[1] end = bl[1]
ea = start ea = start
@ -179,9 +205,16 @@ def calInsts(bl):
ea = NextHead(ea) ea = NextHead(ea)
return num return num
def calLogicInstructions(bl): def calLogicInstructions(bl):
x86_LI = {'and':1, 'andn':1, 'andnpd':1, 'andpd':1, 'andps':1, 'andnps':1, 'test':1, 'xor':1, 'xorpd':1, 'pslld':1} """
mips_LI = {'and':1, 'andi':1, 'or':1, 'ori':1, 'xor':1, 'nor':1, 'slt':1, 'slti':1, 'sltu':1} 基本快逻辑运算
:param bl:
:return:
"""
x86_LI = {'and': 1, 'andn': 1, 'andnpd': 1, 'andpd': 1, 'andps': 1, 'andnps': 1, 'test': 1, 'xor': 1, 'xorpd': 1,
'pslld': 1}
mips_LI = {'and': 1, 'andi': 1, 'or': 1, 'ori': 1, 'xor': 1, 'nor': 1, 'slt': 1, 'slti': 1, 'sltu': 1}
calls = {} calls = {}
calls.update(x86_LI) calls.update(x86_LI)
calls.update(mips_LI) calls.update(mips_LI)
@ -196,7 +229,14 @@ def calLogicInstructions(bl):
inst_addr = NextHead(inst_addr) inst_addr = NextHead(inst_addr)
return invoke_num return invoke_num
def calSconstants(bl): def calSconstants(bl):
"""
基本快字符串常量
:param bl:
:return:
"""
calls = {}
start = bl[0] start = bl[0]
end = bl[1] end = bl[1]
invoke_num = 0 invoke_num = 0
@ -210,6 +250,11 @@ def calSconstants(bl):
def calNconstants(bl): def calNconstants(bl):
"""
基本快整数常量
:param bl:
:return:
"""
start = bl[0] start = bl[0]
end = bl[1] end = bl[1]
invoke_num = 0 invoke_num = 0
@ -222,6 +267,7 @@ def calNconstants(bl):
inst_addr = NextHead(inst_addr) inst_addr = NextHead(inst_addr)
return invoke_num return invoke_num
def retrieveExterns(bl, ea_externs): def retrieveExterns(bl, ea_externs):
externs = [] externs = []
start = bl[0] start = bl[0]
@ -237,10 +283,17 @@ def retrieveExterns(bl, ea_externs):
inst_addr = NextHead(inst_addr) inst_addr = NextHead(inst_addr)
return externs return externs
def calTransferIns(bl): def calTransferIns(bl):
x86_TI = {'jmp':1, 'jz':1, 'jnz':1, 'js':1, 'je':1, 'jne':1, 'jg':1, 'jle':1, 'jge':1, 'ja':1, 'jnc':1, 'call':1} """
mips_TI = {'beq':1, 'bne':1, 'bgtz':1, "bltz":1, "bgez":1, "blez":1, 'j':1, 'jal':1, 'jr':1, 'jalr':1} 基本快转移指令
arm_TI = {'MVN':1, "MOV":1} :param bl:
:return:
"""
x86_TI = {'jmp': 1, 'jz': 1, 'jnz': 1, 'js': 1, 'je': 1, 'jne': 1, 'jg': 1, 'jle': 1, 'jge': 1, 'ja': 1, 'jnc': 1,
'call': 1}
mips_TI = {'beq': 1, 'bne': 1, 'bgtz': 1, "bltz": 1, "bgez": 1, "blez": 1, 'j': 1, 'jal': 1, 'jr': 1, 'jalr': 1}
arm_TI = {'MVN': 1, "MOV": 1}
calls = {} calls = {}
calls.update(x86_TI) calls.update(x86_TI)
calls.update(mips_TI) calls.update(mips_TI)
@ -255,3 +308,79 @@ def calTransferIns(bl):
invoke_num += 1 invoke_num += 1
inst_addr = NextHead(inst_addr) inst_addr = NextHead(inst_addr)
return invoke_num return invoke_num
def calCompareIns(bl):
    """
    Count the compare instructions inside one basic block.

    :param bl: indexable whose items 0 and 1 are the block's start and end addresses
    :return: number of instruction heads in [start, end) whose mnemonic is 'cmp'
    """
    compare_mnemonics = ('cmp',)
    first_ea, stop_ea = bl[0], bl[1]
    total = 0
    cursor = first_ea
    while cursor < stop_ea:
        if GetMnem(cursor) in compare_mnemonics:
            total += 1
        cursor = NextHead(cursor)
    return total
def calMoveIns(bl):
    """
    Count the data-movement instructions inside one basic block.

    :param bl: indexable whose items 0 and 1 are the block's start and end addresses
    :return: number of instruction heads in [start, end) whose mnemonic is
             'mov', 'lea' or 'xchg'
    """
    move_mnemonics = ('mov', 'lea', 'xchg')
    first_ea, stop_ea = bl[0], bl[1]
    total = 0
    cursor = first_ea
    while cursor < stop_ea:
        mnemonic = GetMnem(cursor)
        cursor = NextHead(cursor)
        if mnemonic in move_mnemonics:
            total += 1
    return total
def calTerminationIns(bl):
    """
    Count the terminating instructions inside one basic block.

    :param bl: indexable whose items 0 and 1 are the block's start and end addresses
    :return: number of instruction heads in [start, end) whose mnemonic is
             'ret', 'retn', 'hlt' or 'sys_exit'
    """
    # NOTE(review): 'sys_exit' looks like a syscall name rather than a mnemonic;
    # confirm GetMnem can ever return it.
    terminators = ('ret', 'retn', 'hlt', 'sys_exit')
    first_ea, stop_ea = bl[0], bl[1]
    total = 0
    cursor = first_ea
    while cursor < stop_ea:
        if GetMnem(cursor) in terminators:
            total += 1
        cursor = NextHead(cursor)
    return total
def calDateDecIns(bl):
    """
    Count the data-declaration items inside one basic block.

    :param bl: indexable whose items 0 and 1 are the block's start and end addresses
    :return: number of heads in [start, end) for which GetMnem returns
             'db', 'dw', 'dd' or 'dq'
    """
    # NOTE(review): db/dw/dd/dq are assembler data directives; confirm GetMnem
    # actually reports them, otherwise this feature is always 0.
    declarations = ('db', 'dw', 'dd', 'dq')
    first_ea, stop_ea = bl[0], bl[1]
    total = 0
    cursor = first_ea
    while cursor < stop_ea:
        mnemonic = GetMnem(cursor)
        cursor = NextHead(cursor)
        if mnemonic in declarations:
            total += 1
    return total

View File

@ -4,16 +4,17 @@ from idc import *
import os import os
import argparse import argparse
def parse_command():
    """
    Parse the script's command-line options.

    :return: argparse namespace with a ``path`` attribute (None when --path is absent)
    """
    cli = argparse.ArgumentParser(description='Process some integers.')
    cli.add_argument("--path", type=str, help="The directory where to store the generated .ida file")
    return cli.parse_args()
if __name__ == '__main__':
if __name__ == '__main__':
args = parse_command() args = parse_command()
path = args.path path = idc.ARGV[2]
analysis_flags = idc.GetShortPrm(idc.INF_START_AF) analysis_flags = idc.GetShortPrm(idc.INF_START_AF)
analysis_flags &= ~idc.AF_IMMOFF analysis_flags &= ~idc.AF_IMMOFF
# turn off "automatically make offset" heuristic # turn off "automatically make offset" heuristic
@ -22,6 +23,5 @@ if __name__ == '__main__':
cfgs = get_func_cfgs_c(FirstSeg()) cfgs = get_func_cfgs_c(FirstSeg())
binary_name = idc.GetInputFile() + '.ida' binary_name = idc.GetInputFile() + '.ida'
fullpath = os.path.join(path, binary_name) fullpath = os.path.join(path, binary_name)
pickle.dump(cfgs, open(fullpath,'w')) pickle.dump(cfgs, open(fullpath, 'w'))
print binary_name
idc.Exit(0) idc.Exit(0)

View File

@ -0,0 +1,97 @@
# -*- coding: UTF-8 -*-
import sys
from matplotlib import pyplot as plt
import networkx as nx
import hashlib
import json
def print_obj(obj):
    """Print all instance attributes of *obj* (its ``__dict__``)."""
    attributes = obj.__dict__
    print(attributes)
def calc_sha256(file_path):
    """
    Return the SHA-256 hex digest of a file's contents.

    The file is hashed in fixed-size chunks so large binaries are not loaded
    into memory in one piece, and no builtin name (``bytes``) is shadowed.

    :param file_path: path of the file to hash
    :return: lowercase hexadecimal SHA-256 digest string
    """
    digest = hashlib.sha256()
    with open(file_path, 'rb') as f:
        # iter(callable, sentinel) stops when read() returns the empty string at EOF.
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
import pickle
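# sub_10F20 308: the decompiled code contains strings, but this feature extraction reports no string constants; indirectly referenced strings are probably not recognized. Almost no function has string constants in its features, so the strings are likely stored elsewhere and only referenced.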
# sub_166C4 393
if __name__ == '__main__':
    # Script entry: load the pickled ACFGs of one binary and print them as a
    # single JSON line (function names, per-function block features and edges,
    # the binary's SHA-256 and the function count).
    file_path = '../3c580f5beca53b6599e5f04d3aa68a34bd50521d7ec5d7163849eb69f53a4150.exe'
    testpath = '../store/3c580f5beca53b6599e5f04d3aa68a34bd50521d7ec5d7163849eb69f53a4150.exe.ida'

    # SECURITY: pickle.load can execute arbitrary code; only load .ida files
    # that were generated locally.
    # NOTE(review): text mode 'r' is kept from the original; switching to 'rb'
    # is only safe if the .ida file was also written in binary mode.
    with open(testpath, 'r') as fr:
        data1 = pickle.load(fr)  # ACFGs of one binary

    graphs = data1.raw_graph_list

    # function_edges
    # NOTE(review): these are the edges of the FIRST function's graph
    # (raw_graph_list[0].old_g), not a call graph; confirm this is intended.
    function_edge_start = []
    function_edge_end = []
    if graphs:  # an empty list previously crashed with IndexError
        for item in graphs[0].old_g.edges:
            function_edge_start.append(item[0])
            function_edge_end.append(item[1])
    function_edges = [function_edge_start, function_edge_end]

    # file hash
    file_hash = calc_sha256(file_path)
    # function count
    function_number = len(graphs)

    # Node attributes emitted first, in this order: calls, transfer,
    # arithmetic, logic, compare, move, termination, data declaration,
    # total instructions.
    leading_keys = ('numCalls', 'numTIs', 'numAs', 'numLIs', 'numCom',
                    'numMov', 'numTerm', 'numDD', 'numIns')

    fun_name_temp = []
    acfg_list = []
    # per-function features
    for raw_graph in graphs:
        fun_name_temp.append(raw_graph.funcname)
        temp_G = raw_graph.old_g
        block_number = len(temp_G.node)

        acfg_list_item_feature = []
        # Nodes are addressed as 0 .. block_number - 1, as in the original loop.
        for temp in range(block_number):
            attrs = temp_G.node[temp]
            block_features = [attrs[key] for key in leading_keys]
            # string constants count, or integer constants count when there are no strings
            block_features.append(len(attrs['strings']) or len(attrs['consts']))
            # offspring
            block_features.append(attrs['offs'])
            acfg_list_item_feature.append(block_features)

        edge_list_start = []
        edge_list_end = []
        for item in temp_G.edges:
            edge_list_start.append(item[0])
            edge_list_end.append(item[1])
        block_edges = [edge_list_start, edge_list_end]

        acfg_list_item = {"block_number": block_number, "block_edges": block_edges, "block_features": acfg_list_item_feature}
        acfg_list.append(acfg_list_item)

    json_temp = {"function_edges": function_edges, "acfg_list": acfg_list, "function_names": fun_name_temp, "hash": file_hash, "function_number": function_number}
    json_str = json.dumps(json_temp)
    # Parenthesised form prints the same under the Python 2 print statement.
    print(json_str)

View File

@ -1,13 +1,16 @@
import itertools import itertools
import sys import sys
sys.path.insert(0, '/usr/local/lib/python2.7/dist-packages/')
sys.path.insert(1, 'C:/Python27/Lib/site-packages')
import networkx as nx import networkx as nx
#import numpy as np # import numpy as np
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
import pdb import pdb
import os import os
import re,mmap import re, mmap
#from graph_edit_new import *
# from graph_edit_new import *
class raw_graph: class raw_graph:
def __init__(self, funcname, g, func_f): def __init__(self, funcname, g, func_f):
@ -33,7 +36,7 @@ class raw_graph:
node2 = edge[1] node2 = edge[1]
self.g.add_edge(node1, node2) self.g.add_edge(node1, node2)
def obtainOffsprings(self,g): def obtainOffsprings(self, g):
nodes = g.nodes() nodes = g.nodes()
for node in nodes: for node in nodes:
offsprings = {} offsprings = {}
@ -51,16 +54,16 @@ class raw_graph:
def retrieveVec(self, id_, g): def retrieveVec(self, id_, g):
feature_vec = [] feature_vec = []
#numC0 # numC0
numc = g.node[id_]['consts'] numc = g.node[id_]['consts']
feature_vec.append(numc) feature_vec.append(numc)
#nums1 # nums1
nums = g.node[id_]['strings'] nums = g.node[id_]['strings']
feature_vec.append(nums) feature_vec.append(nums)
#offsprings2 # offsprings2
offs = g.node[id_]['offs'] offs = g.node[id_]['offs']
feature_vec.append(offs) feature_vec.append(offs)
#numAs3 # numAs3
numAs = g.node[id_]['numAs'] numAs = g.node[id_]['numAs']
feature_vec.append(numAs) feature_vec.append(numAs)
# of calls4 # of calls4
@ -75,12 +78,16 @@ class raw_graph:
# of TIs7 # of TIs7
insts = g.node[id_]['numTIs'] insts = g.node[id_]['numTIs']
feature_vec.append(insts) feature_vec.append(insts)
return feature_vec
feature_vec.append(g.node[id_]['numCom'])
feature_vec.append(g.node[id_]['numMov'])
feature_vec.append(g.node[id_]['numTerm'])
feature_vec.append(g.node[id_]['numDD'])
return feature_vec
def enumerating(self, n): def enumerating(self, n):
subgs = [] subgs = []
#pdb.set_trace() # pdb.set_trace()
for sub_nodes in itertools.combinations(self.g.nodes(), n): for sub_nodes in itertools.combinations(self.g.nodes(), n):
subg = self.g.subgraph(sub_nodes) subg = self.g.subgraph(sub_nodes)
u_subg = subg.to_undirected() u_subg = subg.to_undirected()
@ -88,10 +95,9 @@ class raw_graph:
subgs.append(subg) subgs.append(subg)
return subgs return subgs
def genMotifs(self, n): def genMotifs(self, n):
motifs = {} motifs = {}
subgs = enumerating(n) subgs = self.enumerating(n)
for subg in subgs: for subg in subgs:
if len(motifs) == 0: if len(motifs) == 0:
motifs[subg] = [subg] motifs[subg] = [subg]
@ -106,20 +112,26 @@ class raw_graph:
return motifs return motifs
def enumerating_efficient(self, n): def enumerating_efficient(self, n):
#pdb.set_trace() # pdb.set_trace()
if len(self.g) >= 200: if len(self.g) >= 200:
return [] return []
with open('/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt','wb') as f: with open('/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt', 'wb') as f:
nx.write_edgelist(self.g,f,data=False) nx.write_edgelist(self.g, f, data=False)
#pdb.set_trace() # pdb.set_trace()
process = Popen(["/home/qian/workspace/FANMOD-command_line-source/executables/./fanmod_command_line_linux", str(n), "100000", "1", "/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt", "1", "0", "0", "2", "0", "0", "0", "1000", "3", "3", "/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt", "0", "1"], stdout=PIPE, stderr=PIPE) process = Popen(
["/home/qian/workspace/FANMOD-command_line-source/executables/./fanmod_command_line_linux", str(n),
"100000", "1", "/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt",
"1", "0", "0", "2", "0", "0", "0", "1000", "3", "3",
"/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt", "0", "1"],
stdout=PIPE, stderr=PIPE)
stdout, stderr = process.communicate() stdout, stderr = process.communicate()
if process.returncode >= 0: if process.returncode >= 0:
#os.system("/home/qian/software/FANMOD-command_line-source/executables/./fanmod_command_line_linux " +str(n) + " 100000 1 /home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt 1 0 0 2 0 0 0 1000 3 3 /home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt 0 1") # os.system("/home/qian/software/FANMOD-command_line-source/executables/./fanmod_command_line_linux " +str(n) + " 100000 1 /home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt 1 0 0 2 0 0 0 1000 3 3 /home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt 0 1")
#pdb.set_trace() # pdb.set_trace()
#pdb.set_trace() # pdb.set_trace()
subgs = self.parseOutput("/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt.dump", n) subgs = self.parseOutput(
#pdb.set_trace() "/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt.dump", n)
# pdb.set_trace()
os.remove("/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt.dump") os.remove("/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt.dump")
return subgs return subgs
return [] return []
@ -127,7 +139,7 @@ class raw_graph:
def parseOutput(self, path, n): def parseOutput(self, path, n):
pattern = re.compile('[0-9]+\,[0-9]+\,[0-9]+\,[0-9]+') pattern = re.compile('[0-9]+\,[0-9]+\,[0-9]+\,[0-9]+')
subgraphs = [] subgraphs = []
with open(path,'r') as f: with open(path, 'r') as f:
data = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ) data = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
mo = re.findall(pattern, data) mo = re.findall(pattern, data)
if mo: if mo:
@ -138,7 +150,7 @@ class raw_graph:
def parseOutputByconditions(self, path, n): def parseOutputByconditions(self, path, n):
pattern = re.compile('[0-9]+\,[0-9]+\,[0-9]+\,[0-9]+') pattern = re.compile('[0-9]+\,[0-9]+\,[0-9]+\,[0-9]+')
subgraphs = [] subgraphs = []
with open(path,'r') as f: with open(path, 'r') as f:
data = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ) data = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
mo = re.findall(pattern, data) mo = re.findall(pattern, data)
if mo: if mo:
@ -157,8 +169,8 @@ class raw_graph:
return subgs return subgs
def createGraphDirectly(self, results): def createGraphDirectly(self, results):
#pdb.set_trace() # pdb.set_trace()
#subgs = [self.g.subgraph(indexes) for indexes in results] # subgs = [self.g.subgraph(indexes) for indexes in results]
subgs = [] subgs = []
for indexes in results: for indexes in results:
tg = template_graph() tg = template_graph()
@ -169,7 +181,7 @@ class raw_graph:
return subgs return subgs
def createGraph(self, results, n): def createGraph(self, results, n):
binary_value = int(results[0],2) binary_value = int(results[0], 2)
indexes = [int(v) for v in results[1:]] indexes = [int(v) for v in results[1:]]
fang = self.createG(results[0], n) fang = self.createG(results[0], n)
if fang: if fang:
@ -182,7 +194,7 @@ class raw_graph:
def createG(self, binary_str, n): def createG(self, binary_str, n):
g = nx.DiGraph() g = nx.DiGraph()
l = [int(v) for v in binary_str] l = [int(v) for v in binary_str]
#pdb.set_trace() # pdb.set_trace()
shape = (n, n) shape = (n, n)
data = np.array(l) data = np.array(l)
ad_matrix = data.reshape(shape) ad_matrix = data.reshape(shape)
@ -193,7 +205,6 @@ class raw_graph:
return g return g
class raw_graphs: class raw_graphs:
def __init__(self, binary_name): def __init__(self, binary_name):
self.binary_name = binary_name self.binary_name = binary_name
@ -224,21 +235,22 @@ class graphlets:
def __len__(self): def __len__(self):
return len(self.graphlets_list) return len(self.graphlets_list)
class template_graph:
    """Container pairing an optional ``value`` with a graph ``g`` (initially None)."""

    def __init__(self, value=None):
        """Start with no graph attached and remember *value*."""
        self.g = None
        self.value = value

    def updateG(self, g):
        """Attach *g* as this template's graph, replacing any previous one."""
        self.g = g
#def updateIndexes(self, indexes): # def updateIndexes(self, indexes):
# self.indexes = indexes # self.indexes = indexes
#def updateAttributes(self, pg, indexes, maing): # def updateAttributes(self, pg, indexes, maing):
# for id_ in xrange(len(indexes)): # for id_ in xrange(len(indexes)):
# index = indexes[id_] # index = indexes[id_]
# gnode = self.findNode(index, maing) # gnode = self.findNode(index, maing)
# self.g.node[gnode] = pg.node[index] # self.g.node[gnode] = pg.node[index]
class template_graphs: class template_graphs:
@ -251,7 +263,7 @@ class template_graphs:
subgs = [] subgs = []
binary_value = self.genBinValue() binary_value = self.genBinValue()
for i in xrange(binary_value): for i in xrange(binary_value):
if i == 0 : if i == 0:
continue continue
g = self.createG(i) g = self.createG(i)
if g: if g:
@ -261,13 +273,13 @@ class template_graphs:
def genBinValue(self): def genBinValue(self):
n = self.size n = self.size
self.bit_len = n*n self.bit_len = n * n
return 2**(self.bit_len) return 2 ** (self.bit_len)
def createG(self, i): def createG(self, i):
g = nx.DiGraph() g = nx.DiGraph()
l = self.genArray(i) l = self.genArray(i)
#pdb.set_trace() # pdb.set_trace()
shape = (self.size, self.size) shape = (self.size, self.size)
data = np.array(l) data = np.array(l)
ad_matrix = data.reshape(shape) ad_matrix = data.reshape(shape)