pycharm project:Genius

Erio 2021-11-18 17:43:34 +08:00
parent 7dcb04cd57
commit e29e36aa32
48 changed files with 278106 additions and 0 deletions

3
Genius3/.idea/.gitignore vendored Normal file

@@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml

11
Genius3/.idea/Genius3.iml Normal file

@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/python" isTestSource="false" />
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>


@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

4
Genius3/.idea/misc.xml Normal file

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7 (Genius3)" project-jdk-type="Python SDK" />
</project>


@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Genius3.iml" filepath="$PROJECT_DIR$/.idea/Genius3.iml" />
</modules>
</component>
</project>

221728
Genius3/acfgs/hpcenter.ida Normal file

File diff suppressed because it is too large

16
Genius3/main.py Normal file

@@ -0,0 +1,16 @@
# -*- coding: UTF-8 -*-
import sys
from func import *
from raw_graphs import *
from idc import *
import os
import argparse
if __name__ == '__main__':
print "hello"
#
# E:\BaiduNetdiskDownload\IDA_Pro_v6.8\IDA_Pro_v6.8\idaq.exe -c -A -S"raw-feature-extractor/preprocessing_ida.py --path C:\Program1\pycharmproject\Genius3\acfgs" hpcenter
# -c  delete the old database   -A  autonomous mode (analyze automatically, do not show dialog boxes)
# -B  equivalent to -c -A

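The batch invocation documented in the comment above can also be driven from an ordinary Python script outside IDA. A minimal sketch, assuming the idaq.exe path, the script argument string, and the output directory shown in the comment (all of them machine-specific placeholders):

import subprocess

# Paths copied from the comment above; adjust for the local setup.
IDA_EXE = r"E:\BaiduNetdiskDownload\IDA_Pro_v6.8\IDA_Pro_v6.8\idaq.exe"
IDA_SCRIPT = r"raw-feature-extractor/preprocessing_ida.py --path C:\Program1\pycharmproject\Genius3\acfgs"

# -c: delete the old database, -A: autonomous mode (no dialogs), -S: run the script after auto-analysis.
ret = subprocess.call([IDA_EXE, "-c", "-A", "-S" + IDA_SCRIPT, "hpcenter"])
print("idaq.exe exited with code %d" % ret)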


@@ -0,0 +1,3 @@
__all__ = ['QtCore', 'QtGui', 'QtNetwork', 'QtOpenGL', 'QtSql', 'QtSvg', 'QtTest', 'QtWebKit', 'QtScript']
__version__ = "1.1.2"
__version_info__ = (1, 1, 2, "final", 1)


45005
Genius3/python/idaapi.py Normal file

File diff suppressed because it is too large

BIN
Genius3/python/idaapi.pyc Normal file

Binary file not shown.

830
Genius3/python/idautils.py Normal file

@@ -0,0 +1,830 @@
#---------------------------------------------------------------------
# IDAPython - Python plugin for Interactive Disassembler
#
# Copyright (c) 2004-2010 Gergely Erdelyi <gergely.erdelyi@d-dome.net>
#
# All rights reserved.
#
# For detailed copyright information see the file COPYING in
# the root of the distribution archive.
#---------------------------------------------------------------------
"""
idautils.py - High level utility functions for IDA
"""
import idaapi
import idc
import types
import os
def refs(ea, funcfirst, funcnext):
"""
Generic reference collector - INTERNAL USE ONLY.
"""
ref = funcfirst(ea)
while ref != idaapi.BADADDR:
yield ref
ref = funcnext(ea, ref)
def CodeRefsTo(ea, flow):
"""
Get a list of code references to 'ea'
@param ea: Target address
@param flow: Follow normal code flow or not
@type flow: Boolean (0/1, False/True)
@return: list of references (may be empty list)
Example::
for ref in CodeRefsTo(ScreenEA(), 1):
print ref
"""
if flow == 1:
return refs(ea, idaapi.get_first_cref_to, idaapi.get_next_cref_to)
else:
return refs(ea, idaapi.get_first_fcref_to, idaapi.get_next_fcref_to)
def CodeRefsFrom(ea, flow):
"""
Get a list of code references from 'ea'
@param ea: Target address
@param flow: Follow normal code flow or not
@type flow: Boolean (0/1, False/True)
@return: list of references (may be empty list)
Example::
for ref in CodeRefsFrom(ScreenEA(), 1):
print ref
"""
if flow == 1:
return refs(ea, idaapi.get_first_cref_from, idaapi.get_next_cref_from)
else:
return refs(ea, idaapi.get_first_fcref_from, idaapi.get_next_fcref_from)
def DataRefsTo(ea):
"""
Get a list of data references to 'ea'
@param ea: Target address
@return: list of references (may be empty list)
Example::
for ref in DataRefsTo(ScreenEA()):
print ref
"""
return refs(ea, idaapi.get_first_dref_to, idaapi.get_next_dref_to)
def DataRefsFrom(ea):
"""
Get a list of data references from 'ea'
@param ea: Target address
@return: list of references (may be empty list)
Example::
for ref in DataRefsFrom(ScreenEA()):
print ref
"""
return refs(ea, idaapi.get_first_dref_from, idaapi.get_next_dref_from)
def XrefTypeName(typecode):
"""
Convert cross-reference type codes to readable names
@param typecode: cross-reference type code
"""
ref_types = {
0 : 'Data_Unknown',
1 : 'Data_Offset',
2 : 'Data_Write',
3 : 'Data_Read',
4 : 'Data_Text',
5 : 'Data_Informational',
16 : 'Code_Far_Call',
17 : 'Code_Near_Call',
18 : 'Code_Far_Jump',
19 : 'Code_Near_Jump',
20 : 'Code_User',
21 : 'Ordinary_Flow'
}
assert typecode in ref_types, "unknown reference type %d" % typecode
return ref_types[typecode]
def _copy_xref(xref):
""" Make a private copy of the xref class to preserve its contents """
class _xref(object):
pass
xr = _xref()
for attr in [ 'frm', 'to', 'iscode', 'type', 'user' ]:
setattr(xr, attr, getattr(xref, attr))
return xr
def XrefsFrom(ea, flags=0):
"""
Return all references from address 'ea'
@param ea: Reference address
@param flags: any of idaapi.XREF_* flags
Example::
for xref in XrefsFrom(here(), 0):
print xref.type, XrefTypeName(xref.type), \
'from', hex(xref.frm), 'to', hex(xref.to)
"""
xref = idaapi.xrefblk_t()
if xref.first_from(ea, flags):
yield _copy_xref(xref)
while xref.next_from():
yield _copy_xref(xref)
def XrefsTo(ea, flags=0):
"""
Return all references to address 'ea'
@param ea: Reference address
@param flags: any of idaapi.XREF_* flags
Example::
for xref in XrefsTo(here(), 0):
print xref.type, XrefTypeName(xref.type), \
'from', hex(xref.frm), 'to', hex(xref.to)
"""
xref = idaapi.xrefblk_t()
if xref.first_to(ea, flags):
yield _copy_xref(xref)
while xref.next_to():
yield _copy_xref(xref)
def Threads():
"""Returns all thread IDs"""
for i in xrange(0, idc.GetThreadQty()):
yield idc.GetThreadId(i)
def Heads(start=None, end=None):
"""
Get a list of heads (instructions or data)
@param start: start address (default: inf.minEA)
@param end: end address (default: inf.maxEA)
@return: list of heads between start and end
"""
if not start: start = idaapi.cvar.inf.minEA
if not end: end = idaapi.cvar.inf.maxEA
ea = start
if not idc.isHead(idc.GetFlags(ea)):
ea = idaapi.next_head(ea, end)
while ea != idaapi.BADADDR:
yield ea
ea = idaapi.next_head(ea, end)
def Functions(start=None, end=None):
"""
Get a list of functions
@param start: start address (default: inf.minEA)
@param end: end address (default: inf.maxEA)
@return: list of heads between start and end
@note: The last function that starts before 'end' is included even
if it extends beyond 'end'. Any function that has its chunks scattered
in multiple segments will be reported multiple times, once in each segment
as they are listed.
"""
if not start: start = idaapi.cvar.inf.minEA
if not end: end = idaapi.cvar.inf.maxEA
# find first function head chunk in the range
chunk = idaapi.get_fchunk(start)
if not chunk:
chunk = idaapi.get_next_fchunk(start)
while chunk and chunk.startEA < end and (chunk.flags & idaapi.FUNC_TAIL) != 0:
chunk = idaapi.get_next_fchunk(chunk.startEA)
func = chunk
while func and func.startEA < end:
startea = func.startEA
yield startea
func = idaapi.get_next_func(startea)
def Chunks(start):
"""
Get a list of function chunks
@param start: address of the function
@return: list of function chunks (tuples of the form (start_ea, end_ea))
belonging to the function
"""
func_iter = idaapi.func_tail_iterator_t( idaapi.get_func( start ) )
status = func_iter.main()
while status:
chunk = func_iter.chunk()
yield (chunk.startEA, chunk.endEA)
status = func_iter.next()
def Modules():
"""
Returns a list of module objects with name,size,base and the rebase_to attributes
"""
mod = idaapi.module_info_t()
result = idaapi.get_first_module(mod)
while result:
yield idaapi.object_t(name=mod.name, size=mod.size, base=mod.base, rebase_to=mod.rebase_to)
result = idaapi.get_next_module(mod)
def Names():
"""
Returns a list of names
@return: List of tuples (ea, name)
"""
for i in xrange(idaapi.get_nlist_size()):
ea = idaapi.get_nlist_ea(i)
name = idaapi.get_nlist_name(i)
yield (ea, name)
def Segments():
"""
Get list of segments (sections) in the binary image
@return: List of segment start addresses.
"""
for n in xrange(idaapi.get_segm_qty()):
seg = idaapi.getnseg(n)
if seg:
yield seg.startEA
def Entries():
"""
Returns a list of entry points
@return: List of tuples (index, ordinal, ea, name)
"""
n = idaapi.get_entry_qty()
for i in xrange(0, n):
ordinal = idaapi.get_entry_ordinal(i)
ea = idaapi.get_entry(ordinal)
name = idaapi.get_entry_name(ordinal)
yield (i, ordinal, ea, name)
def FuncItems(start):
"""
Get a list of function items
@param start: address of the function
@return: ea of each item in the function
"""
func = idaapi.get_func(start)
if not func:
return
fii = idaapi.func_item_iterator_t()
ok = fii.set(func)
while ok:
yield fii.current()
ok = fii.next_code()
def Structs():
"""
Get a list of structures
@return: List of tuples (idx, sid, name)
"""
idx = idc.GetFirstStrucIdx()
while idx != idaapi.BADADDR:
sid = idc.GetStrucId(idx)
yield (idx, sid, idc.GetStrucName(sid))
idx = idc.GetNextStrucIdx(idx)
def StructMembers(sid):
"""
Get a list of structure members information (or stack vars if given a frame).
@param sid: ID of the structure.
@return: List of tuples (offset, name, size)
@note: If 'sid' does not refer to a valid structure,
an exception will be raised.
@note: This will not return 'holes' in structures/stack frames;
it only returns defined structure members.
"""
m = idc.GetFirstMember(sid)
if m == -1:
raise Exception("No structure with ID: 0x%x" % sid)
while (m != idaapi.BADADDR):
name = idc.GetMemberName(sid, m)
if name:
yield (m, name, idc.GetMemberSize(sid, m))
m = idc.GetStrucNextOff(sid, m)
def DecodePrecedingInstruction(ea):
"""
Decode preceding instruction in the execution flow.
@param ea: address to decode
@return: (None or the decoded instruction, farref)
farref will contain 'true' if it followed an xref, false otherwise
"""
prev_addr, farref = idaapi.decode_preceding_insn(ea)
if prev_addr == idaapi.BADADDR:
return (None, False)
else:
return (idaapi.cmd.copy(), farref)
def DecodePreviousInstruction(ea):
"""
Decodes the previous instruction and returns an insn_t like class
@param ea: address to decode
@return: None or a new insn_t instance
"""
prev_addr = idaapi.decode_prev_insn(ea)
if prev_addr == idaapi.BADADDR:
return None
return idaapi.cmd.copy()
def DecodeInstruction(ea):
"""
Decodes an instruction and returns an insn_t like class
@param ea: address to decode
@return: None or a new insn_t instance
"""
inslen = idaapi.decode_insn(ea)
if inslen == 0:
return None
return idaapi.cmd.copy()
def GetDataList(ea, count, itemsize=1):
"""
Get data list - INTERNAL USE ONLY
"""
if itemsize == 1:
getdata = idaapi.get_byte
elif itemsize == 2:
getdata = idaapi.get_word
elif itemsize == 4:
getdata = idaapi.get_long
elif itemsize == 8:
getdata = idaapi.get_qword
else:
raise ValueError, "Invalid data size! Must be 1, 2, 4 or 8"
endea = ea + itemsize * count
curea = ea
while curea < endea:
yield getdata(curea)
curea += itemsize
def PutDataList(ea, datalist, itemsize=1):
"""
Put data list - INTERNAL USE ONLY
"""
putdata = None
if itemsize == 1:
putdata = idaapi.patch_byte
if itemsize == 2:
putdata = idaapi.patch_word
if itemsize == 4:
putdata = idaapi.patch_long
assert putdata, "Invalid data size! Must be 1, 2 or 4"
for val in datalist:
putdata(ea, val)
ea = ea + itemsize
def MapDataList(ea, length, func, wordsize=1):
"""
Map through a list of data words in the database
@param ea: start address
@param length: number of words to map
@param func: mapping function
@param wordsize: size of words to map [default: 1 byte]
@return: None
"""
PutDataList(ea, map(func, GetDataList(ea, length, wordsize)), wordsize)
def GetInputFileMD5():
"""
Return the MD5 hash of the input binary file
@return: MD5 string or None on error
"""
return idc.GetInputMD5()
class Strings(object):
"""
Allows iterating over the string list. The set of strings will not be modified,
unless asked explicitly at setup()-time.
Example:
s = Strings()
for i in s:
print "%x: len=%d type=%d -> '%s'" % (i.ea, i.length, i.type, str(i))
"""
class StringItem(object):
"""
Class representing each string item.
"""
def __init__(self, si):
self.ea = si.ea
"""String ea"""
self.type = si.type
"""string type (ASCSTR_xxxxx)"""
self.length = si.length
"""string length"""
def is_1_byte_encoding(self):
return not self.is_2_bytes_encoding() and not self.is_4_bytes_encoding()
def is_2_bytes_encoding(self):
return (self.type & 7) in [idaapi.ASCSTR_UTF16, idaapi.ASCSTR_ULEN2, idaapi.ASCSTR_ULEN4]
def is_4_bytes_encoding(self):
return (self.type & 7) == idaapi.ASCSTR_UTF32
def _toseq(self, as_unicode):
if self.is_2_bytes_encoding():
conv = idaapi.ACFOPT_UTF16
pyenc = "utf-16"
elif self.is_4_bytes_encoding():
conv = idaapi.ACFOPT_UTF8
pyenc = "utf-8"
else:
conv = idaapi.ACFOPT_ASCII
pyenc = 'ascii'
strbytes = idaapi.get_ascii_contents2(self.ea, self.length, self.type, conv)
return unicode(strbytes, pyenc, 'replace') if as_unicode else strbytes
def __str__(self):
return self._toseq(False)
def __unicode__(self):
return self._toseq(True)
STR_C = 0x0001
"""C-style ASCII string"""
STR_PASCAL = 0x0002
"""Pascal-style ASCII string (length byte)"""
STR_LEN2 = 0x0004
"""Pascal-style, length is 2 bytes"""
STR_UNICODE = 0x0008
"""Unicode string"""
STR_LEN4 = 0x0010
"""Pascal-style, length is 4 bytes"""
STR_ULEN2 = 0x0020
"""Pascal-style Unicode, length is 2 bytes"""
STR_ULEN4 = 0x0040
"""Pascal-style Unicode, length is 4 bytes"""
def clear_cache(self):
"""Clears the strings list cache"""
self.refresh(0, 0) # when ea1=ea2 the kernel will clear the cache
def __init__(self, default_setup = False):
"""
Initializes the Strings enumeration helper class
@param default_setup: Set to True to use default setup (C strings, min len 5, ...)
"""
self.size = 0
if default_setup:
self.setup()
else:
self.refresh()
self._si = idaapi.string_info_t()
def refresh(self, ea1=None, ea2=None):
"""Refreshes the strings list"""
if ea1 is None:
ea1 = idaapi.cvar.inf.minEA
if ea2 is None:
ea2 = idaapi.cvar.inf.maxEA
idaapi.refresh_strlist(ea1, ea2)
self.size = idaapi.get_strlist_qty()
def setup(self,
strtypes = STR_C,
minlen = 5,
only_7bit = True,
ignore_instructions = False,
ea1 = None,
ea2 = None,
display_only_existing_strings = False):
if ea1 is None:
ea1 = idaapi.cvar.inf.minEA
if ea2 is None:
ea2 = idaapi.cvar.inf.maxEA
t = idaapi.strwinsetup_t()
t.strtypes = strtypes
t.minlen = minlen
t.only_7bit = only_7bit
t.ea1 = ea1
t.ea2 = ea2
t.display_only_existing_strings = display_only_existing_strings
idaapi.set_strlist_options(t)
# Automatically refreshes
self.refresh()
def _get_item(self, index):
if not idaapi.get_strlist_item(index, self._si):
return None
else:
return Strings.StringItem(self._si)
def __iter__(self):
return (self._get_item(index) for index in xrange(0, self.size))
def __getitem__(self, index):
"""Returns a string item or None"""
if index >= self.size:
raise KeyError
else:
return self._get_item(index)
# -----------------------------------------------------------------------
def GetIdbDir():
"""
Get IDB directory
This function returns directory path of the current IDB database
"""
return os.path.dirname(idaapi.cvar.database_idb) + os.sep
# -----------------------------------------------------------------------
def GetRegisterList():
"""Returns the register list"""
return idaapi.ph_get_regnames()
# -----------------------------------------------------------------------
def GetInstructionList():
"""Returns the instruction list of the current processor module"""
return [i[0] for i in idaapi.ph_get_instruc() if i[0]]
# -----------------------------------------------------------------------
def _Assemble(ea, line):
"""
Please refer to Assemble() - INTERNAL USE ONLY
"""
if type(line) == types.StringType:
lines = [line]
else:
lines = line
ret = []
for line in lines:
seg = idaapi.getseg(ea)
if not seg:
return (False, "No segment at ea")
ip = ea - (idaapi.ask_selector(seg.sel) << 4)
buf = idaapi.AssembleLine(ea, seg.sel, ip, seg.bitness, line)
if not buf:
return (False, "Assembler failed: " + line)
ea += len(buf)
ret.append(buf)
if len(ret) == 1:
ret = ret[0]
return (True, ret)
def Assemble(ea, line):
"""
Assembles one or more lines (does not display any message dialogs)
If line is a list then this function will attempt to assemble all the lines
This function will turn on batch mode temporarily so that no messages are displayed on the screen
@param ea: start address
@return: (False, "Error message") or (True, asm_buf) or (True, [asm_buf1, asm_buf2, asm_buf3])
"""
old_batch = idc.Batch(1)
ret = _Assemble(ea, line)
idc.Batch(old_batch)
return ret
def _copy_obj(src, dest, skip_list = None):
"""
Copy non private/non callable attributes from a class instance to another
@param src: Source class to copy from
@param dest: If it is a string then it designates the new class type that will be created and copied to.
Otherwise dest should be an instance of another class
@return: A new instance or "dest"
"""
if type(dest) == types.StringType:
# instantiate a new destination class of the specified type name?
dest = new.classobj(dest, (), {})
for x in dir(src):
# skip special and private fields
if x.startswith("__") and x.endswith("__"):
continue
# skip items in the skip list
if skip_list and x in skip_list:
continue
t = getattr(src, x)
# skip callable
if callable(t):
continue
setattr(dest, x, t)
return dest
# -----------------------------------------------------------------------
class _reg_dtyp_t(object):
"""
INTERNAL
This class describes a register's number and dtyp.
The equal operator is overloaded so that two instances can be tested for equality
"""
def __init__(self, reg, dtyp):
self.reg = reg
self.dtyp = dtyp
def __eq__(self, other):
return (self.reg == other.reg) and (self.dtyp == other.dtyp)
# -----------------------------------------------------------------------
class _procregs(object):
"""Utility class allowing the users to identify registers in a decoded instruction"""
def __getattr__(self, attr):
ri = idaapi.reg_info_t()
if not idaapi.parse_reg_name(attr, ri):
raise AttributeError()
r = _reg_dtyp_t(ri.reg, ord(idaapi.get_dtyp_by_size(ri.size)))
self.__dict__[attr] = r
return r
def __setattr__(self, attr, value):
raise AttributeError(attr)
# -----------------------------------------------------------------------
class _cpu(object):
"Simple wrapper around GetRegValue/SetRegValue"
def __getattr__(self, name):
#print "cpu.get(%s)" % name
return idc.GetRegValue(name)
def __setattr__(self, name, value):
#print "cpu.set(%s)" % name
return idc.SetRegValue(value, name)
# --------------------------------------------------------------------------
class __process_ui_actions_helper(object):
def __init__(self, actions, flags = 0):
"""Expect a list or a string with a list of actions"""
if isinstance(actions, str):
lst = actions.split(";")
elif isinstance(actions, (list, tuple)):
lst = actions
else:
raise ValueError, "Must pass a string, list or a tuple"
# Remember the action list and the flags
self.__action_list = lst
self.__flags = flags
# Reset action index
self.__idx = 0
def __len__(self):
return len(self.__action_list)
def __call__(self):
if self.__idx >= len(self.__action_list):
return False
# Execute one action
idaapi.process_ui_action(
self.__action_list[self.__idx],
self.__flags)
# Move to next action
self.__idx += 1
# Reschedule
return True
# --------------------------------------------------------------------------
def ProcessUiActions(actions, flags=0):
"""
@param actions: A string containing a list of actions separated by semicolon, a list or a tuple
@param flags: flags to be passed to process_ui_action()
@return: Boolean. Returns False if the action list was empty or execute_ui_requests() failed.
"""
# Instantiate a helper
helper = __process_ui_actions_helper(actions, flags)
return False if len(helper) < 1 else idaapi.execute_ui_requests((helper,))
# -----------------------------------------------------------------------
class peutils_t(object):
"""
PE utility class. Retrieves PE information from the database.
Constants from pe.h
"""
PE_NODE = "$ PE header" # netnode name for PE header
PE_ALT_DBG_FPOS = idaapi.BADADDR & -1 # altval() -> translated fpos of debuginfo
PE_ALT_IMAGEBASE = idaapi.BADADDR & -2 # altval() -> loading address (usually pe.imagebase)
PE_ALT_PEHDR_OFF = idaapi.BADADDR & -3 # altval() -> offset of PE header
PE_ALT_NEFLAGS = idaapi.BADADDR & -4 # altval() -> neflags
PE_ALT_TDS_LOADED = idaapi.BADADDR & -5 # altval() -> tds already loaded(1) or invalid(-1)
PE_ALT_PSXDLL = idaapi.BADADDR & -6 # altval() -> if POSIX(x86) imports from PSXDLL netnode
def __init__(self):
self.__penode = idaapi.netnode()
self.__penode.create(peutils_t.PE_NODE)
imagebase = property(
lambda self: self.__penode.altval(peutils_t.PE_ALT_IMAGEBASE)
)
header = property(
lambda self: self.__penode.altval(peutils_t.PE_ALT_PEHDR_OFF)
)
def __str__(self):
return "peutils_t(imagebase=%s, header=%s)" % (hex(self.imagebase), hex(self.header))
def header(self):
"""
Returns the complete PE header as an instance of peheader_t (defined in the SDK).
"""
return self.__penode.valobj()
# -----------------------------------------------------------------------
cpu = _cpu()
"""This is a special class instance used to access the registers as if they were attributes of this object.
For example to access the EAX register:
print "%x" % cpu.Eax
"""
procregs = _procregs()
"""This object is used to access the processor registers. It is useful when decoding instructions and you want to see which instruction is which.
For example:
x = idautils.DecodeInstruction(here())
if x[0] == procregs.Esp:
print "This operand is the register ESP
"""

BIN
Genius3/python/idautils.pyc Normal file

Binary file not shown.

8590
Genius3/python/idc.py Normal file

File diff suppressed because it is too large

BIN
Genius3/python/idc.pyc Normal file

Binary file not shown.

111
Genius3/python/init.py Normal file

@@ -0,0 +1,111 @@
#!/usr/bin/env python
# -----------------------------------------------------------------------
# IDAPython - Python plugin for Interactive Disassembler
#
# Copyright (c) The IDAPython Team <idapython@googlegroups.com>
#
# All rights reserved.
#
# For detailed copyright information see the file COPYING in
# the root of the distribution archive.
# -----------------------------------------------------------------------
# init.py - Essential init routines
# -----------------------------------------------------------------------
import os
import sys
import time
import warnings
import _idaapi
# __EA64__ is set if IDA is running in 64-bit mode
__EA64__ = _idaapi.BADADDR == 0xFFFFFFFFFFFFFFFFL
# -----------------------------------------------------------------------
# Take over the standard text outputs
# -----------------------------------------------------------------------
class IDAPythonStdOut:
"""
Dummy file-like class that receives stdout and stderr
"""
def write(self, text):
# NB: in case 'text' is Unicode, msg() will decode it
# and call umsg() to print it
_idaapi.msg(text)
def flush(self):
pass
def isatty(self):
return False
# -----------------------------------------------------------------------
def runscript(script):
"""
Executes a script.
This function is present for backward compatibility. Please use idaapi.IDAPython_ExecScript() instead
@param script: script path
@return: Error string or None on success
"""
import idaapi
return idaapi.IDAPython_ExecScript(script, globals())
# -----------------------------------------------------------------------
def print_banner():
banner = [
"Python %s " % sys.version,
"IDAPython" + (" 64-bit" if __EA64__ else "") + " v%d.%d.%d %s (serial %d) (c) The IDAPython Team <idapython@googlegroups.com>" % IDAPYTHON_VERSION
]
sepline = '-' * (max([len(s) for s in banner])+1)
print(sepline)
print("\n".join(banner))
print(sepline)
# -----------------------------------------------------------------------
# Redirect stderr and stdout to the IDA message window
_orig_stdout = sys.stdout;
_orig_stderr = sys.stderr;
sys.stdout = sys.stderr = IDAPythonStdOut()
# -----------------------------------------------------------------------
# Initialize the help, with our own stdin wrapper, that'll query the user
# -----------------------------------------------------------------------
import pydoc
class IDAPythonHelpPrompter:
def readline(self):
return idaapi.askstr(0, '', 'Help topic?')
help = pydoc.Helper(input = IDAPythonHelpPrompter(), output = sys.stdout)
# Assign a default sys.argv
sys.argv = [""]
# Have to make sure Python finds our modules
sys.path.append(_idaapi.idadir("python"))
# Remove current directory from the top of the path search
if '' in sys.path: # On non Windows, the empty path is added
sys.path.remove('')
if os.getcwd() in sys.path:
sys.path.remove(os.getcwd())
# ...and add it to the end if needed
if not IDAPYTHON_REMOVE_CWD_SYS_PATH:
sys.path.append(os.getcwd())
# Import all the required modules
from idaapi import Choose, get_user_idadir, cvar, Choose2, Appcall, Form
from idc import *
from idautils import *
import idaapi
# Load the users personal init file
userrc = os.path.join(get_user_idadir(), "idapythonrc.py")
if os.path.exists(userrc):
idaapi.IDAPython_ExecScript(userrc, globals())
# All done, ready to rock.


@@ -0,0 +1,286 @@
import copy
import networkx as nx
from idautils import *
from idaapi import *
from idc import *
import copy
import networkx as nx
from idautils import *
from idaapi import *
from idc import *
from graph_analysis_ida import *
def getCfg(func, externs_eas, ea_externs):
func_start = func.startEA
func_end = func.endEA
cfg = nx.DiGraph()
control_blocks, main_blocks = obtain_block_sequence(func)
i = 0
visited = {}
start_node = None
for bl in control_blocks:
start = control_blocks[bl][0]
end = control_blocks[bl][1]
src_node = (start, end)
if src_node not in visited:
src_id = len(cfg)
visited[src_node] = src_id
cfg.add_node(src_id)
cfg.node[src_id]['label'] = src_node
else:
src_id = visited[src_node]
#if end in seq_blocks and GetMnem(PrevHead(end)) != 'jmp':
if start == func_start:
cfg.node[src_id]['c'] = "start"
start_node = src_node
if end == func_end:
cfg.node[src_id]['c'] = "end"
#print control_ea, 1
refs = CodeRefsTo(start, 0)
for ref in refs:
if ref in control_blocks:
dst_node = control_blocks[ref]
if dst_node not in visited:
visited[dst_node] = len(cfg)
dst_id = visited[dst_node]
cfg.add_edge(dst_id, src_id)
cfg.node[dst_id]['label'] = dst_node
#print control_ea, 1
refs = CodeRefsTo(start, 1)
for ref in refs:
if ref in control_blocks:
dst_node = control_blocks[ref]
if dst_node not in visited:
visited[dst_node] = len(cfg)
dst_id = visited[dst_node]
cfg.add_edge(dst_id, src_id)
cfg.node[dst_id]['label'] = dst_node
#print "attributing"
attributingRe(cfg, externs_eas, ea_externs)
# removing deadnodes
#old_cfg = copy.deepcopy(cfg)
#transform(cfg)
return cfg, 0
def transform(cfg):
merging(cfg)
filtering(cfg)
def merging(cfg):
bb_ids = cfg.nodes()
for bb_id in bb_ids:
try:
bb = cfg.node[bb_id]['label']
bb_start = bb[0]
bb_end = bb[1]
succs = cfg.successors(bb_id)
#preds = cfg.predecessors(bb_id)
if len(succs) == 1:
preds = cfg.predecessors(succs[0])
if len(preds) == 1:
domerge(cfg, bb_id, succs[0])
except:
pass
def domerge(cfg, bb_id, suc_node):
suc_nodes = cfg.successors(suc_node)
for node in suc_nodes:
cfg.add_edge(bb_id, node)
cfg.remove_node(suc_node)
def filtering(cfg):
rm_sets = []
for bb_id in cfg:
bb = cfg.node[bb_id]['label']
bb_start = bb[0]
bb_end = bb[1]
re = remove(bb_start, bb_end)
print bb_id, re, bb_start, bb_end
if re:
print re, bb_id
rm_sets.append(bb_id)
print rm_sets
for bb_id in rm_sets:
cfg.remove_node(bb_id)
def remove(bb_start, bb_end):
seqs = getSequences(bb_start, bb_end)
if matchseq(seqs):
return True
return False
def matchseq(seqs):
mips = set(['lw', "jr", "addiu"])
x86 = set(['add', 'pop', 'retn'])
b_mips = set(['b', ('move','$v0')])
b_x86 = set(['b', ('mov','$eax')])
re_mips = set([('move','$v0')])
re_x86 = set([('mov','$eax')])
diff_mips = set(seqs).difference(set(mips))
if len(diff_mips) == 0:
return True
diff_x86 = set(seqs).difference(set(x86))
if len(diff_x86) == 0:
return True
if set(seqs) == b_mips:
return True
if set(seqs) == b_x86:
return True
if set(seqs) == re_mips:
return True
if set(seqs) == re_x86:
return True
return False
def attributingRe(cfg, externs_eas, ea_externs):
for node_id in cfg:
bl = cfg.node[node_id]['label']
numIns = calInsts(bl)
cfg.node[node_id]['numIns'] = numIns
numCalls = calCalls(bl)
cfg.node[node_id]['numCalls'] = numCalls
numLIs = calLogicInstructions(bl)
cfg.node[node_id]['numLIs'] = numLIs
numAs = calArithmeticIns(bl)
cfg.node[node_id]['numAs'] = numAs
strings, consts = getBBconsts(bl)
cfg.node[node_id]['numNc'] = len(strings) + len(consts)
cfg.node[node_id]['consts'] = consts
cfg.node[node_id]['strings'] = strings
externs = retrieveExterns(bl, ea_externs)
cfg.node[node_id]['externs'] = externs
numTIs = calTransferIns(bl)
cfg.node[node_id]['numTIs'] = numTIs
def attributing(cfg):
ga = graph_analysis()
ga.gwithoffspring(cfg)
print "finishing offspring"
for node in cfg:
stmt_num = getStmtNum(node)
binary_value = getBinaryValue(node)
cfg.node[node]['stmt_num'] = stmt_num
cfg.node[node]['binary_value'] = binary_value
ga.domChecking(cfg)
print "finishing domChecking"
ga.loopChecking(cfg)
print "finishing loopChecking"
def getStmtNum(node):
start = node[0]
end = node[1]
stmt_num = 0
inst_addr = start
while inst_addr < end:
inst_addr = NextHead(inst_addr)
stmt_num += 1
return stmt_num
def getBinaryValue(node):
start = node[0]
inst_addr = NextHead(start)
value = 0
addr = 0
for x in xrange((inst_addr - start)-1):
addr = start + x
y = GetOriginalByte(addr)
print value, addr, y
value = value | y
value = value << 8
print value
addr = inst_addr - 1
y = GetOriginalByte(addr)
print value, addr, y
value = value | y
print node
print bin(value)
return value
def cfg_construct(func):
func_start = func.startEA
func_end = func.endEA
cfg = nx.DiGraph()
seq_blocks, main_blocks = obtain_block_sequence(func)
i = 0
visited = {}
for bl in seq_blocks:
start = seq_blocks[bl][0]
end = seq_blocks[bl][1]
src_node = (start, end)
if end in seq_blocks and GetMnem(PrevHead(end)) != 'jmp':
next_start = seq_blocks[end][0]
next_end = seq_blocks[end][1]
next_node = (next_start, next_end)
cfg.add_edge(src_node, next_node)
if start == func_start:
cfg.add_node(src_node, c='start')
start_node = src_node
if end == func_end:
cfg.add_node(src_node, c='end')
refs = CodeRefsFrom(PrevHead(end), 0)
for ref in refs:
#print ref
if ref in seq_blocks:
dst_node = (seq_blocks[ref][0], seq_blocks[ref][1])
cfg.add_edge(src_node, dst_node)
return cfg, start_node
def obtain_allpaths( cfg, node, path, allpaths):
path.append(node)
if 'c' in cfg.node[node] and cfg.node[node]['c'] == 'end':
allpaths.append(path)
return
else:
for suc in cfg.successors(node):
if suc not in path:
path_copy = copy.copy(path)
obtain_allpaths(cfg, suc, path_copy, allpaths)
def obtain_block_sequence(func):
control_blocks = {}
main_blocks = {}
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
for bl in blocks:
base = bl[0]
end = PrevHead(bl[1])
control_ea = checkCB(bl)
control_blocks[control_ea] = bl
control_blocks[end] = bl
if func.startEA <= base <= func.endEA:
main_blocks[base] = bl
x = sorted(main_blocks)
return control_blocks, x
def checkCB(bl):
start = bl[0]
end = bl[1]
ea = start
while ea < end:
if checkCondition(ea):
return ea
ea = NextHead(ea)
return PrevHead(end)
def checkCondition(ea):
mips_branch = {"beqz":1, "beq":1, "bne":1, "bgez":1, "b":1, "bnez":1, "bgtz":1, "bltz":1, "blez":1, "bgt":1, "bge":1, "blt":1, "ble":1, "bgtu":1, "bgeu":1, "bltu":1, "bleu":1}
x86_branch = {"jz":1, "jnb":1, "jne":1, "je":1, "jg":1, "jle":1, "jl":1, "jge":1, "ja":1, "jae":1, "jb":1, "jbe":1, "jo":1, "jno":1, "js":1, "jns":1}
arm_branch = {"B":1, "BAL":1, "BNE":1, "BEQ":1, "BPL":1, "BMI":1, "BCC":1, "BLO":1, "BCS":1, "BHS":1, "BVC":1, "BVS":1, "BGT":1, "BGE":1, "BLT":1, "BLE":1, "BHI":1 ,"BLS":1 }
conds = {}
conds.update(mips_branch)
conds.update(x86_branch)
opcode = GetMnem(ea)
if opcode in conds:
return True
return False

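getCfg above returns a networkx DiGraph whose node attributes mirror the per-block counters computed in graph_analysis_ida. A minimal stand-alone sketch of that node layout, assuming networkx 1.x (where attributes are reached through G.node[id], as in the code above); the addresses are made up:

import networkx as nx

cfg = nx.DiGraph()
blocks = [(0x401000, 0x401010), (0x401010, 0x401024)]   # hypothetical (start_ea, end_ea) pairs
for node_id, label in enumerate(blocks):
    cfg.add_node(node_id)
    cfg.node[node_id]['label'] = label     # basic-block address range
    cfg.node[node_id]['numIns'] = 4        # instruction count, as set by attributingRe()
    cfg.node[node_id]['numCalls'] = 0      # call count
cfg.add_edge(0, 1)                         # one edge per code reference between blocks

print(cfg.node[1]['label'])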
Binary file not shown.


@@ -0,0 +1,228 @@
#
# Reference Lister
#
# List all functions and all references to them in the current section.
#
# Implemented with the idautils module
#
import networkx as nx
import cPickle as pickle
import pdb
from graph_analysis_ida import *
from graph_property import *
import cfg_constructor as cfg   # assumed import: get_func_cfgs() below calls cfg.cfg_construct()
#import wingdbstub
#wingdbstub.Ensure()
def get_funcs(ea):
funcs = {}
# Get current ea
# Loop from start to end in the current segment
for funcea in Functions(SegStart(ea)):
funcname = GetFunctionName(funcea)
func = get_func(funcea)
blocks = FlowChart(func)
funcs[funcname] = []
for bl in blocks:
start = bl.startEA
end = bl.endEA
funcs[funcname].append((start, end))
return funcs
def get_funcs_for_discoverRe(ea):
features = {}
for funcea in Functions(SegStart(ea)):
funcname = GetFunctionName(funcea)
print funcname
func = get_func(funcea)
feature = get_discoverRe_feature(func)
features[funcname] = feature
return features
def get_discoverRe_feature(func, icfg):
start = func.startEA
end = func.endEA
features = []
FunctionCalls = getFuncCalls(func)
#1
features.append(FunctionCalls)
LogicInstr = getLogicInsts(func)
#2
features.append(LogicInstr)
Transfer = getTransferInsts(func)
#3
features.append(Transfer)
Locals = getLocalVariables(func)
#4
features.append(Locals)
BB = getBasicBlocks(func)
#5
features.append(BB)
Edges = len(icfg.edges())
#6
features.append(Edges)
Incoming = getIncommingCalls(func)
#7
features.append(Incoming)
#8
Instrs = getIntrs(func)
features.append(Instrs)
between = retrieveGP(icfg)
#9
features.append(between)
strings, consts = getfunc_consts(func)
features.append(strings)
features.append(consts)
return features
def get_func_names(ea):
funcs = {}
for funcea in Functions(SegStart(ea)):
funcname = GetFunctionName(funcea)
funcs[funcname] = funcea
return funcs
def get_func_bases(ea):
funcs = {}
for funcea in Functions(SegStart(ea)):
funcname = GetFunctionName(funcea)
funcs[funcea] = funcname
return funcs
def get_func_range(ea):
funcs = {}
for funcea in Functions(SegStart(ea)):
funcname = GetFunctionName(funcea)
func = get_func(funcea)
funcs[funcname] = (func.startEA, func.endEA)
return funcs
def get_func_sequences(ea):
funcs_bodylist = {}
funcs = get_funcs(ea)
for funcname in funcs:
if funcname not in funcs_bodylist:
funcs_bodylist[funcname] = []
for start, end in funcs[funcname]:
inst_addr = start
while inst_addr <= end:
opcode = GetMnem(inst_addr)
funcs_bodylist[funcname].append(opcode)
inst_addr = NextHead(inst_addr)
return funcs_bodylist
def get_func_cfgs(ea):
func_cfglist = {}
i = 0
start, end = get_section('LOAD')
#print start, end
for funcea in Functions(SegStart(ea)):
if start <= funcea <= end:
funcname = GetFunctionName(funcea)
func = get_func(funcea)
print i
i += 1
try:
icfg = cfg.cfg_construct(func)
func_cfglist[funcname] = icfg
except:
pass
return func_cfglist
def get_section(t):
base = SegByName(t)
start = SegByBase(base)
end = SegEnd(start)
return start, end
def get_func_cfg_sequences(func_cfglist):
func_cfg_seqlist = {}
for funcname in func_cfglist:
func_cfg_seqlist[funcname] = {}
cfg = func_cfglist[funcname][0]
for start, end in cfg:
codesq = get_sequences(start, end)
func_cfg_seqlist[funcname][(start,end)] = codesq
return func_cfg_seqlist
def get_sequences(start, end):
seq = []
inst_addr = start
while inst_addr <= end:
opcode = GetMnem(inst_addr)
seq.append(opcode)
inst_addr = NextHead(inst_addr)
return seq
def get_stack_arg(func_addr):
print func_addr
args = []
stack = GetFrame(func_addr)
if not stack:
return []
firstM = GetFirstMember(stack)
lastM = GetLastMember(stack)
i = firstM
while i <=lastM:
mName = GetMemberName(stack,i)
mSize = GetMemberSize(stack,i)
if mSize:
i = i + mSize
else:
i = i+4
if mName not in args and mName and ' s' not in mName and ' r' not in mName:
args.append(mName)
return args
#pickle.dump(funcs, open('C:/Documents and Settings/Administrator/Desktop/funcs','w'))
def processDataSegs():
funcdata = {}
datafunc = {}
for n in xrange(idaapi.get_segm_qty()):
seg = idaapi.getnseg(n)
ea = seg.startEA
segtype = idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE)
if segtype in [idc.SEG_DATA, idc.SEG_BSS]:
start = idc.SegStart(ea)
end = idc.SegEnd(ea)
cur = start
while cur <= end:
refs = [v for v in DataRefsTo(cur)]
for fea in refs:
name = GetFunctionName(fea)
if len(name)== 0:
continue
if name not in funcdata:
funcdata[name] = [cur]
else:
funcdata[name].append(cur)
if cur not in datafunc:
datafunc[cur] = [name]
else:
datafunc[cur].append(name)
cur = NextHead(cur)
return funcdata, datafunc
def obtainDataRefs(callgraph):
datarefs = {}
funcdata, datafunc = processDataSegs()
for node in callgraph:
if node in funcdata:
datas = funcdata[node]
for dd in datas:
refs = datafunc[dd]
refs = list(set(refs))
if node in datarefs:
print refs
datarefs[node] += refs
datarefs[node] = list(set(datarefs[node]))
else:
datarefs[node] = refs
return datarefs

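For reference, get_discoverRe_feature above assembles an 11-element list in a fixed order. The mapping below is read off the append calls in that function; the DISCOVRE_FEATURE_NAMES constant and the describe_feature helper are illustrative only, not part of the project:

# Index layout of the list returned by get_discoverRe_feature(func, icfg).
DISCOVRE_FEATURE_NAMES = [
    "no. of function calls",          # 0  getFuncCalls
    "no. of logic instructions",      # 1  getLogicInsts
    "no. of transfer instructions",   # 2  getTransferInsts
    "no. of local variables",         # 3  getLocalVariables
    "no. of basic blocks",            # 4  getBasicBlocks
    "no. of CFG edges",               # 5  len(icfg.edges())
    "no. of incoming calls",          # 6  getIncommingCalls
    "no. of instructions",            # 7  getIntrs
    "mean betweenness centrality",    # 8  retrieveGP(icfg)
    "string constants",               # 9  getfunc_consts
    "numeric constants",              # 10 getfunc_consts
]

def describe_feature(vec):
    """Pair a raw discovRe feature vector with readable field names."""
    return dict(zip(DISCOVRE_FEATURE_NAMES, vec))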
Binary file not shown.


@@ -0,0 +1,293 @@
# -*- coding: UTF-8 -*-
#
# Reference Lister
#
# List all functions and all references to them in the current section.
#
# Implemented with the idautils module
#
from idautils import *
from idaapi import *
from idc import *
import networkx as nx
import cfg_constructor as cfg
import cPickle as pickle
import pdb
from raw_graphs import *
#from discovRe_feature.discovRe import *
from discovRe import *
#import wingdbstub
#wingdbstub.Ensure()
def print_obj(obj):
"打印对象的所有属性"
print(obj.__dict__)
def gt_funcNames(ea):
funcs = []
plt_func, plt_data = processpltSegs()
for funcea in Functions(SegStart(ea)):
funcname = get_unified_funcname(funcea)
if funcname in plt_func:
print funcname
continue
funcs.append(funcname)
return funcs
def get_funcs(ea):
funcs = {}
# Get current ea
# Loop from start to end in the current segment
plt_func, plt_data = processpltSegs()
for funcea in Functions(SegStart(ea)):
funcname = get_unified_funcname(funcea)
if funcname in plt_func:
continue
func = get_func(funcea)
blocks = FlowChart(func)
funcs[funcname] = []
for bl in blocks:
start = bl.startEA
end = bl.endEA
funcs[funcname].append((start, end))
return funcs
# used for the callgraph generation.
def get_func_namesWithoutE(ea):
funcs = {}
plt_func, plt_data = processpltSegs()
for funcea in Functions(SegStart(ea)):
funcname = get_unified_funcname(funcea)
if 'close' in funcname:
print funcea
if funcname in plt_func:
print funcname
continue
funcs[funcname] = funcea
return funcs
# used for the callgraph generation.
def get_func_names(ea):
funcs = {}
for funcea in Functions(SegStart(ea)):
funcname = get_unified_funcname(funcea)
funcs[funcname] = funcea
return funcs
def get_func_bases(ea):
funcs = {}
plt_func, plt_data = processpltSegs()
for funcea in Functions(SegStart(ea)):
funcname = get_unified_funcname(funcea)
if funcname in plt_func:
continue
funcs[funcea] = funcname
return funcs
def get_func_range(ea):
funcs = {}
for funcea in Functions(SegStart(ea)):
funcname = get_unified_funcname(funcea)
func = get_func(funcea)
funcs[funcname] = (func.startEA, func.endEA)
return funcs
def get_unified_funcname(ea):
funcname = GetFunctionName(ea)
if len(funcname) > 0:
if '.' == funcname[0]:
funcname = funcname[1:]
return funcname
def get_func_sequences(ea):
funcs_bodylist = {}
funcs = get_funcs(ea)
for funcname in funcs:
if funcname not in funcs_bodylist:
funcs_bodylist[funcname] = []
for start, end in funcs[funcname]:
inst_addr = start
while inst_addr <= end:
opcode = GetMnem(inst_addr)
funcs_bodylist[funcname].append(opcode)
inst_addr = NextHead(inst_addr)
return funcs_bodylist
def get_func_cfgs_c(ea):
# type: (object) -> object
binary_name = idc.GetInputFile()
raw_cfgs = raw_graphs(binary_name)
externs_eas, ea_externs = processpltSegs()
i = 0
for funcea in Functions(SegStart(ea)):
funcname = get_unified_funcname(funcea)
func = get_func(funcea)
print i
i += 1
icfg = cfg.getCfg(func, externs_eas, ea_externs)
func_f = get_discoverRe_feature(func, icfg[0])
raw_g = raw_graph(funcname, icfg, func_f) # build one raw CFG; raw_graph is a Python class defined in raw_graphs.py
raw_cfgs.append(raw_g) # raw_graphs is another Python class (also in raw_graphs.py) that keeps a list of raw_graph objects
#print(raw_g.__dict__)
#print(raw_g)  since raw_graph/raw_graphs are classes, printing an instance directly only shows something like <raw_graphs.raw_graphs instance at 0x09888FD0>, not its attributes; use print_obj() or print(obj.__dict__) instead  #https://blog.51cto.com/steed/2046408
return raw_cfgs
def get_func_cfgs_ctest(ea):
binary_name = idc.GetInputFile()
raw_cfgs = raw_graphs(binary_name)
externs_eas, ea_externs = processpltSegs()
i = 0
diffs = {}
for funcea in Functions(SegStart(ea)):
funcname = get_unified_funcname(funcea)
func = get_func(funcea)
print i
i += 1
icfg, old_cfg = cfg.getCfg(func, externs_eas, ea_externs)
diffs[funcname] = (icfg, old_cfg)
#raw_g = raw_graph(funcname, icfg)
#raw_cfgs.append(raw_g)
return diffs
def get_func_cfgs(ea):
func_cfglist = {}
i = 0
for funcea in Functions(SegStart(ea)):
funcname = get_unified_funcname(funcea)
func = get_func(funcea)
print i
i += 1
try:
icfg = cfg.getCfg(func)
func_cfglist[funcname] = icfg
except:
pass
return func_cfglist
def get_func_cfg_sequences(func_cfglist):
func_cfg_seqlist = {}
for funcname in func_cfglist:
func_cfg_seqlist[funcname] = {}
cfg = func_cfglist[funcname][0]
for start, end in cfg:
codesq = get_sequences(start, end)
func_cfg_seqlist[funcname][(start,end)] = codesq
return func_cfg_seqlist
def get_sequences(start, end):
seq = []
inst_addr = start
while inst_addr <= end:
opcode = GetMnem(inst_addr)
seq.append(opcode)
inst_addr = NextHead(inst_addr)
return seq
def get_stack_arg(func_addr):
print func_addr
args = []
stack = GetFrame(func_addr)
if not stack:
return []
firstM = GetFirstMember(stack)
lastM = GetLastMember(stack)
i = firstM
while i <=lastM:
mName = GetMemberName(stack,i)
mSize = GetMemberSize(stack,i)
if mSize:
i = i + mSize
else:
i = i+4
if mName not in args and mName and ' s' not in mName and ' r' not in mName:
args.append(mName)
return args
#pickle.dump(funcs, open('C:/Documents and Settings/Administrator/Desktop/funcs','w'))
def processExternalSegs():
funcdata = {}
datafunc = {}
for n in xrange(idaapi.get_segm_qty()):
seg = idaapi.getnseg(n)
ea = seg.startEA
segtype = idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE)
if segtype in [idc.SEG_XTRN]:
start = idc.SegStart(ea)
end = idc.SegEnd(ea)
cur = start
while cur <= end:
name = get_unified_funcname(cur)
funcdata[name] = hex(cur)
cur = NextHead(cur)
return funcdata
def processpltSegs():
funcdata = {}
datafunc = {}
for n in xrange(idaapi.get_segm_qty()):
seg = idaapi.getnseg(n)
ea = seg.startEA
segname = SegName(ea)
if segname in ['.plt', 'extern', '.MIPS.stubs']:
start = seg.startEA
end = seg.endEA
cur = start
while cur < end:
name = get_unified_funcname(cur)
funcdata[name] = hex(cur)
datafunc[cur]= name
cur = NextHead(cur)
return funcdata, datafunc
def processDataSegs():
funcdata = {}
datafunc = {}
for n in xrange(idaapi.get_segm_qty()):
seg = idaapi.getnseg(n)
ea = seg.startEA
segtype = idc.GetSegmentAttr(ea, idc.SEGATTR_TYPE)
if segtype in [idc.SEG_DATA, idc.SEG_BSS]:
start = idc.SegStart(ea)
end = idc.SegEnd(ea)
cur = start
while cur <= end:
refs = [v for v in DataRefsTo(cur)]
for fea in refs:
name = get_unified_funcname(fea)
if len(name)== 0:
continue
if name not in funcdata:
funcdata[name] = [cur]
else:
funcdata[name].append(cur)
if cur not in datafunc:
datafunc[cur] = [name]
else:
datafunc[cur].append(name)
cur = NextHead(cur)
return funcdata, datafunc
def obtainDataRefs(callgraph):
datarefs = {}
funcdata, datafunc = processDataSegs()
for node in callgraph:
if node in funcdata:
datas = funcdata[node]
for dd in datas:
refs = datafunc[dd]
refs = list(set(refs))
if node in datarefs:
print refs
datarefs[node] += refs
datarefs[node] = list(set(datarefs[node]))
else:
datarefs[node] = refs
return datarefs

Binary file not shown.


@@ -0,0 +1,257 @@
from idautils import *
from idaapi import *
from idc import *
def getfunc_consts(func):
strings = []
consts = []
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
for bl in blocks:
strs, conts = getBBconsts(bl)
strings += strs
consts += conts
return strings, consts
def getConst(ea, offset):
strings = []
consts = []
optype1 = GetOpType(ea, offset)
if optype1 == idaapi.o_imm:
imm_value = GetOperandValue(ea, offset)
if 0<= imm_value <= 10:
consts.append(imm_value)
else:
if idaapi.isLoaded(imm_value) and idaapi.getseg(imm_value):
str_value = GetString(imm_value)
if str_value is None:
str_value = GetString(imm_value+0x40000)
if str_value is None:
consts.append(imm_value)
else:
re = all(40 <= ord(c) < 128 for c in str_value)
if re:
strings.append(str_value)
else:
consts.append(imm_value)
else:
re = all(40 <= ord(c) < 128 for c in str_value)
if re:
strings.append(str_value)
else:
consts.append(imm_value)
else:
consts.append(imm_value)
return strings, consts
def getBBconsts(bl):
strings = []
consts = []
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
if opcode in ['la','jalr','call', 'jal']:
inst_addr = NextHead(inst_addr)
continue
strings_src, consts_src = getConst(inst_addr, 0)
strings_dst, consts_dst = getConst(inst_addr, 1)
strings += strings_src
strings += strings_dst
consts += consts_src
consts += consts_dst
try:
strings_dst, consts_dst = getConst(inst_addr, 2)
consts += consts_dst
strings += strings_dst
except:
pass
inst_addr = NextHead(inst_addr)
return strings, consts
def getFuncCalls(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
sumcalls = 0
for bl in blocks:
callnum = calCalls(bl)
sumcalls += callnum
return sumcalls
def getLogicInsts(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
sumcalls = 0
for bl in blocks:
callnum = calLogicInstructions(bl)
sumcalls += callnum
return sumcalls
def getTransferInsts(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
sumcalls = 0
for bl in blocks:
callnum = calTransferIns(bl)
sumcalls += callnum
return sumcalls
def getIntrs(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
sumcalls = 0
for bl in blocks:
callnum = calInsts(bl)
sumcalls += callnum
return sumcalls
def getLocalVariables(func):
args_num = get_stackVariables(func.startEA)
return args_num
def getBasicBlocks(func):
blocks = [(v.startEA, v.endEA) for v in FlowChart(func)]
return len(blocks)
def getIncommingCalls(func):
refs = CodeRefsTo(func.startEA, 0)
re = len([v for v in refs])
return re
def get_stackVariables(func_addr):
#print func_addr
args = []
stack = GetFrame(func_addr)
if not stack:
return 0
firstM = GetFirstMember(stack)
lastM = GetLastMember(stack)
i = firstM
while i <=lastM:
mName = GetMemberName(stack,i)
mSize = GetMemberSize(stack,i)
if mSize:
i = i + mSize
else:
i = i+4
if mName not in args and mName and 'var_' in mName:
args.append(mName)
return len(args)
def calArithmeticIns(bl):
x86_AI = {'add':1, 'sub':1, 'div':1, 'imul':1, 'idiv':1, 'mul':1, 'shl':1, 'dec':1, 'inc':1}
mips_AI = {'add':1, 'addu':1, 'addi':1, 'addiu':1, 'mult':1, 'multu':1, 'div':1, 'divu':1}
calls = {}
calls.update(x86_AI)
calls.update(mips_AI)
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
if opcode in calls:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def calCalls(bl):
calls = {'call':1, 'jal':1, 'jalr':1}
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
if opcode in calls:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def calInsts(bl):
start = bl[0]
end = bl[1]
ea = start
num = 0
while ea < end:
num += 1
ea = NextHead(ea)
return num
def calLogicInstructions(bl):
x86_LI = {'and':1, 'andn':1, 'andnpd':1, 'andpd':1, 'andps':1, 'andnps':1, 'test':1, 'xor':1, 'xorpd':1, 'pslld':1}
mips_LI = {'and':1, 'andi':1, 'or':1, 'ori':1, 'xor':1, 'nor':1, 'slt':1, 'slti':1, 'sltu':1}
calls = {}
calls.update(x86_LI)
calls.update(mips_LI)
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
if opcode in calls:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def calSconstants(bl):
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
if opcode in calls:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def calNconstants(bl):
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
optype1 = GetOpType(inst_addr, 0)
optype2 = GetOpType(inst_addr, 1)
if optype1 == 5 or optype2 == 5:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num
def retrieveExterns(bl, ea_externs):
externs = []
start = bl[0]
end = bl[1]
inst_addr = start
while inst_addr < end:
refs = CodeRefsFrom(inst_addr, 1)
try:
ea = [v for v in refs if v in ea_externs][0]
externs.append(ea_externs[ea])
except:
pass
inst_addr = NextHead(inst_addr)
return externs
def calTransferIns(bl):
x86_TI = {'jmp':1, 'jz':1, 'jnz':1, 'js':1, 'je':1, 'jne':1, 'jg':1, 'jle':1, 'jge':1, 'ja':1, 'jnc':1, 'call':1}
mips_TI = {'beq':1, 'bne':1, 'bgtz':1, "bltz":1, "bgez":1, "blez":1, 'j':1, 'jal':1, 'jr':1, 'jalr':1}
arm_TI = {'MVN':1, "MOV":1}
calls = {}
calls.update(x86_TI)
calls.update(mips_TI)
start = bl[0]
end = bl[1]
invoke_num = 0
inst_addr = start
while inst_addr < end:
opcode = GetMnem(inst_addr)
re = [v for v in calls if opcode in v]
if len(re) > 0:
invoke_num += 1
inst_addr = NextHead(inst_addr)
return invoke_num

Binary file not shown.


@@ -0,0 +1,24 @@
import networkx as nx
import pdb
def betweeness(g):
#pdb.set_trace()
betweenness = nx.betweenness_centrality(g)
return betweenness
def eigenvector(g):
centrality = nx.eigenvector_centrality(g)
return centrality
def closeness_centrality(g):
closeness = nx.closeness_centrality(g)
return closeness
def retrieveGP(g):
bf = betweeness(g)
#close = closeness_centrality(g)
#bf_sim =
#close_sim =
x = sorted(bf.values())
value = sum(x)/len(x)
return round(value,5)

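retrieveGP above boils the graph down to a single number: the mean betweenness centrality of its nodes, rounded to five decimals. A small self-contained check on a toy chain graph (no IDA needed):

import networkx as nx

g = nx.DiGraph()
g.add_edges_from([(0, 1), (1, 2), (2, 3)])    # chain 0 -> 1 -> 2 -> 3

bf = nx.betweenness_centrality(g)             # same call as betweeness() above
x = sorted(bf.values())
print(round(sum(x) / len(x), 5))              # the value retrieveGP(g) would return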
Binary file not shown.


@@ -0,0 +1,51 @@
# -*- coding: UTF-8 -*-
import sys
from func import *
from raw_graphs import *
from idc import *
import os
import argparse
import raw_graphs
def print_obj(obj):
"打印对象的所有属性"
print(obj.__dict__)
def parse_command():
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument("--path", type=str, help="The directory in which to store the generated .ida file")
args = parser.parse_args()
return args
if __name__ == '__main__':
#print str(sys.argv) #['raw-feature-extractor/preprocessing_ida.py']
#print str(idc.ARGV) #['raw-feature-extractor/preprocessing_ida.py', '--path', 'C:\\Program1\\pycharmproject\\Genius3\\new']
#print idc.ARGV[2]
#print type(idc.ARGV[2])
args = parse_command()
#path = args.path
path = idc.ARGV[2]
analysis_flags = idc.GetShortPrm(idc.INF_START_AF)
analysis_flags &= ~idc.AF_IMMOFF
# turn off "automatically make offset" heuristic
idc.SetShortPrm(idc.INF_START_AF, analysis_flags)
idaapi.autoWait()
cfgs = get_func_cfgs_c(FirstSeg())
binary_name = idc.GetInputFile() + '.ida'
print path
print binary_name
fullpath = os.path.join(path, binary_name)
pickle.dump(cfgs, open(fullpath,'w'))
#print binary_name
testpath="C:\Program1\pycharmproject\Genius3/acfgs/hpcenter.ida"
fr = open(fullpath,'r')
data1 = pickle.load(fr)
print(type(data1)) #<type 'instance'>
print(data1.raw_graph_list[393].__dict__)
print(data1.raw_graph_list[393].g)
print(data1.raw_graph_list[393].g.nodes())
#print_obj(data1)
#print cfgs.raw_graph_list[0]
#idc.Exit(0)


@@ -0,0 +1,288 @@
# -*- coding: UTF-8 -*-
import itertools
import sys
sys.path.insert(0, '/usr/local/lib/python2.7/dist-packages/')
import networkx as nx
#import numpy as np
from subprocess import Popen, PIPE
import pdb
import os
import re,mmap
#from graph_edit_new import *
class raw_graph:
def __init__(self, funcname, g, func_f):
#print "create"
self.funcname = funcname
self.old_g = g[0]
self.g = nx.DiGraph()
self.entry = g[1]
self.fun_features = func_f
self.attributing()
def __len__(self):
return len(self.g)
def attributing(self):
self.obtainOffsprings(self.old_g)
for node in self.old_g:
fvector = self.retrieveVec(node, self.old_g)
self.g.add_node(node)
self.g.node[node]['v'] = fvector
for edge in self.old_g.edges():
node1 = edge[0]
node2 = edge[1]
self.g.add_edge(node1, node2)
def obtainOffsprings(self,g):
nodes = g.nodes()
for node in nodes:
offsprings = {}
self.getOffsprings(g, node, offsprings)
g.node[node]['offs'] = len(offsprings)
return g
def getOffsprings(self, g, node, offsprings):
node_offs = 0
sucs = g.successors(node)
for suc in sucs:
if suc not in offsprings:
offsprings[suc] = 1
self.getOffsprings(g, suc, offsprings)
def retrieveVec(self, id_, g):
feature_vec = []
#numC0
numc = g.node[id_]['consts']
feature_vec.append(numc)
#nums1
nums = g.node[id_]['strings']
feature_vec.append(nums)
#offsprings2
offs = g.node[id_]['offs']
feature_vec.append(offs)
#numAs3
numAs = g.node[id_]['numAs']
feature_vec.append(numAs)
# of calls4
calls = g.node[id_]['numCalls']
feature_vec.append(calls)
# of insts5
insts = g.node[id_]['numIns']
feature_vec.append(insts)
# of LIs6
insts = g.node[id_]['numLIs']
feature_vec.append(insts)
# of TIs7
insts = g.node[id_]['numTIs']
feature_vec.append(insts)
return feature_vec
def enumerating(self, n):
subgs = []
#pdb.set_trace()
for sub_nodes in itertools.combinations(self.g.nodes(), n):
subg = self.g.subgraph(sub_nodes)
u_subg = subg.to_undirected()
if nx.is_connected(u_subg):
subgs.append(subg)
return subgs
def genMotifs(self, n):
motifs = {}
subgs = self.enumerating(n)
for subg in subgs:
if len(motifs) == 0:
motifs[subg] = [subg]
else:
nomatch = True
for mt in motifs:
if nx.is_isomorphic(mt, subg):
motifs[mt].append(subg)
nomatch = False
if nomatch:
motifs[subg] = [subg]
return motifs
def enumerating_efficient(self, n):
#pdb.set_trace()
if len(self.g) >= 200:
return []
with open('/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt','wb') as f:
nx.write_edgelist(self.g,f,data=False)
#pdb.set_trace()
process = Popen(["/home/qian/workspace/FANMOD-command_line-source/executables/./fanmod_command_line_linux", str(n), "100000", "1", "/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt", "1", "0", "0", "2", "0", "0", "0", "1000", "3", "3", "/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt", "0", "1"], stdout=PIPE, stderr=PIPE)
stdout, stderr = process.communicate()
if process.returncode >= 0:
#os.system("/home/qian/software/FANMOD-command_line-source/executables/./fanmod_command_line_linux " +str(n) + " 100000 1 /home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt 1 0 0 2 0 0 0 1000 3 3 /home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt 0 1")
#pdb.set_trace()
#pdb.set_trace()
subgs = self.parseOutput("/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt.dump", n)
#pdb.set_trace()
os.remove("/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt.dump")
return subgs
return []
def parseOutput(self, path, n):
pattern = re.compile('[0-9]+\,[0-9]+\,[0-9]+\,[0-9]+')
subgraphs = []
with open(path,'r') as f:
data = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
mo = re.findall(pattern, data)
if mo:
results = [map(int, v.split(',')[1:]) for v in mo]
subgraphs = self.createGraphDirectly(results)
return subgraphs
def parseOutputByconditions(self, path, n):
pattern = re.compile('[0-9]+\,[0-9]+\,[0-9]+\,[0-9]+')
subgraphs = []
with open(path,'r') as f:
data = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
mo = re.findall(pattern, data)
if mo:
results = [map(int, v.split(',')[1:]) for v in mo]
subgraphs = self.create_Graphbycondition_Directly(results)
return subgraphs
def create_Graphbycondition_Directly(self, results):
subgs = []
for indexes in results:
tg = template_graph()
subg = self.g.subgraph(indexes)
tg.updateG(subg)
subgs.append(tg)
del tg
return subgs
def createGraphDirectly(self, results):
#pdb.set_trace()
#subgs = [self.g.subgraph(indexes) for indexes in results]
subgs = []
for indexes in results:
tg = template_graph()
subg = self.g.subgraph(indexes)
tg.updateG(subg)
subgs.append(tg)
del tg
return subgs
def createGraph(self, results, n):
binary_value = int(results[0],2)
indexes = [int(v) for v in results[1:]]
fang = self.createG(results[0], n)
if fang:
tg = template_graph(binary_value)
tg.updateG(fang, indexes, self.g)
return tg
pdb.set_trace()
print "there is g which is none"
def createG(self, binary_str, n):
g = nx.DiGraph()
l = [int(v) for v in binary_str]
#pdb.set_trace()
shape = (n, n)
data = np.array(l)
ad_matrix = data.reshape(shape)
for i in xrange(n):
for j in xrange(n):
if ad_matrix[i][j] == 1:
g.add_edge(i, j)
return g
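    # Worked example of the adjacency-string encoding (illustrative only):
    # createG("0110", 2) reshapes [0, 1, 1, 0] into [[0, 1], [1, 0]] and returns
    # a DiGraph with the two edges 0 -> 1 and 1 -> 0.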
class raw_graphs:  # holds an initially empty list used to store raw_graph instances
def __init__(self, binary_name):
self.binary_name = binary_name
self.raw_graph_list = []
def append(self, raw_g):
self.raw_graph_list.append(raw_g)
def __len__(self):
return len(self.raw_graph_list)
class graphlets:
def __init__(self, funcname):
self.funcname = funcname
self.graphlets_list = []
self.binary_name = None
def updateBN(self, binary_name):
self.binary_name = binary_name
def append(self, subg):
self.graphlets_list.append(subg)
def appendSet(self, subgs):
self.graphlets_list += subgs
def __len__(self):
return len(self.graphlets_list)
class template_graph:
def __init__(self, value=None):
self.value = value
self.g = None
def updateG(self,g):
self.g = g
#def updateIndexes(self, indexes):
# self.indexes = indexes
#def updateAttributes(self, pg, indexes, maing):
# for id_ in xrange(len(indexes)):
# index = indexes[id_]
# gnode = self.findNode(index, maing)
# self.g.node[gnode] = pg.node[index]
class template_graphs:
def __init__(self, size):
self.size = size
self.gs = []
self.bit_len = None
def enumeratingAll(self):
subgs = []
binary_value = self.genBinValue()
for i in xrange(binary_value):
if i == 0 :
continue
g = self.createG(i)
if g:
tg = template_graph(i)
tg.updateG(g)
self.gs.append(tg)
def genBinValue(self):
n = self.size
self.bit_len = n*n
return 2**(self.bit_len)
def createG(self, i):
g = nx.DiGraph()
l = self.genArray(i)
#pdb.set_trace()
shape = (self.size, self.size)
data = np.array(l)
ad_matrix = data.reshape(shape)
for i in xrange(self.size):
for j in xrange(self.size):
if ad_matrix[i][j] == 1:
g.add_edge(i, j)
u_g = g.to_undirected()
if len(g) == self.size and nx.is_connected(u_g):
return g
return False
def genArray(self, i):
l = [int(x) for x in bin(i)[2:]]
x = [0 for v in xrange(self.bit_len - len(l))]
return x + l
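# Minimal usage sketch (assumes the networkx/numpy imports above as nx/np):
# enumerate every 3-node directed graph whose underlying undirected graph is
# connected, as candidate graphlet templates.
#
#   tgs = template_graphs(3)
#   tgs.enumeratingAll()     # fills tgs.gs with template_graph instances
#   print len(tgs.gs)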

Binary file not shown.

View File

@ -0,0 +1,8 @@
import pickle
testpath = "C:\Program1\pycharmproject\Genius3/acfgs/hpcenter.ida"
fr = open(testpath, 'r')
data1 = pickle.load(fr)
print(type(data1))
# # print_obj(data1)
# print cfgs.raw_graph_list[0]
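# data1 is expected to be the pickled raw_graphs container produced by the
# preprocessing step; a hedged follow-up under that assumption:
#   print data1.binary_name
#   print len(data1.raw_graph_list)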

356
Genius3/search-engine/db.py Normal file
View File

@ -0,0 +1,356 @@
import cPickle as pickle
from search import *
from nearpy import Engine
from nearpy.hashes import RandomDiscretizedProjections
from nearpy.filters import NearestFilter, UniqueFilter
from nearpy.distances import EuclideanDistance
from nearpy.distances import CosineDistance
from nearpy.hashes import RandomBinaryProjections
from nearpy.experiments import DistanceRatioExperiment
from redis import Redis
from nearpy.storage import RedisStorage
from feature import *
import numpy as np
import os
import pdb
import argparse
import time
from refactoring import *
import pymongo
from pymongo import MongoClient
def initDB():
    client = MongoClient('mongodb://localhost:27017/')
    db = client['iot-encoding']
    return db
mongo_db = initDB()
posts = mongo_db.posts
class db:
def __init__(self):
self.feature_list = {}
self.engine = None
def loadHashmap(self, feature_size, result_n):
# Create redis storage adapter
redis_object = Redis(host='localhost', port=6379, db=0)
redis_storage = RedisStorage(redis_object)
pdb.set_trace()
try:
# Get hash config from redis
config = redis_storage.load_hash_configuration('test')
            # Config exists, create hash with None parameters
lshash = RandomBinaryProjections(None, None)
# Apply configuration loaded from redis
lshash.apply_config(config)
except:
            # Config does not exist yet, create the hash from scratch
lshash = RandomBinaryProjections('test', 0)
        # Create engine for the feature space and use our hash.
# This will set the dimension of the lshash only the first time, not when
# using the configuration loaded from redis. Use redis storage to store
# buckets.
nearest = NearestFilter(1000)
#self.engine = Engine(feature_size, lshashes=[], vector_filters=[])
pdb.set_trace()
self.engine = Engine(192, lshashes=[lshash], vector_filters=[nearest], storage=redis_storage, distance=EuclideanDistance())
# Do some stuff like indexing or querying with the engine...
# Finally store hash configuration in redis for later use
redis_storage.store_hash_configuration(lshash)
def appendToDB(self, binary_name, funcname, fvector, firmware_name=""):
if fvector is None:
return
#ftuple = tuple([fvector])
self.engine.store_vector(np.asarray(fvector), ".".join((firmware_name,binary_name,funcname)))
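    # appendToDB() keys each stored vector as "<firmware>.<binary>.<funcname>",
    # which is why the query code below checks `name in x[v][1]` against the
    # engine's neighbour results.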
def batch_appendDB(self, binary_name, features, firmware_name=""):
for funcname in features:
feature = features[funcname]
#pdb.set_trace()
self.appendToDB(binary_name, funcname, feature, firmware_name)
def batch_appendDBbyDir(self, base_dir):
cursor = posts.find({"firmware_name":"ddwrt-r21676_result"})
i = 0
for v in cursor:
print i
i+=1
binary_name = v['binary_name']
funcname = v['func_name']
firmware_name = v['firmware_name']
feature = v['fvector']
self.appendToDB(binary_name, funcname, feature, firmware_name)
def batch_appendDBbyDir1(self, base_dir):
image_dir = os.path.join(base_dir, "image")
firmware_featrues={}
bnum = 0
fnum = 0
i = 0
pdb.set_trace()
for firmware_name in os.listdir(image_dir):
print firmware_name
firmware_featrues[firmware_name] = {}
firmware_dir = os.path.join(image_dir, firmware_name)
for binary_name in os.listdir(firmware_dir):
if binary_name.endswith(".features"):
bnum += 1
featrues_dir = os.path.join(firmware_dir, binary_name)
featrues = pickle.load(open(featrues_dir, "r"))
for funcname in featrues:
fnum +=1
#pdb.set_trace()
feature = featrues[funcname]
self.appendToDB(binary_name, funcname, feature, firmware_name)
del featrues
print("bnum ", bnum)
print("fnum ", fnum)
def dump(self, base_dir):
db_dir = os.path.join(base_dir, "data/db/busybox.feature_mapping")
pickle.dump(self.feature_list, open(db_dir, 'w'))
db_dir = os.path.join(base_dir, "data/db/busybox.hashmap")
pickle.dump(self.engine, open(db_dir, 'w'))
def loadDB(self, base_dir):
db_dir = os.path.join(base_dir, "data/db/busybox.feature_mapping")
self.feature_list = pickle.load(open(db_dir, 'r'))
db_dir = os.path.join(base_dir, "data/db/busybox.hashmap")
self.engine = pickle.load(open(db_dir, 'r'))
def findF(self, binary_name, funcname):
x = [v for v in self.feature_list if binary_name in self.feature_list[v] and funcname in self.feature_list[v][binary_name]]
return x[0]
def retrieveFeaturesByDir(db_instance, n, base_dir):
firmware_featrues={}
i = 0
for firmware_name in os.listdir(base_dir):
        if firmware_name.endswith(".features"):
firmware_featrues[firmware_name] = {}
firmware_dir = os.path.join(base_dir, firmware_name)
if i > 0:
break
i += 1
pdb.set_trace()
for binary_name in os.listdir(firmware_dir):
featrues_dir = os.path.join(firmware_dir, binary_name + "_cb" + str(n) + ".features")
featrues = pickle.load(open(featrues_dir, "r"))
for funcname in featrues:
feature = featrues[funcname]
                db_instance.appendToDB(binary_name, funcname, feature, firmware_name)
del featrues
def retrieveFeatures(n, base_dir, filename, funcs):
feature_dic = {}
featrues_dir = os.path.join(base_dir, "5000", filename + "_cb" + str(n) + ".features")
featrues = pickle.load(open(featrues_dir, "r"))
#featuresx = retrieveFeaturesx(filename)
for name in featrues:
#if name in funcs:
x = featrues[name]
#+ featuresx[name]
feature_dic[name] = np.asarray(x)
return feature_dic
def retrieveVuldb(base_input_dir):
vul_path = os.path.join(base_input_dir, "vul")
vul_db = pickle.load(open(vul_path, "r"))
return vul_db
def retrieveFeaturesx(filename):
ida_input_dir = os.path.join("./data/", filename + ".features")
featuresx = pickle.load(open(ida_input_dir, "r"))
return featuresx
def retrieveQueries(n, base_dir, filename1, featrues_src):
queries = {}
featrues_dir = os.path.join(base_dir, "5000", filename1 + "_cb" + str(n) + ".features")
featrues = pickle.load(open(featrues_dir, "r"))
#featuresx = retrieveFeaturesx(filename1)
for name in featrues:
#if name in featrues_src:
x = featrues[name]
#+ featuresx[name]
queries[name] = np.asarray(x)
return queries
def retrieveQueriesbyDir(n, base_dir, firmware_name, filename1):
queries = {}
featrues_dir = os.path.join(base_dir, firmware_name, filename1 + "_cb" + str(n) + ".features")
featrues = pickle.load(open(featrues_dir, "r"))
for name in featrues:
#del featrues[name][5]
queries[name] = np.asarray(featrues[name])
return queries
def retrieveQuery(n, base_dir, filename, funcname):
featrues_dir = os.path.join(base_dir, filename + "_cb" + str(n) + ".features")
featrues = pickle.load(open(featrues_dir, "r"))
f = [featrues[v] for v in featrues if funcname in v ][0]
return np.asarray(f)
def parse_command():
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument("--base_input_dir", type=str, help="raw binaries to process for training")
parser.add_argument('--output_dir', type=str, help="output dir")
parser.add_argument("--filename1", type=str, help="the size of each graphlet")
parser.add_argument("--filename2", type=str, help="the size of each graphlet")
parser.add_argument("--size", type=int, help="the size of each graphlet")
#parser.add_argument("--size", type=int, help="the size of each graphlet")
args = parser.parse_args()
return args
def loadFuncs(path):
funcs = {}
x86_dir = os.path.join(path, "func_candid")
#mips_dir = os.path.join(path, "openssl1.0.1a_mips.ida")
fp = open(x86_dir,"r")
for line in fp:
items = line.split("\n")
funcname = items[0]
funcs[funcname] = 1
return funcs
def dump(path, featrues, queries):
fp = open(path + "/" + "matrix", 'w')
for name in featrues:
row = []
row.append("x86")
row.append(name)
row += featrues[name]
fp.write("%s\t%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\n" %tuple(row))
for name in queries:
row = []
row.append("mips")
row.append(name)
row += queries[name]
fp.write("%s\t%s\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\n" % tuple(row))
fp.close()
def queryBytwo(base_input_dir, filename1, filename2, n):
threthold = 50
db_instance = db()
funcs = loadFuncs(base_input_dir)
db_instance.loadHashmap(n, 50000)
#pdb.set_trace()
featrues = retrieveFeatures(n, base_input_dir, filename1, funcs)
queries = retrieveQueries(n, base_input_dir, filename2, funcs)
#queries = refactoring(queries, featrues)
vul_db = retrieveVuldb(base_input_dir)
pdb.set_trace()
#dump(base_input_dir, featrues, queries)
#start = time.time()
#db_instance.batch_appendDBbyDir(base_input_dir)
#end = time.time()
#total = end - start
#print total
db_instance.batch_appendDB(filename1, featrues)
pdb.set_trace()
ranks = []
times = []
for threthold in xrange(1, 210, 10):
hit = []
i = 0
for name in queries:
#print i
i += 1
'''
if i == 1000:
print (sum(times)/len(times))
pdb.set_trace()
print "s"
'''
#if name not in vul_db['openssl']:
# continue
if name not in featrues:
continue
#pdb.set_trace()
query = queries[name]
#start = time.time()
x = db_instance.engine.neighbours(query)
#end = time.time()
#total = end - start
#times.append(total)
#print total
#pdb.set_trace()
try:
rank = [v for v in xrange(len(x)) if name in x[v][1]][0]
ranks.append((name, rank))
if rank <= threthold:
hit.append(1)
else:
hit.append(0)
except:
#pdb.set_trace()
hit.append(0)
pass
#pdb.set_trace()
acc = sum(hit) * 1.0 / len(hit)
print acc
def queryAll(base_dir, firmware_name, filename1, n):
threthold = 155
db_instance = db()
db_instance.loadHashmap(n, 50000)
queries = retrieveQueriesbyDir(n, base_dir, firmware_name, filename1)
start = time.time()
pdb.set_trace()
    db_instance.batch_appendDBbyDir(base_dir)
end = time.time()
dur = end - start
print dur
pdb.set_trace()
hit = []
i = 0
times = []
for name in queries:
print i
i += 1
query = queries[name]
start = time.clock()
x = db_instance.engine.neighbours(query)
end = time.clock()
dur = end - start
times.append(dur)
#pdb.set_trace()
try:
rank = [v for v in xrange(len(x)) if name in x[v][1]]
if len(rank) > 1:
pdb.set_trace()
print "stop"
if rank[0] <= threthold:
hit.append(1)
else:
hit.append(0)
except:
hit.append(0)
acc = sum(hit) * 1.0 / len(hit)
mean = np.mean(times)
std = np.std(times)
#pdb.set_trace()
print acc
if __name__ == "__main__":
args = parse_command()
base_dir = args.base_input_dir
filename1 = args.filename1
filename2 = args.filename2
n = args.size
pdb.set_trace()
queryBytwo(base_dir, filename1, filename2, n)
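# Hypothetical invocation (paths and file names below are placeholders, not taken
# from the repository):
#   python db.py --base_input_dir ./data --filename1 openssl_x86 --filename2 openssl_mips --size 4
# queryBytwo() then loads "<base_input_dir>/5000/<filename>_cb<size>.features" for both
# binaries, indexes the first set in the nearpy engine, queries with the second, and
# prints retrieval accuracy for a range of rank thresholds. It also expects
# "<base_input_dir>/func_candid" and "<base_input_dir>/vul" to exist (see loadFuncs
# and retrieveVuldb above).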