297 lines
7.9 KiB
Python
297 lines
7.9 KiB
Python
# -*- coding: UTF-8 -*-
|
||
import itertools
|
||
import sys
|
||
|
||
# sys.path.insert(0, '/usr/local/lib/python2.7/dist-packages/')
|
||
# sys.path.insert(1, 'C:/Python27/Lib/site-packages')
|
||
|
||
import networkx as nx
|
||
import numpy as np
|
||
from subprocess import Popen, PIPE
|
||
import pdb
|
||
import os
|
||
import re
|
||
import mmap
|
||
# from graph_edit_new import *
|
||
|
||
class raw_graph:
|
||
def __init__(self, funcname, g, func_f, bb_f):
|
||
#print "create"
|
||
self.funcname = funcname
|
||
self.old_g = g[0]
|
||
self.g = nx.DiGraph()
|
||
self.entry = g[1]
|
||
self.bb_features = bb_f # len=bb数量,每个元素都是一个11维向量
|
||
self.fun_features = func_f
|
||
self.attributing()
|
||
|
||
def __len__(self):
|
||
return len(self.g)
|
||
|
||
def attributing(self):
|
||
self.obtainOffsprings(self.old_g)
|
||
for node in self.old_g:
|
||
fvector = self.retrieveVec(node, self.old_g)
|
||
self.g.add_node(node)
|
||
self.g.node[node]['v'] = fvector
|
||
|
||
for edge in self.old_g.edges():
|
||
node1 = edge[0]
|
||
node2 = edge[1]
|
||
self.g.add_edge(node1, node2)
|
||
|
||
def obtainOffsprings(self,g):
|
||
nodes = g.nodes()
|
||
for node in nodes:
|
||
offsprings = {}
|
||
self.getOffsprings(g, node, offsprings)
|
||
g.node[node]['offs'] = len(offsprings)
|
||
return g
|
||
|
||
def getOffsprings(self, g, node, offsprings):
|
||
node_offs = 0
|
||
sucs = g.successors(node)
|
||
for suc in sucs:
|
||
if suc not in offsprings:
|
||
offsprings[suc] = 1
|
||
self.getOffsprings(g, suc, offsprings)
|
||
|
||
# 提取acfg的属性特征
|
||
# 调用/传输/算术/逻辑/比较/移动/终止
|
||
# 数据声明/总指令数/字符串或整数常量/后代的数量
|
||
def retrieveVec(self, id_, g):
|
||
feature_vec = []
|
||
#numC0
|
||
numc = g.node[id_]['consts']
|
||
feature_vec.append(numc)
|
||
#nums1
|
||
nums = g.node[id_]['strings']
|
||
feature_vec.append(nums)
|
||
#offsprings2
|
||
offs = g.node[id_]['offs']
|
||
feature_vec.append(offs)
|
||
#numAs3
|
||
numAs = g.node[id_]['numAs']
|
||
feature_vec.append(numAs)
|
||
# of calls4
|
||
calls = g.node[id_]['numCalls']
|
||
feature_vec.append(calls)
|
||
# of insts5
|
||
insts = g.node[id_]['numIns']
|
||
feature_vec.append(insts)
|
||
# of LIs6
|
||
insts = g.node[id_]['numLIs'] #
|
||
feature_vec.append(insts)
|
||
# of TIs7
|
||
insts = g.node[id_]['numTIs'] #transfer instructions
|
||
feature_vec.append(insts)
|
||
return feature_vec
|
||
|
||
|
||
def enumerating(self, n):
|
||
subgs = []
|
||
#pdb.set_trace()
|
||
for sub_nodes in itertools.combinations(self.g.nodes(), n):
|
||
subg = self.g.subgraph(sub_nodes)
|
||
u_subg = subg.to_undirected()
|
||
if nx.is_connected(u_subg):
|
||
subgs.append(subg)
|
||
return subgs
|
||
|
||
|
||
def genMotifs(self, n):
|
||
motifs = {}
|
||
subgs = self.enumerating(n)
|
||
for subg in subgs:
|
||
if len(motifs) == 0:
|
||
motifs[subg] = [subg]
|
||
else:
|
||
nomatch = True
|
||
for mt in motifs:
|
||
if nx.is_isomorphic(mt, subg):
|
||
motifs[mt].append(subg)
|
||
nomatch = False
|
||
if nomatch:
|
||
motifs[subg] = [subg]
|
||
return motifs
|
||
|
||
def enumerating_efficient(self, n):
|
||
#pdb.set_trace()
|
||
if len(self.g) >= 200:
|
||
return []
|
||
with open('/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt','wb') as f:
|
||
nx.write_edgelist(self.g,f,data=False)
|
||
#pdb.set_trace()
|
||
process = Popen(["/home/qian/workspace/FANMOD-command_line-source/executables/./fanmod_command_line_linux", str(n), "100000", "1", "/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt", "1", "0", "0", "2", "0", "0", "0", "1000", "3", "3", "/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt", "0", "1"], stdout=PIPE, stderr=PIPE)
|
||
stdout, stderr = process.communicate()
|
||
if process.returncode >= 0:
|
||
#os.system("/home/qian/software/FANMOD-command_line-source/executables/./fanmod_command_line_linux " +str(n) + " 100000 1 /home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt 1 0 0 2 0 0 0 1000 3 3 /home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt 0 1")
|
||
#pdb.set_trace()
|
||
#pdb.set_trace()
|
||
subgs = self.parseOutput("/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt.dump", n)
|
||
#pdb.set_trace()
|
||
os.remove("/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt.dump")
|
||
return subgs
|
||
return []
|
||
|
||
def parseOutput(self, path, n):
|
||
pattern = re.compile('[0-9]+\,[0-9]+\,[0-9]+\,[0-9]+')
|
||
subgraphs = []
|
||
with open(path,'r') as f:
|
||
data = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
|
||
mo = re.findall(pattern, data)
|
||
if mo:
|
||
results = [map(int, v.split(',')[1:]) for v in mo]
|
||
subgraphs = self.createGraphDirectly(results)
|
||
return subgraphs
|
||
|
||
def parseOutputByconditions(self, path, n):
|
||
pattern = re.compile('[0-9]+\,[0-9]+\,[0-9]+\,[0-9]+')
|
||
subgraphs = []
|
||
with open(path,'r') as f:
|
||
data = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
|
||
mo = re.findall(pattern, data)
|
||
if mo:
|
||
results = [map(int, v.split(',')[1:]) for v in mo]
|
||
subgraphs = self.create_Graphbycondition_Directly(results)
|
||
return subgraphs
|
||
|
||
def create_Graphbycondition_Directly(self, results):
|
||
subgs = []
|
||
for indexes in results:
|
||
tg = template_graph()
|
||
subg = self.g.subgraph(indexes)
|
||
tg.updateG(subg)
|
||
subgs.append(tg)
|
||
del tg
|
||
return subgs
|
||
|
||
def createGraphDirectly(self, results):
|
||
#pdb.set_trace()
|
||
#subgs = [self.g.subgraph(indexes) for indexes in results]
|
||
subgs = []
|
||
for indexes in results:
|
||
tg = template_graph()
|
||
subg = self.g.subgraph(indexes)
|
||
tg.updateG(subg)
|
||
subgs.append(tg)
|
||
del tg
|
||
return subgs
|
||
|
||
def createGraph(self, results, n):
|
||
binary_value = int(results[0],2)
|
||
indexes = [int(v) for v in results[1:]]
|
||
fang = self.createG(results[0], n)
|
||
if fang:
|
||
tg = template_graph(binary_value)
|
||
tg.updateG(fang, indexes, self.g)
|
||
return tg
|
||
pdb.set_trace()
|
||
print("there is g which is none")
|
||
|
||
def createG(self, binary_str, n):
|
||
g = nx.DiGraph()
|
||
l = [int(v) for v in binary_str]
|
||
#pdb.set_trace()
|
||
shape = (n, n)
|
||
data = np.array(l)
|
||
ad_matrix = data.reshape(shape)
|
||
for i in xrange(n):
|
||
for j in xrange(n):
|
||
if ad_matrix[i][j] == 1:
|
||
g.add_edge(i, j)
|
||
return g
|
||
|
||
|
||
|
||
class raw_graphs: #创建空的list,然后存储raw_graphs类的instance
|
||
def __init__(self, binary_name):
|
||
self.binary_name = binary_name
|
||
self.raw_graph_list = []
|
||
|
||
def append(self, raw_g):
|
||
self.raw_graph_list.append(raw_g)
|
||
|
||
def __len__(self):
|
||
return len(self.raw_graph_list)
|
||
|
||
|
||
class graphlets:
|
||
def __init__(self, funcname):
|
||
self.funcname = funcname
|
||
self.graphlets_list = []
|
||
self.binary_name = None
|
||
|
||
def updateBN(self, binary_name):
|
||
self.binary_name = binary_name
|
||
|
||
def append(self, subg):
|
||
self.graphlets_list.append(subg)
|
||
|
||
def appendSet(self, subgs):
|
||
self.graphlets_list += subgs
|
||
|
||
def __len__(self):
|
||
return len(self.graphlets_list)
|
||
|
||
class template_graph:
|
||
def __init__(self, value=None):
|
||
self.value = value
|
||
self.g = None
|
||
|
||
def updateG(self,g):
|
||
self.g = g
|
||
#def updateIndexes(self, indexes):
|
||
# self.indexes = indexes
|
||
|
||
#def updateAttributes(self, pg, indexes, maing):
|
||
# for id_ in xrange(len(indexes)):
|
||
# index = indexes[id_]
|
||
# gnode = self.findNode(index, maing)
|
||
# self.g.node[gnode] = pg.node[index]
|
||
|
||
|
||
class template_graphs:
|
||
def __init__(self, size):
|
||
self.size = size
|
||
self.gs = []
|
||
self.bit_len = None
|
||
|
||
def enumeratingAll(self):
|
||
subgs = []
|
||
binary_value = self.genBinValue()
|
||
for i in xrange(binary_value):
|
||
if i == 0 :
|
||
continue
|
||
g = self.createG(i)
|
||
if g:
|
||
tg = template_graph(i)
|
||
tg.updateG(g)
|
||
self.gs.append(tg)
|
||
|
||
def genBinValue(self):
|
||
n = self.size
|
||
self.bit_len = n*n
|
||
return 2**(self.bit_len)
|
||
|
||
def createG(self, i):
|
||
g = nx.DiGraph()
|
||
l = self.genArray(i)
|
||
#pdb.set_trace()
|
||
shape = (self.size, self.size)
|
||
data = np.array(l)
|
||
ad_matrix = data.reshape(shape)
|
||
for i in xrange(self.size):
|
||
for j in xrange(self.size):
|
||
if ad_matrix[i][j] == 1:
|
||
g.add_edge(i, j)
|
||
u_g = g.to_undirected()
|
||
if len(g) == self.size and nx.is_connected(u_g):
|
||
return g
|
||
return False
|
||
|
||
def genArray(self, i):
|
||
l = [int(x) for x in bin(i)[2:]]
|
||
x = [0 for v in xrange(self.bit_len - len(l))]
|
||
return x + l
|