Gencoding_Ke/Genius3/raw-feature-extractor/raw_graphs.py
TinyCaviar badd4eada6 backup
2023-08-03 10:03:02 +08:00

297 lines
7.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: UTF-8 -*-
import itertools
import sys
# sys.path.insert(0, '/usr/local/lib/python2.7/dist-packages/')
# sys.path.insert(1, 'C:/Python27/Lib/site-packages')
import networkx as nx
import numpy as np
from subprocess import Popen, PIPE
import pdb
import os
import re
import mmap
# from graph_edit_new import *
class raw_graph:
def __init__(self, funcname, g, func_f, bb_f):
#print "create"
self.funcname = funcname
self.old_g = g[0]
self.g = nx.DiGraph()
self.entry = g[1]
self.bb_features = bb_f # len=bb数量,每个元素都是一个11维向量
self.fun_features = func_f
self.attributing()
def __len__(self):
return len(self.g)
def attributing(self):
self.obtainOffsprings(self.old_g)
for node in self.old_g:
fvector = self.retrieveVec(node, self.old_g)
self.g.add_node(node)
self.g.node[node]['v'] = fvector
for edge in self.old_g.edges():
node1 = edge[0]
node2 = edge[1]
self.g.add_edge(node1, node2)
def obtainOffsprings(self,g):
nodes = g.nodes()
for node in nodes:
offsprings = {}
self.getOffsprings(g, node, offsprings)
g.node[node]['offs'] = len(offsprings)
return g
def getOffsprings(self, g, node, offsprings):
node_offs = 0
sucs = g.successors(node)
for suc in sucs:
if suc not in offsprings:
offsprings[suc] = 1
self.getOffsprings(g, suc, offsprings)
# 提取acfg的属性特征
# 调用/传输/算术/逻辑/比较/移动/终止
# 数据声明/总指令数/字符串或整数常量/后代的数量
def retrieveVec(self, id_, g):
feature_vec = []
#numC0
numc = g.node[id_]['consts']
feature_vec.append(numc)
#nums1
nums = g.node[id_]['strings']
feature_vec.append(nums)
#offsprings2
offs = g.node[id_]['offs']
feature_vec.append(offs)
#numAs3
numAs = g.node[id_]['numAs']
feature_vec.append(numAs)
# of calls4
calls = g.node[id_]['numCalls']
feature_vec.append(calls)
# of insts5
insts = g.node[id_]['numIns']
feature_vec.append(insts)
# of LIs6
insts = g.node[id_]['numLIs'] #
feature_vec.append(insts)
# of TIs7
insts = g.node[id_]['numTIs'] #transfer instructions
feature_vec.append(insts)
return feature_vec
def enumerating(self, n):
subgs = []
#pdb.set_trace()
for sub_nodes in itertools.combinations(self.g.nodes(), n):
subg = self.g.subgraph(sub_nodes)
u_subg = subg.to_undirected()
if nx.is_connected(u_subg):
subgs.append(subg)
return subgs
def genMotifs(self, n):
motifs = {}
subgs = self.enumerating(n)
for subg in subgs:
if len(motifs) == 0:
motifs[subg] = [subg]
else:
nomatch = True
for mt in motifs:
if nx.is_isomorphic(mt, subg):
motifs[mt].append(subg)
nomatch = False
if nomatch:
motifs[subg] = [subg]
return motifs
def enumerating_efficient(self, n):
#pdb.set_trace()
if len(self.g) >= 200:
return []
with open('/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt','wb') as f:
nx.write_edgelist(self.g,f,data=False)
#pdb.set_trace()
process = Popen(["/home/qian/workspace/FANMOD-command_line-source/executables/./fanmod_command_line_linux", str(n), "100000", "1", "/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt", "1", "0", "0", "2", "0", "0", "0", "1000", "3", "3", "/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt", "0", "1"], stdout=PIPE, stderr=PIPE)
stdout, stderr = process.communicate()
if process.returncode >= 0:
#os.system("/home/qian/software/FANMOD-command_line-source/executables/./fanmod_command_line_linux " +str(n) + " 100000 1 /home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/OUTPUT.txt 1 0 0 2 0 0 0 1000 3 3 /home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt 0 1")
#pdb.set_trace()
#pdb.set_trace()
subgs = self.parseOutput("/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt.dump", n)
#pdb.set_trace()
os.remove("/home/qian/workspace/gEnding/gencoding/encoding/labeled/data/preprocessing/MotifCount.txt.dump")
return subgs
return []
def parseOutput(self, path, n):
pattern = re.compile('[0-9]+\,[0-9]+\,[0-9]+\,[0-9]+')
subgraphs = []
with open(path,'r') as f:
data = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
mo = re.findall(pattern, data)
if mo:
results = [map(int, v.split(',')[1:]) for v in mo]
subgraphs = self.createGraphDirectly(results)
return subgraphs
def parseOutputByconditions(self, path, n):
pattern = re.compile('[0-9]+\,[0-9]+\,[0-9]+\,[0-9]+')
subgraphs = []
with open(path,'r') as f:
data = mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ)
mo = re.findall(pattern, data)
if mo:
results = [map(int, v.split(',')[1:]) for v in mo]
subgraphs = self.create_Graphbycondition_Directly(results)
return subgraphs
def create_Graphbycondition_Directly(self, results):
subgs = []
for indexes in results:
tg = template_graph()
subg = self.g.subgraph(indexes)
tg.updateG(subg)
subgs.append(tg)
del tg
return subgs
def createGraphDirectly(self, results):
#pdb.set_trace()
#subgs = [self.g.subgraph(indexes) for indexes in results]
subgs = []
for indexes in results:
tg = template_graph()
subg = self.g.subgraph(indexes)
tg.updateG(subg)
subgs.append(tg)
del tg
return subgs
def createGraph(self, results, n):
binary_value = int(results[0],2)
indexes = [int(v) for v in results[1:]]
fang = self.createG(results[0], n)
if fang:
tg = template_graph(binary_value)
tg.updateG(fang, indexes, self.g)
return tg
pdb.set_trace()
print("there is g which is none")
def createG(self, binary_str, n):
g = nx.DiGraph()
l = [int(v) for v in binary_str]
#pdb.set_trace()
shape = (n, n)
data = np.array(l)
ad_matrix = data.reshape(shape)
for i in xrange(n):
for j in xrange(n):
if ad_matrix[i][j] == 1:
g.add_edge(i, j)
return g
class raw_graphs: #创建空的list然后存储raw_graphs类的instance
def __init__(self, binary_name):
self.binary_name = binary_name
self.raw_graph_list = []
def append(self, raw_g):
self.raw_graph_list.append(raw_g)
def __len__(self):
return len(self.raw_graph_list)
class graphlets:
def __init__(self, funcname):
self.funcname = funcname
self.graphlets_list = []
self.binary_name = None
def updateBN(self, binary_name):
self.binary_name = binary_name
def append(self, subg):
self.graphlets_list.append(subg)
def appendSet(self, subgs):
self.graphlets_list += subgs
def __len__(self):
return len(self.graphlets_list)
class template_graph:
def __init__(self, value=None):
self.value = value
self.g = None
def updateG(self,g):
self.g = g
#def updateIndexes(self, indexes):
# self.indexes = indexes
#def updateAttributes(self, pg, indexes, maing):
# for id_ in xrange(len(indexes)):
# index = indexes[id_]
# gnode = self.findNode(index, maing)
# self.g.node[gnode] = pg.node[index]
class template_graphs:
def __init__(self, size):
self.size = size
self.gs = []
self.bit_len = None
def enumeratingAll(self):
subgs = []
binary_value = self.genBinValue()
for i in xrange(binary_value):
if i == 0 :
continue
g = self.createG(i)
if g:
tg = template_graph(i)
tg.updateG(g)
self.gs.append(tg)
def genBinValue(self):
n = self.size
self.bit_len = n*n
return 2**(self.bit_len)
def createG(self, i):
g = nx.DiGraph()
l = self.genArray(i)
#pdb.set_trace()
shape = (self.size, self.size)
data = np.array(l)
ad_matrix = data.reshape(shape)
for i in xrange(self.size):
for j in xrange(self.size):
if ad_matrix[i][j] == 1:
g.add_edge(i, j)
u_g = g.to_undirected()
if len(g) == self.size and nx.is_connected(u_g):
return g
return False
def genArray(self, i):
l = [int(x) for x in bin(i)[2:]]
x = [0 for v in xrange(self.bit_len - len(l))]
return x + l