ppi eval
commit 5f66eec217 (parent 2a3bfbcb5a)
@@ -5,6 +5,13 @@ import numpy as np
 from networkx.readwrite import json_graph
 from argparse import ArgumentParser
 
+''' To evaluate the embeddings, we run a logistic regression.
+Run this script after running unsupervised training.
+Baseline of using features-only can be run by setting data_dir as 'feat'
+Example:
+  python eval_scripts/ppi_eval.py ../data/ppi unsup-ppi/n2v_big_0.000010 test
+'''
+
 def run_regression(train_embeds, train_labels, test_embeds, test_labels):
     np.random.seed(1)
     from sklearn.linear_model import SGDClassifier
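Note: a rough sketch of how the workflow in the new docstring could be driven with run_regression from this file; the embedding filename, the id_map lookup, and the "feat" baseline branch shown here are assumptions for illustration, not code from this commit:

    # Hypothetical driver, assuming unsupervised training saved embeddings
    # under data_dir and that data_dir == "feat" selects the features-only baseline.
    if data_dir == "feat":
        run_regression(train_feats, train_labels, test_feats, test_labels)
    else:
        embeds = np.load(data_dir + "/val.npy")  # assumed filename
        # id_map is an assumed node-id -> embedding-row mapping
        run_regression(embeds[[id_map[i] for i in train_ids]], train_labels,
                       embeds[[id_map[i] for i in test_ids]], test_labels)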
@@ -15,8 +22,12 @@ def run_regression(train_embeds, train_labels, test_embeds, test_labels):
     dummy.fit(train_embeds, train_labels)
     log = MultiOutputClassifier(SGDClassifier(loss="log"), n_jobs=10)
     log.fit(train_embeds, train_labels)
-    print("F1 score", f1_score(test_labels, log.predict(test_embeds), average="micro"))
-    print("Random baseline F1 score", f1_score(test_labels, dummy.predict(test_embeds), average="micro"))
+
+    f1 = 0
+    for i in range(test_labels.shape[1]):
+        print("F1 score", f1_score(test_labels[:,i], log.predict(test_embeds)[:,i], average="micro"))
+    for i in range(test_labels.shape[1]):
+        print("Random baseline F1 score", f1_score(test_labels[:,i], dummy.predict(test_embeds)[:,i], average="micro"))
 
 if __name__ == '__main__':
     parser = ArgumentParser("Run evaluation on PPI data.")
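Note: the change above swaps the single micro-F1 computed over the whole multi-label matrix for one micro-averaged F1 per label column. A minimal, self-contained sketch of what one loop iteration computes (toy arrays, not data from this repo):

    import numpy as np
    from sklearn.metrics import f1_score

    y_true = np.array([[1, 0], [0, 1], [1, 1], [0, 0]])  # toy multi-label ground truth
    y_pred = np.array([[1, 0], [0, 0], [1, 1], [0, 1]])  # toy predictions
    for i in range(y_true.shape[1]):
        # micro-averaged F1 restricted to label column i, as in the loop above
        print("label", i, "F1:", f1_score(y_true[:, i], y_pred[:, i], average="micro"))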
@@ -30,12 +41,14 @@ if __name__ == '__main__':
 
     print("Loading data...")
     G = json_graph.node_link_graph(json.load(open(dataset_dir + "/ppi-G.json")))
-    labels = json.load(open("/dfs/scratch0/graphnet/ppi/ppi-class_map.json"))
+    labels = json.load(open(dataset_dir + "/ppi-class_map.json"))
     labels = {int(i):l for i, l in labels.iteritems()}
 
     train_ids = [n for n in G.nodes() if not G.node[n]['val'] and not G.node[n]['test']]
     test_ids = [n for n in G.nodes() if G.node[n][setting]]
     train_labels = np.array([labels[i] for i in train_ids])
+    if train_labels.ndim == 1:
+        train_labels = np.expand_dims(train_labels, 1)
     test_labels = np.array([labels[i] for i in test_ids])
     print("running", data_dir)
 
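Note: two things change in this hunk. The class map is now read relative to dataset_dir instead of a hard-coded /dfs/scratch0 path, and a guard promotes 1-D label arrays to 2-D so that train_labels keeps the two-dimensional shape MultiOutputClassifier.fit expects when each node carries a single scalar label. A tiny sketch of the shape fix (toy array, not repo data):

    import numpy as np

    labels = np.array([0, 1, 1, 0])         # scalar label per node -> 1-D array
    if labels.ndim == 1:
        labels = np.expand_dims(labels, 1)  # promote to one column per label
    print(labels.shape)                     # (4, 1)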
@@ -45,7 +58,7 @@ if __name__ == '__main__':
     ## Logistic gets thrown off by big counts, so log transform num comments and score
     feats[:,0] = np.log(feats[:,0]+1.0)
     feats[:,1] = np.log(feats[:,1]-min(np.min(feats[:,1]), -1))
-    feat_id_map = json.load(open("/dfs/scratch0/graphnet/ppi/ppi-id_map.json"))
+    feat_id_map = json.load(open(dataset_dir + "/ppi-id_map.json"))
     feat_id_map = {int(id):val for id,val in feat_id_map.iteritems()}
     train_feats = feats[[feat_id_map[id] for id in train_ids]]
     test_feats = feats[[feat_id_map[id] for id in test_ids]]
@@ -125,6 +125,9 @@ class EdgeMinibatchIterator(object):
         batch_edges = self.train_edges[start : start + self.batch_size]
         return self.batch_feed_dict(batch_edges)
 
+    def num_training_batches(self):
+        return len(self.train_edges) // self.batch_size + 1
+
     def val_feed_dict(self, size=None):
         edge_list = self.val_edges
         if size is None:
@@ -287,6 +290,9 @@ class NodeMinibatchIterator(object):
         ret_val = self.batch_feed_dict(val_node_subset)
         return ret_val[0], ret_val[1], (iter_num+1)*size >= len(val_nodes), val_node_subset
 
+    def num_training_batches(self):
+        return len(self.train_nodes) // self.batch_size + 1
+
     def next_minibatch_feed_dict(self):
         start = self.batch_num * self.batch_size
         self.batch_num += 1
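Note: both EdgeMinibatchIterator and NodeMinibatchIterator gain the same helper, reporting how many minibatches one pass over the training set takes via len(...) // batch_size + 1. A quick arithmetic check with toy numbers (not from the repo); the formula does yield one extra batch when the length is an exact multiple of batch_size:

    train_edges = list(range(1050))  # toy edge list
    batch_size = 100
    print(len(train_edges) // batch_size + 1)  # 11: ten full batches plus the 50-edge remainder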
@@ -81,4 +81,4 @@ if __name__ == "__main__":
     G = G.subgraph(nodes)
     pairs = run_random_walks(G, nodes)
     with open(out_file, "w") as fp:
-        fp.write("\n".join([p[0] + "\t" + p[1] for p in pairs]))
+        fp.write("\n".join([str(p[0]) + "\t" + str(p[1]) for p in pairs]))
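Note: the last hunk casts both endpoints of each random-walk pair to strings before writing; presumably the node ids can be integers here, and concatenating an int with "\t" would raise a TypeError. A tiny sketch of the fixed write (toy pairs, not repo data):

    pairs = [(0, 7), (3, 12)]  # toy integer node-id pairs
    lines = "\n".join([str(p[0]) + "\t" + str(p[1]) for p in pairs])
    print(lines)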