Fixed argument parsing in eval scripts.
This commit is contained in:
parent ecc71c6f9e
commit fc88dc47cc
@@ -30,9 +30,9 @@ def run_regression(train_embeds, train_labels, test_embeds, test_labels):
 
 if __name__ == '__main__':
     parser = ArgumentParser("Run evaluation on citation data.")
-    parser.add_argument("dataset_dir", "Path to directory containing the dataset.")
-    parser.add_argument("data_dir", "Path to directory containing the learned node embeddings.")
-    parser.add_argument("setting", "Either val or test.")
+    parser.add_argument("dataset_dir", help="Path to directory containing the dataset.")
+    parser.add_argument("data_dir", help="Path to directory containing the learned node embeddings.")
+    parser.add_argument("setting", help="Either val or test.")
     args = parser.parse_args()
     dataset_dir = args.dataset_dir
     data_dir = args.data_dir
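Note on the fix above: argparse treats every positional string handed to add_argument() as an option name, and any name beyond the first must start with '-', so the old two-string form raised a ValueError at startup instead of registering help text; the description belongs in the help= keyword. A minimal sketch of the repaired pattern (the sample argv is hypothetical):

    from argparse import ArgumentParser

    parser = ArgumentParser("Run evaluation on citation data.")
    # Old form: parser.add_argument("dataset_dir", "Path to ...")
    # raises ValueError: invalid option string ... must start with a character '-'
    parser.add_argument("dataset_dir", help="Path to directory containing the dataset.")
    args = parser.parse_args(["./example-data"])  # hypothetical argv for illustration
    print(args.dataset_dir)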
@@ -11,7 +11,7 @@ def run_regression(train_embeds, train_labels, test_embeds, test_labels):
     from sklearn.dummy import DummyClassifier
     from sklearn.metrics import f1_score
     from sklearn.multioutput import MultiOutputClassifier
-    dummy = MultiOutputClassifier(DummyClassifier(strategy='uniform'))
+    dummy = MultiOutputClassifier(DummyClassifier())
     dummy.fit(train_embeds, train_labels)
     log = MultiOutputClassifier(SGDClassifier(loss="log"), n_jobs=10)
     log.fit(train_embeds, train_labels)
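Dropping strategy='uniform' means the baseline uses DummyClassifier's default strategy instead (historically 'stratified'; newer scikit-learn defaults to 'prior'), which changes what the random baseline reports. A toy sketch of the multi-label baseline, with made-up arrays standing in for the real embeddings:

    import numpy as np
    from sklearn.dummy import DummyClassifier
    from sklearn.multioutput import MultiOutputClassifier

    train_embeds = np.random.rand(100, 8)                  # hypothetical embeddings
    train_labels = np.random.randint(0, 2, size=(100, 3))  # hypothetical multi-label targets

    # One DummyClassifier per label column; the baseline ignores the features.
    dummy = MultiOutputClassifier(DummyClassifier())
    dummy.fit(train_embeds, train_labels)
    print(dummy.predict(train_embeds[:5]).shape)  # (5, 3)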
@@ -20,9 +20,9 @@ def run_regression(train_embeds, train_labels, test_embeds, test_labels):
 
 if __name__ == '__main__':
     parser = ArgumentParser("Run evaluation on PPI data.")
-    parser.add_argument("dataset_dir", "Path to directory containing the dataset.")
-    parser.add_argument("data_dir", "Path to directory containing the learned node embeddings. Set to 'feat' for raw features.")
-    parser.add_argument("setting", "Either val or test.")
+    parser.add_argument("dataset_dir", help="Path to directory containing the dataset.")
+    parser.add_argument("data_dir", help="Path to directory containing the learned node embeddings. Set to 'feat' for raw features.")
+    parser.add_argument("setting", help="Either val or test.")
     args = parser.parse_args()
     dataset_dir = args.dataset_dir
     data_dir = args.data_dir
@@ -41,8 +41,8 @@ if __name__ == '__main__':
 
     if data_dir == "feat":
         print("Using only features..")
-        feats = np.load(data_dir + "/ppi-feats.npy")
-        ## Logistic gets through off by big counts, so log transform num comments and score
+        feats = np.load(dataset_dir + "/ppi-feats.npy")
+        ## Logistic gets thrown off by big counts, so log transform num comments and score
         feats[:,0] = np.log(feats[:,0]+1.0)
         feats[:,1] = np.log(feats[:,1]-min(np.min(feats[:,1]), -1))
         feat_id_map = json.load(open("/dfs/scratch0/graphnet/ppi/ppi-id_map.json"))
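For context on the two transform lines kept here: np.log(x + 1.0) is the standard log1p compression for heavy-tailed count features, and the second line shifts the score column upward so the log argument stays positive. A small numeric sketch with invented values:

    import numpy as np

    feats = np.array([[0., 0.], [10., 3.], [1000., 120.]])  # hypothetical [num_comments, score]

    feats[:, 0] = np.log(feats[:, 0] + 1.0)  # same as np.log1p(feats[:, 0])
    feats[:, 1] = np.log(feats[:, 1] - min(np.min(feats[:, 1]), -1))  # shift before the log
    print(feats.round(2))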
@@ -12,23 +12,20 @@ def run_regression(train_embeds, train_labels, test_embeds, test_labels):
     from sklearn.metrics import f1_score
     dummy = DummyClassifier()
     dummy.fit(train_embeds, train_labels)
-    log = SGDClassifier(loss="log", n_jobs=55, n_iter=50)
+    log = SGDClassifier(loss="log", n_jobs=55)
     log.fit(train_embeds, train_labels)
     print("Test scores")
     print(f1_score(test_labels, log.predict(test_embeds), average="micro"))
-    print(f1_score(test_labels, log.predict(test_embeds), average="macro"))
     print("Train scores")
     print(f1_score(train_labels, log.predict(train_embeds), average="micro"))
-    print(f1_score(train_labels, log.predict(train_embeds), average="macro"))
     print("Random baseline")
     print(f1_score(test_labels, dummy.predict(test_embeds), average="micro"))
-    print(f1_score(test_labels, dummy.predict(test_embeds), average="macro"))
 
 if __name__ == '__main__':
     parser = ArgumentParser("Run evaluation on Reddit data.")
-    parser.add_argument("dataset_dir", "Path to directory containing the dataset.")
-    parser.add_argument("data_dir", "Path to directory containing the learned node embeddings. Set to 'feat' for raw features.")
-    parser.add_argument("setting", "Either val or test.")
+    parser.add_argument("dataset_dir", help="Path to directory containing the dataset.")
+    parser.add_argument("data_dir", help="Path to directory containing the learned node embeddings. Set to 'feat' for raw features.")
+    parser.add_argument("setting", help="Either val or test.")
     args = parser.parse_args()
     dataset_dir = args.dataset_dir
     data_dir = args.data_dir
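Removing n_iter=50 falls back to the library default; it also happens to sidestep a later API change, since scikit-learn deprecated SGDClassifier's n_iter in favor of max_iter and tol (and eventually renamed loss='log' to 'log_loss'). A hedged sketch of the equivalent call on a modern scikit-learn, with toy data:

    import numpy as np
    from sklearn.linear_model import SGDClassifier

    X = np.random.rand(200, 16)        # hypothetical embeddings
    y = np.random.randint(0, 41, 200)  # hypothetical class labels

    # Older API: SGDClassifier(loss="log", n_jobs=55, n_iter=50)
    log = SGDClassifier(loss="log_loss", n_jobs=4, max_iter=1000, tol=1e-3)
    log.fit(X, y)
    print(log.score(X, y))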
@@ -37,8 +34,6 @@ if __name__ == '__main__':
     print("Loading data...")
     G = json_graph.node_link_graph(json.load(open(dataset_dir + "/reddit-G.json")))
     labels = json.load(open(dataset_dir + "/reddit-class_map.json"))
-    data_dir = sys.argv[1]
-    setting = sys.argv[2]
 
     train_ids = [n for n in G.nodes() if not G.node[n]['val'] and not G.node[n]['test']]
     test_ids = [n for n in G.nodes() if G.node[n][setting]]
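The two deletions above are the heart of this commit: the leftover sys.argv reads ran after parse_args() and clobbered data_dir and setting, and with three positional arguments they indexed the wrong slots anyway (sys.argv[1] is dataset_dir here). A sketch of the argparse flow the script now relies on (the sample argv is hypothetical):

    from argparse import ArgumentParser

    parser = ArgumentParser("Run evaluation on Reddit data.")
    parser.add_argument("dataset_dir", help="Path to directory containing the dataset.")
    parser.add_argument("data_dir", help="Path to directory containing the learned node embeddings. Set to 'feat' for raw features.")
    parser.add_argument("setting", help="Either val or test.")

    args = parser.parse_args(["./reddit-data", "feat", "test"])  # hypothetical argv
    print(args.dataset_dir, args.data_dir, args.setting)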
@@ -48,7 +43,7 @@ if __name__ == '__main__':
     if data_dir == "feat":
         print("Using only features..")
         feats = np.load(dataset_dir + "/reddit-feats.npy")
-        ## Logistic gets through off by big counts, so log transform num comments and score
+        ## Logistic gets thrown off by big counts, so log transform num comments and score
         feats[:,0] = np.log(feats[:,0]+1.0)
         feats[:,1] = np.log(feats[:,1]-min(np.min(feats[:,1]), -1))
         feat_id_map = json.load(open(dataset_dir + "reddit-id_map.json"))
@@ -1,6 +1,9 @@
+from __future__ import print_function
+
 import numpy as np
 import random
 import json
+import sys
 
 from networkx.readwrite import json_graph
 
@@ -52,7 +55,6 @@ def load_data(prefix, normalize=True):
     return G, feats, id_map, walks, class_map
 
 def run_random_walks(G, nodes, num_walks=N_WALKS):
-    print("Subgraph for walks is of size", len(G))
     pairs = []
     for count, node in enumerate(nodes):
         if G.degree(node) == 0:
@@ -66,5 +68,17 @@ def run_random_walks(G, nodes, num_walks=N_WALKS):
                 pairs.append((node,curr_node))
             curr_node = next_node
         if count % 1000 == 0:
-            print(count)
+            print("Done walks for", count, "nodes")
     return pairs
+
+if __name__ == "__main__":
+    """ Run random walks """
+    graph_file = sys.argv[1]
+    out_file = sys.argv[2]
+    G_data = json.load(open(graph_file))
+    G = json_graph.node_link_graph(G_data)
+    nodes = [n for n in G.nodes() if not G.node[n]["val"] and not G.node[n]["test"]]
+    G = G.subgraph(nodes)
+    pairs = run_random_walks(G, nodes)
+    with open(out_file, "w") as fp:
+        fp.write("\n".join([p[0] + "\t" + p[1] for p in pairs]))
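The new block makes the walk generator runnable as a standalone script: it loads a node-link JSON graph, restricts it to training nodes (neither 'val' nor 'test'), runs the walks, and writes one tab-separated co-occurrence pair per line. A hedged read-back sketch (file names hypothetical):

    # Usage sketch (paths hypothetical): python utils.py example-G.json example-walks.txt
    with open("example-walks.txt") as fp:
        pairs = [line.rstrip("\n").split("\t") for line in fp]
    print(pairs[:3])  # [['node_a', 'node_b'], ...]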