Added Cora.
This commit is contained in:
parent
65cb65c9c0
commit
10e1084116
@ -31,6 +31,75 @@ class SupervisedGraphSage(nn.Module):
|
|||||||
scores = self.forward(nodes)
|
scores = self.forward(nodes)
|
||||||
return self.xent(scores, labels.squeeze())
|
return self.xent(scores, labels.squeeze())
|
||||||
|
|
||||||
|
def load_cora():
    """Load the Cora citation dataset from ./cora/.

    Reads cora/cora.content (one node per line: <paper_id> <1433 word
    features> <class_label>) and cora/cora.cites (one edge per line:
    <paper_id1> <paper_id2>), building a dense feature matrix, integer
    class labels, and an undirected adjacency structure.

    Returns:
        feat_data: np.ndarray of shape (2708, 1433) with float features.
        labels: np.ndarray of shape (2708, 1), dtype int64; class index
            per node (classes numbered in first-seen order).
        adj_lists: defaultdict(set) mapping node index -> set of neighbor
            indices; symmetric, since each citation is stored both ways.
    """
    num_nodes = 2708
    num_feats = 1433
    feat_data = np.zeros((num_nodes, num_feats))
    labels = np.empty((num_nodes, 1), dtype=np.int64)
    node_map = {}   # raw paper id -> contiguous row index
    label_map = {}  # class name -> contiguous class index
    with open("cora/cora.content") as fp:
        for i, line in enumerate(fp):
            info = line.strip().split()
            # list(...) keeps this correct on Python 3, where map() is a
            # lazy iterator; on Python 2 it is a no-op.
            feat_data[i, :] = list(map(float, info[1:-1]))
            node_map[info[0]] = i
            if info[-1] not in label_map:
                label_map[info[-1]] = len(label_map)
            labels[i] = label_map[info[-1]]

    adj_lists = defaultdict(set)
    with open("cora/cora.cites") as fp:
        for i, line in enumerate(fp):
            info = line.strip().split()
            paper1 = node_map[info[0]]
            paper2 = node_map[info[1]]
            # Store the edge in both directions (undirected graph).
            adj_lists[paper1].add(paper2)
            adj_lists[paper2].add(paper1)
    return feat_data, labels, adj_lists
|
def run_cora():
|
||||||
|
np.random.seed(1)
|
||||||
|
random.seed(1)
|
||||||
|
num_nodes = 2708
|
||||||
|
feat_data, labels, adj_lists = load_cora()
|
||||||
|
features = nn.Embedding(2708, 1433)
|
||||||
|
features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
|
||||||
|
# features.cuda()
|
||||||
|
|
||||||
|
agg1 = MeanAggregator(features, cuda=True)
|
||||||
|
enc1 = Encoder(features, 1433, 128, adj_lists, agg1, gcn=True, cuda=False)
|
||||||
|
agg2 = MeanAggregator(lambda nodes : enc1(nodes).t(), cuda=False)
|
||||||
|
enc2 = Encoder(lambda nodes : enc1(nodes).t(), enc1.embed_dim, 128, adj_lists, agg2,
|
||||||
|
base_model=enc1, gcn=True, cuda=False)
|
||||||
|
enc1.num_samples = 5
|
||||||
|
enc2.num_samples = 5
|
||||||
|
|
||||||
|
graphsage = SupervisedGraphSage(7, enc2)
|
||||||
|
# graphsage.cuda()
|
||||||
|
rand_indices = np.random.permutation(num_nodes)
|
||||||
|
test = rand_indices[:1000]
|
||||||
|
val = rand_indices[1000:1500]
|
||||||
|
train = list(rand_indices[1500:])
|
||||||
|
|
||||||
|
optimizer = torch.optim.SGD(filter(lambda p : p.requires_grad, graphsage.parameters()), lr=0.7)
|
||||||
|
times = []
|
||||||
|
for batch in range(100):
|
||||||
|
batch_nodes = train[:256]
|
||||||
|
random.shuffle(train)
|
||||||
|
start_time = time.time()
|
||||||
|
optimizer.zero_grad()
|
||||||
|
loss = graphsage.loss(batch_nodes,
|
||||||
|
Variable(torch.LongTensor(labels[np.array(batch_nodes)])))
|
||||||
|
loss.backward()
|
||||||
|
optimizer.step()
|
||||||
|
end_time = time.time()
|
||||||
|
times.append(end_time-start_time)
|
||||||
|
print batch, loss.data[0]
|
||||||
|
|
||||||
|
val_output = graphsage.forward(val)
|
||||||
|
print "Validation F1:", f1_score(labels[val], val_output.data.numpy().argmax(axis=1), average="micro")
|
||||||
|
print "Average batch time:", np.mean(times)
|
||||||
|
|
||||||
def load_pubmed():
|
def load_pubmed():
|
||||||
#hardcoded for simplicity...
|
#hardcoded for simplicity...
|
||||||
num_nodes = 19717
|
num_nodes = 19717
|
||||||
@ -60,11 +129,10 @@ def load_pubmed():
|
|||||||
adj_lists[paper2].add(paper1)
|
adj_lists[paper2].add(paper1)
|
||||||
return feat_data, labels, adj_lists
|
return feat_data, labels, adj_lists
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def run_pubmed():
|
||||||
np.random.seed(1)
|
np.random.seed(1)
|
||||||
random.seed(1)
|
random.seed(1)
|
||||||
num_nodes = 19717
|
num_nodes = 19717
|
||||||
num_feats = 500
|
|
||||||
feat_data, labels, adj_lists = load_pubmed()
|
feat_data, labels, adj_lists = load_pubmed()
|
||||||
features = nn.Embedding(19717, 500)
|
features = nn.Embedding(19717, 500)
|
||||||
features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
|
features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
|
||||||
@ -103,3 +171,6 @@ if __name__ == "__main__":
|
|||||||
val_output = graphsage.forward(val)
|
val_output = graphsage.forward(val)
|
||||||
print "Validation F1:", f1_score(labels[val], val_output.data.numpy().argmax(axis=1), average="micro")
|
print "Validation F1:", f1_score(labels[val], val_output.data.numpy().argmax(axis=1), average="micro")
|
||||||
print "Average batch time:", np.mean(times)
|
print "Average batch time:", np.mean(times)
|
||||||
|
|
||||||
|
# Script entry point: train and evaluate GraphSAGE on the Cora dataset.
if __name__ == "__main__":
    run_cora()
|
||||||
|
Loading…
Reference in New Issue
Block a user