Added Cora.
This commit is contained in:
parent
65cb65c9c0
commit
10e1084116
@ -31,6 +31,75 @@ class SupervisedGraphSage(nn.Module):
|
||||
scores = self.forward(nodes)
|
||||
return self.xent(scores, labels.squeeze())
|
||||
|
||||
def load_cora(content_path="cora/cora.content", cites_path="cora/cora.cites",
              num_nodes=2708, num_feats=1433):
    """Load a Cora-format citation dataset from its two text files.

    Each whitespace-separated line of ``content_path`` is read as
    ``<paper_id> <feature_1> ... <feature_k> <class_label>``; each line of
    ``cites_path`` is read as a pair of paper ids that share an edge.

    Args:
        content_path: path of the node/feature/label file.
        cites_path: path of the edge-list file.
        num_nodes: number of rows to allocate (one per paper).
        num_feats: number of feature columns per paper.

    Returns:
        A tuple ``(feat_data, labels, adj_lists)`` where ``feat_data`` is a
        ``(num_nodes, num_feats)`` float array, ``labels`` is a
        ``(num_nodes, 1)`` int64 array of class indices (numbered in order of
        first appearance in the file) and ``adj_lists`` maps a node's row
        index to the set of row indices of its neighbours (edges are stored
        in both directions).

    Raises:
        KeyError: if ``cites_path`` mentions a paper id absent from
            ``content_path``.
    """
    feat_data = np.zeros((num_nodes, num_feats))
    labels = np.empty((num_nodes, 1), dtype=np.int64)
    node_map = {}   # paper id (string) -> row index
    label_map = {}  # class name (string) -> integer class index

    with open(content_path) as fp:
        for i, line in enumerate(fp):
            info = line.strip().split()
            # Build a real list: on Python 3 ``map`` returns an iterator,
            # which NumPy cannot assign into an array row.
            feat_data[i, :] = [float(value) for value in info[1:-1]]
            node_map[info[0]] = i
            if info[-1] not in label_map:
                label_map[info[-1]] = len(label_map)
            labels[i] = label_map[info[-1]]

    adj_lists = defaultdict(set)
    with open(cites_path) as fp:
        for line in fp:
            info = line.strip().split()
            paper1 = node_map[info[0]]
            paper2 = node_map[info[1]]
            # Treat citations as undirected edges.
            adj_lists[paper1].add(paper2)
            adj_lists[paper2].add(paper1)
    return feat_data, labels, adj_lists
|
||||
|
||||
def run_cora():
    """Train a two-layer supervised GraphSAGE model on Cora and report results.

    Seeds NumPy and ``random``, loads the dataset with ``load_cora()``, stacks
    two mean-aggregating encoders, runs 100 SGD steps on mini-batches of 256
    training nodes, then prints the micro-averaged validation F1 score and the
    mean wall-clock time per batch. Returns nothing.
    """
    np.random.seed(1)
    random.seed(1)
    feat_data, labels, adj_lists = load_cora()
    # Take the sizes from the loaded data instead of repeating the literals.
    num_nodes, num_feats = feat_data.shape

    # Fixed (non-trainable) lookup table holding the raw node features.
    features = nn.Embedding(num_nodes, num_feats)
    features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
    # features.cuda()

    # NOTE(review): agg1 is created with cuda=True although every other
    # component uses cuda=False and features.cuda() is commented out --
    # confirm this is intended before running on a CPU-only machine.
    agg1 = MeanAggregator(features, cuda=True)
    enc1 = Encoder(features, num_feats, 128, adj_lists, agg1, gcn=True, cuda=False)
    # The second layer consumes the (transposed) output of the first layer.
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, 128, adj_lists, agg2,
                   base_model=enc1, gcn=True, cuda=False)
    enc1.num_samples = 5
    enc2.num_samples = 5

    # 7 is passed as the first argument of SupervisedGraphSage
    # (presumably the number of Cora classes -- verify against the class).
    graphsage = SupervisedGraphSage(7, enc2)
    # graphsage.cuda()

    # Random split: indices [0, 1000) are held out (unused here),
    # [1000, 1500) are validation, the remainder is training.
    rand_indices = np.random.permutation(num_nodes)
    val = rand_indices[1000:1500]
    train = list(rand_indices[1500:])

    # Only optimise parameters that require gradients (the feature table is frozen).
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, graphsage.parameters()), lr=0.7)
    times = []
    for batch in range(100):
        # Take the current head of the list, then reshuffle for the next step.
        batch_nodes = train[:256]
        random.shuffle(train)
        start_time = time.time()
        optimizer.zero_grad()
        loss = graphsage.loss(batch_nodes,
                              Variable(torch.LongTensor(labels[np.array(batch_nodes)])))
        loss.backward()
        optimizer.step()
        end_time = time.time()
        times.append(end_time - start_time)
        # Single-argument print(...) emits the same text on Python 2 and 3.
        # NOTE(review): loss.data[0] is the pre-0.4 PyTorch idiom; newer
        # releases need loss.item() instead.
        print("%s %s" % (batch, loss.data[0]))

    val_output = graphsage.forward(val)
    val_f1 = f1_score(labels[val], val_output.data.numpy().argmax(axis=1), average="micro")
    print("Validation F1: %s" % (val_f1,))
    print("Average batch time: %s" % (np.mean(times),))
|
||||
|
||||
def load_pubmed():
|
||||
#hardcoded for simplicity...
|
||||
num_nodes = 19717
|
||||
@ -60,11 +129,10 @@ def load_pubmed():
|
||||
adj_lists[paper2].add(paper1)
|
||||
return feat_data, labels, adj_lists
|
||||
|
||||
if __name__ == "__main__":
|
||||
def run_pubmed():
|
||||
np.random.seed(1)
|
||||
random.seed(1)
|
||||
num_nodes = 19717
|
||||
num_feats = 500
|
||||
feat_data, labels, adj_lists = load_pubmed()
|
||||
features = nn.Embedding(19717, 500)
|
||||
features.weight = nn.Parameter(torch.FloatTensor(feat_data), requires_grad=False)
|
||||
@ -103,3 +171,6 @@ if __name__ == "__main__":
|
||||
val_output = graphsage.forward(val)
|
||||
print "Validation F1:", f1_score(labels[val], val_output.data.numpy().argmax(axis=1), average="micro")
|
||||
print "Average batch time:", np.mean(times)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the Cora experiment when executed directly.
    run_cora()
|
||||
|
Loading…
Reference in New Issue
Block a user