graphsage-tf/graphsage/minibatch.py

from __future__ import division
from __future__ import print_function

import numpy as np

# Seed NumPy's global RNG at import time so that the permutations and the
# neighbor sampling performed by the iterators below are repeatable.
np.random.seed(123)

class EdgeMinibatchIterator(object):
    """ This minibatch iterator iterates over batches of sampled edges or
    random pairs of co-occurring nodes.

    G -- networkx graph; nodes are read for boolean 'val'/'test' attributes
         and edges for a boolean 'train_removed' attribute
    id2idx -- dict mapping node ids to index in feature tensor
    placeholders -- tensorflow placeholders object; must provide the keys
                    'batch_size', 'batch1' and 'batch2'
    context_pairs -- if not none, then a list of co-occurring node pairs (from random walks)
    batch_size -- size of the minibatches
    max_degree -- maximum size of the downsampled adjacency lists
    n2v_retrain -- signals that the iterator is being used to add new embeddings to a n2v model
    fixed_n2v -- signals that the iterator is being used to retrain n2v with only existing nodes as context

    NOTE(review): node attributes are read through `G.node[...]`; newer
    networkx releases expose them as `G.nodes[...]` instead -- confirm the
    networkx version this module is pinned to.
    """

    def __init__(self, G, id2idx,
            placeholders, context_pairs=None, batch_size=100, max_degree=25,
            n2v_retrain=False, fixed_n2v=False,
            **kwargs):
        # **kwargs is accepted and ignored.
        self.G = G
        self.id2idx = id2idx
        self.placeholders = placeholders
        self.batch_size = batch_size
        self.max_degree = max_degree
        self.batch_num = 0

        # The order of the random draws below (nodes, train adjacency, test
        # adjacency, edges) is kept fixed so seeded runs stay reproducible.
        self.nodes = np.random.permutation(G.nodes())
        self.adj, self.deg = self.construct_adj()
        self.test_adj = self.construct_test_adj()

        edges = G.edges() if context_pairs is None else context_pairs
        self.train_edges = self.edges = np.random.permutation(edges)
        if not n2v_retrain:
            self.train_edges = self._remove_isolated(self.train_edges)
            self.val_edges = [e for e in G.edges() if G[e[0]][e[1]]['train_removed']]
        elif fixed_n2v:
            self.train_edges = self.val_edges = self._n2v_prune(self.edges)
        else:
            self.train_edges = self.val_edges = self.edges

        held_out = [bool(G.node[n]['test'] or G.node[n]['val']) for n in G.nodes()]
        print(len(held_out) - sum(held_out), 'train nodes')
        print(sum(held_out), 'test nodes')
        self.val_set_size = len(self.val_edges)

    def _n2v_prune(self, edges):
        """Keep only the edges whose second endpoint is neither val nor test."""
        node_attrs = self.G.node
        return [e for e in edges
                if not (node_attrs[e[1]]["val"] or node_attrs[e[1]]["test"])]

    def _remove_isolated(self, edge_list):
        """Filter `edge_list` down to the edges usable for training.

        Edges with an endpoint missing from the graph are dropped and counted
        (the count is printed).  An edge is also dropped when at least one
        endpoint has training degree 0 and both endpoints satisfy
        `not test or val`.

        Returns a list of (n1, n2) tuples.
        """
        # NOTE(review): `not test or val` parses as `(not test) or val`; if
        # `not (test or val)` was intended this predicate is wrong.  It is
        # kept exactly as written.
        def passes(n):
            return not self.G.node[n]['test'] or self.G.node[n]['val']

        kept = []
        missing = 0
        for n1, n2 in edge_list:
            if n1 not in self.G.node or n2 not in self.G.node:
                missing += 1
                continue
            has_isolated_end = (self.deg[self.id2idx[n1]] == 0
                                or self.deg[self.id2idx[n2]] == 0)
            if has_isolated_end and passes(n1) and passes(n2):
                continue
            kept.append((n1, n2))
        print("Unexpected missing:", missing)
        return kept

    def _fit_to_max_degree(self, neighbors):
        """Resample a non-empty neighbor array to exactly `max_degree` entries.

        Longer lists are subsampled without replacement, shorter ones are
        padded by sampling with replacement; exact-length input is returned
        unchanged.
        """
        if len(neighbors) > self.max_degree:
            return np.random.choice(neighbors, self.max_degree, replace=False)
        if len(neighbors) < self.max_degree:
            return np.random.choice(neighbors, self.max_degree, replace=True)
        return neighbors

    def construct_adj(self):
        """Build the fixed-width training adjacency table and degree vector.

        Val/test nodes are skipped and edges flagged 'train_removed' are
        ignored.  Returns `(adj, deg)` where `adj` has shape
        (len(id2idx) + 1, max_degree) and `deg` has shape (len(id2idx),).
        Rows that are never filled keep the value len(id2idx), which is the
        index of the extra last row.
        """
        num_nodes = len(self.id2idx)
        adj = num_nodes*np.ones((num_nodes+1, self.max_degree))
        deg = np.zeros((num_nodes,))

        for nodeid in self.G.nodes():
            if self.G.node[nodeid]['test'] or self.G.node[nodeid]['val']:
                continue
            neighbors = np.array([self.id2idx[neighbor]
                for neighbor in self.G.neighbors(nodeid)
                if (not self.G[nodeid][neighbor]['train_removed'])])
            deg[self.id2idx[nodeid]] = len(neighbors)
            if len(neighbors) == 0:
                continue
            adj[self.id2idx[nodeid], :] = self._fit_to_max_degree(neighbors)
        return adj, deg

    def construct_test_adj(self):
        """Build the fixed-width adjacency table over every node and edge.

        Same layout as the table from `construct_adj`, but no node or edge is
        excluded.  Returns the (len(id2idx) + 1, max_degree) array.
        """
        num_nodes = len(self.id2idx)
        adj = num_nodes*np.ones((num_nodes+1, self.max_degree))
        for nodeid in self.G.nodes():
            neighbors = np.array([self.id2idx[neighbor]
                for neighbor in self.G.neighbors(nodeid)])
            if len(neighbors) == 0:
                continue
            adj[self.id2idx[nodeid], :] = self._fit_to_max_degree(neighbors)
        return adj

    def end(self):
        """Return True once every training edge has been handed out.

        The final batch may be shorter than `batch_size`; it is still served,
        so a training set smaller than one batch yields exactly one batch.
        """
        return self.batch_num * self.batch_size >= len(self.train_edges)

    def batch_feed_dict(self, batch_edges):
        """Map a sequence of (node1, node2) pairs to a placeholder feed dict."""
        batch1 = [self.id2idx[node1] for node1, _ in batch_edges]
        batch2 = [self.id2idx[node2] for _, node2 in batch_edges]

        feed_dict = dict()
        feed_dict.update({self.placeholders['batch_size'] : len(batch_edges)})
        feed_dict.update({self.placeholders['batch1']: batch1})
        feed_dict.update({self.placeholders['batch2']: batch2})

        return feed_dict

    def next_minibatch_feed_dict(self):
        """Return the feed dict for the next training batch and advance."""
        start = self.batch_num * self.batch_size
        self.batch_num += 1
        batch_edges = self.train_edges[start : start + self.batch_size]
        return self.batch_feed_dict(batch_edges)

    def num_training_batches(self):
        """Number of batches `end()` allows per pass: ceil(len / batch_size)."""
        return (len(self.train_edges) + self.batch_size - 1) // self.batch_size

    def val_feed_dict(self, size=None):
        """Feed dict for all validation edges, or a random sample of `size`."""
        edge_list = self.val_edges
        if size is None:
            return self.batch_feed_dict(edge_list)
        # Slicing clamps to the list length, so an oversized `size` is safe.
        ind = np.random.permutation(len(edge_list))
        val_edges = [edge_list[i] for i in ind[:min(size, len(ind))]]
        return self.batch_feed_dict(val_edges)

    def incremental_val_feed_dict(self, size, iter_num):
        """Serve the `iter_num`-th chunk of `size` validation edges.

        Returns `(feed_dict, finished, val_edges)`; `finished` is True when
        this chunk reaches the end of the validation edge list.
        """
        edge_list = self.val_edges
        stop = min((iter_num+1)*size, len(edge_list))
        val_edges = edge_list[iter_num*size:stop]
        finished = (iter_num+1)*size >= len(self.val_edges)
        return self.batch_feed_dict(val_edges), finished, val_edges

    def incremental_embed_feed_dict(self, size, iter_num):
        """Serve the `iter_num`-th chunk of `size` nodes as (n, n) self-pairs.

        Returns `(feed_dict, finished, val_edges)`; `finished` is True when
        this chunk reaches the end of `self.nodes`.
        """
        node_list = self.nodes
        stop = min((iter_num+1)*size, len(node_list))
        val_edges = [(n,n) for n in node_list[iter_num*size:stop]]
        finished = (iter_num+1)*size >= len(node_list)
        return self.batch_feed_dict(val_edges), finished, val_edges

    def label_val(self):
        """Split the graph's edges into `(train_edges, val_edges)`.

        An edge goes to `val_edges` when either endpoint is flagged 'val' or
        'test'; all other edges go to `train_edges`.
        """
        train_edges = []
        val_edges = []
        for n1, n2 in self.G.edges():
            touches_held_out = (self.G.node[n1]['val'] or self.G.node[n1]['test']
                    or self.G.node[n2]['val'] or self.G.node[n2]['test'])
            if touches_held_out:
                val_edges.append((n1,n2))
            else:
                train_edges.append((n1,n2))
        return train_edges, val_edges

    def shuffle(self):
        """ Re-shuffle the training set.
            Also reset the batch number.
        """
        self.train_edges = np.random.permutation(self.train_edges)
        self.nodes = np.random.permutation(self.nodes)
        self.batch_num = 0

class NodeMinibatchIterator(object):
    """
    This minibatch iterator iterates over nodes for supervised learning.

    G -- networkx graph; nodes are read for boolean 'val'/'test' attributes
         and edges for a boolean 'train_removed' attribute
    id2idx -- dict mapping node ids to integer values indexing feature tensor
    placeholders -- standard tensorflow placeholders object for feeding; must
                    provide the keys 'batch_size', 'batch' and 'labels'
    label_map -- map from node ids to class values (integer or list)
    num_classes -- number of output classes
    batch_size -- size of the minibatches
    max_degree -- maximum size of the downsampled adjacency lists

    NOTE(review): node attributes are read through `G.node[...]`; newer
    networkx releases expose them as `G.nodes[...]` instead -- confirm the
    networkx version this module is pinned to.
    """

    def __init__(self, G, id2idx,
            placeholders, label_map, num_classes,
            batch_size=100, max_degree=25,
            **kwargs):
        # **kwargs is accepted and ignored.
        self.G = G
        self.nodes = G.nodes()
        self.id2idx = id2idx
        self.placeholders = placeholders
        self.batch_size = batch_size
        self.max_degree = max_degree
        self.batch_num = 0
        self.label_map = label_map
        self.num_classes = num_classes

        self.adj, self.deg = self.construct_adj()
        self.test_adj = self.construct_test_adj()

        self.val_nodes = [n for n in self.G.nodes() if self.G.node[n]['val']]
        self.test_nodes = [n for n in self.G.nodes() if self.G.node[n]['test']]

        self.no_train_nodes_set = set(self.val_nodes + self.test_nodes)
        # Walk the graph's own node order instead of iterating a set: set
        # order is not reproducible for string ids under hash randomization,
        # which would defeat the fixed NumPy seed.  Nodes whose training
        # degree is 0 (no usable training edges) are left out.
        self.train_nodes = [n for n in G.nodes()
                            if n not in self.no_train_nodes_set
                            and self.deg[id2idx[n]] > 0]

    def _make_label_vec(self, node):
        """Return the label vector for `node`.

        List labels are converted to an array as-is; any other label is used
        as an index into a zero vector of length `num_classes` (one-hot).
        """
        label = self.label_map[node]
        if isinstance(label, list):
            return np.array(label)
        label_vec = np.zeros((self.num_classes))
        label_vec[label] = 1
        return label_vec

    def _fit_to_max_degree(self, neighbors):
        """Resample a non-empty neighbor array to exactly `max_degree` entries.

        Longer lists are subsampled without replacement, shorter ones are
        padded by sampling with replacement; exact-length input is returned
        unchanged.
        """
        if len(neighbors) > self.max_degree:
            return np.random.choice(neighbors, self.max_degree, replace=False)
        if len(neighbors) < self.max_degree:
            return np.random.choice(neighbors, self.max_degree, replace=True)
        return neighbors

    def construct_adj(self):
        """Build the fixed-width training adjacency table and degree vector.

        Val/test nodes are skipped and edges flagged 'train_removed' are
        ignored.  Returns `(adj, deg)` where `adj` has shape
        (len(id2idx) + 1, max_degree) and `deg` has shape (len(id2idx),).
        Rows that are never filled keep the value len(id2idx), which is the
        index of the extra last row.
        """
        num_nodes = len(self.id2idx)
        adj = num_nodes*np.ones((num_nodes+1, self.max_degree))
        deg = np.zeros((num_nodes,))

        for nodeid in self.G.nodes():
            if self.G.node[nodeid]['test'] or self.G.node[nodeid]['val']:
                continue
            neighbors = np.array([self.id2idx[neighbor]
                for neighbor in self.G.neighbors(nodeid)
                if (not self.G[nodeid][neighbor]['train_removed'])])
            deg[self.id2idx[nodeid]] = len(neighbors)
            if len(neighbors) == 0:
                continue
            adj[self.id2idx[nodeid], :] = self._fit_to_max_degree(neighbors)
        return adj, deg

    def construct_test_adj(self):
        """Build the fixed-width adjacency table over every node and edge.

        Same layout as the table from `construct_adj`, but no node or edge is
        excluded.  Returns the (len(id2idx) + 1, max_degree) array.
        """
        num_nodes = len(self.id2idx)
        adj = num_nodes*np.ones((num_nodes+1, self.max_degree))
        for nodeid in self.G.nodes():
            neighbors = np.array([self.id2idx[neighbor]
                for neighbor in self.G.neighbors(nodeid)])
            if len(neighbors) == 0:
                continue
            adj[self.id2idx[nodeid], :] = self._fit_to_max_degree(neighbors)
        return adj

    def end(self):
        """Return True once every training node has been handed out.

        The final batch may be shorter than `batch_size`; it is still served,
        so a training set smaller than one batch yields exactly one batch.
        """
        return self.batch_num * self.batch_size >= len(self.train_nodes)

    def batch_feed_dict(self, batch_nodes, val=False):
        """Build the feed dict and label matrix for `batch_nodes`.

        `val` is accepted for interface compatibility and is not used.
        Returns `(feed_dict, labels)` where `labels` stacks one label vector
        per node, in the order given.
        """
        batch1 = [self.id2idx[n] for n in batch_nodes]
        labels = np.vstack([self._make_label_vec(node) for node in batch_nodes])

        feed_dict = dict()
        feed_dict.update({self.placeholders['batch_size'] : len(batch1)})
        feed_dict.update({self.placeholders['batch']: batch1})
        feed_dict.update({self.placeholders['labels']: labels})

        return feed_dict, labels

    def node_val_feed_dict(self, size=None, test=False):
        """Feed dict and labels for the val (or, with `test`, test) nodes.

        When `size` is given, that many nodes are drawn with replacement
        instead of using the full list.  Returns `(feed_dict, labels)`.
        """
        val_nodes = self.test_nodes if test else self.val_nodes
        if size is not None:
            val_nodes = np.random.choice(val_nodes, size, replace=True)
        ret_val = self.batch_feed_dict(val_nodes)
        return ret_val[0], ret_val[1]

    def incremental_node_val_feed_dict(self, size, iter_num, test=False):
        """Serve the `iter_num`-th chunk of `size` val (or test) nodes.

        Returns `(feed_dict, labels, finished, val_node_subset)`; `finished`
        is True when this chunk reaches the end of the node list.
        """
        val_nodes = self.test_nodes if test else self.val_nodes
        stop = min((iter_num+1)*size, len(val_nodes))
        val_node_subset = val_nodes[iter_num*size:stop]

        ret_val = self.batch_feed_dict(val_node_subset)
        finished = (iter_num+1)*size >= len(val_nodes)
        return ret_val[0], ret_val[1], finished, val_node_subset

    def num_training_batches(self):
        """Number of batches `end()` allows per pass: ceil(len / batch_size)."""
        return (len(self.train_nodes) + self.batch_size - 1) // self.batch_size

    def next_minibatch_feed_dict(self):
        """Return `(feed_dict, labels)` for the next training batch and advance."""
        start = self.batch_num * self.batch_size
        self.batch_num += 1
        batch_nodes = self.train_nodes[start : start + self.batch_size]
        return self.batch_feed_dict(batch_nodes)

    def incremental_embed_feed_dict(self, size, iter_num):
        """Serve the `iter_num`-th chunk of `size` nodes from `self.nodes`.

        Returns `(batch, finished, val_nodes)` where `batch` is itself the
        `(feed_dict, labels)` pair produced by `batch_feed_dict`.
        """
        node_list = self.nodes
        stop = min((iter_num+1)*size, len(node_list))
        val_nodes = node_list[iter_num*size:stop]
        finished = (iter_num+1)*size >= len(node_list)
        return self.batch_feed_dict(val_nodes), finished, val_nodes

    def shuffle(self):
        """ Re-shuffle the training set.
            Also reset the batch number.
        """
        self.train_nodes = np.random.permutation(self.train_nodes)
        self.batch_num = 0
Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00			`from __future__ import division`
			`from __future__ import print_function`

			`import numpy as np`

			`np.random.seed(123)`

			`class EdgeMinibatchIterator(object):`

			`""" This minibatch iterator iterates over batches of sampled edges or`
			`random pairs of co-occuring edges.`
Cleaning up comments etc. 2017-05-31 21:39:04 +08:00
			`G -- networkx graph`
			`id2idx -- dict mapping node ids to index in feature tensor`
			`placeholders -- tensorflow placeholders object`
			`context_pairs -- if not none, then a list of co-occuring node pairs (from random walks)`
			`batch_size -- size of the minibatches`
			`max_degree -- maximum size of the downsampled adjacency lists`
			`n2v_retrain -- signals that the iterator is being used to add new embeddings to a n2v model`
			`fixed_n2v -- signals that the iterator is being used to retrain n2v with only existing nodes as context`
Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00			`"""`
			`def __init__(self, G, id2idx,`
Cleaning up comments etc. 2017-05-31 21:39:04 +08:00			`placeholders, context_pairs=None, batch_size=100, max_degree=25,`
Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00			`n2v_retrain=False, fixed_n2v=False,`
			`**kwargs):`

			`self.G = G`
			`self.nodes = G.nodes()`
			`self.id2idx = id2idx`
			`self.placeholders = placeholders`
			`self.batch_size = batch_size`
			`self.max_degree = max_degree`
			`self.batch_num = 0`

			`self.nodes = np.random.permutation(G.nodes())`
			`self.adj, self.deg = self.construct_adj()`
			`self.test_adj = self.construct_test_adj()`
			`if context_pairs is None:`
			`edges = G.edges()`
			`else:`
			`edges = context_pairs`
			`self.train_edges = self.edges = np.random.permutation(edges)`
			`if not n2v_retrain:`
			`self.train_edges = self._remove_isolated(self.train_edges)`
Python 3 support. 2017-10-12 05:05:36 +08:00			`self.val_edges = [e for e in G.edges() if G[e[0]][e[1]]['train_removed']]`
Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00			`else:`
			`if fixed_n2v:`
			`self.train_edges = self.val_edges = self._n2v_prune(self.edges)`
			`else:`
			`self.train_edges = self.val_edges = self.edges`

Python 3 support. 2017-10-12 05:05:36 +08:00			`print(len([n for n in G.nodes() if not G.node[n]['test'] and not G.node[n]['val']]), 'train nodes')`
			`print(len([n for n in G.nodes() if G.node[n]['test'] or G.node[n]['val']]), 'test nodes')`
Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00			`self.val_set_size = len(self.val_edges)`

			`def _n2v_prune(self, edges):`
			`is_val = lambda n : self.G.node[n]["val"] or self.G.node[n]["test"]`
			`return [e for e in edges if not is_val(e[1])]`

			`def _remove_isolated(self, edge_list):`
			`new_edge_list = []`
Cleaned up experimental run files. 2017-10-13 05:15:21 +08:00			`missing = 0`
Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00			`for n1, n2 in edge_list:`
Cleaned up experimental run files. 2017-10-13 05:15:21 +08:00			`if not n1 in self.G.node or not n2 in self.G.node:`
			`missing += 1`
			`continue`
Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00			`if (self.deg[self.id2idx[n1]] == 0 or self.deg[self.id2idx[n2]] == 0) \`
			`and (not self.G.node[n1]['test'] or self.G.node[n1]['val']) \`
			`and (not self.G.node[n2]['test'] or self.G.node[n2]['val']):`
			`continue`
			`else:`
			`new_edge_list.append((n1,n2))`
Cleaned up experimental run files. 2017-10-13 05:15:21 +08:00			`print("Unexpected missing:", missing)`
Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00			`return new_edge_list`

			`def construct_adj(self):`
			`adj = len(self.id2idx)*np.ones((len(self.id2idx)+1, self.max_degree))`
			`deg = np.zeros((len(self.id2idx),))`

			`for nodeid in self.G.nodes():`
			`if self.G.node[nodeid]['test'] or self.G.node[nodeid]['val']:`
			`continue`
			`neighbors = np.array([self.id2idx[neighbor]`
			`for neighbor in self.G.neighbors(nodeid)`
			`if (not self.G[nodeid][neighbor]['train_removed'])])`
			`deg[self.id2idx[nodeid]] = len(neighbors)`
			`if len(neighbors) == 0:`
			`continue`
			`if len(neighbors) > self.max_degree:`
			`neighbors = np.random.choice(neighbors, self.max_degree, replace=False)`
			`elif len(neighbors) < self.max_degree:`
			`neighbors = np.random.choice(neighbors, self.max_degree, replace=True)`
			`adj[self.id2idx[nodeid], :] = neighbors`
			`return adj, deg`

			`def construct_test_adj(self):`
			`adj = len(self.id2idx)*np.ones((len(self.id2idx)+1, self.max_degree))`
			`for nodeid in self.G.nodes():`
			`neighbors = np.array([self.id2idx[neighbor]`
			`for neighbor in self.G.neighbors(nodeid)])`
			`if len(neighbors) == 0:`
			`continue`
			`if len(neighbors) > self.max_degree:`
			`neighbors = np.random.choice(neighbors, self.max_degree, replace=False)`
			`elif len(neighbors) < self.max_degree:`
			`neighbors = np.random.choice(neighbors, self.max_degree, replace=True)`
			`adj[self.id2idx[nodeid], :] = neighbors`
			`return adj`

			`def end(self):`
			`return self.batch_num * self.batch_size > len(self.train_edges) - self.batch_size + 1`

			`def batch_feed_dict(self, batch_edges):`
			`batch1 = []`
			`batch2 = []`
			`for node1, node2 in batch_edges:`
			`batch1.append(self.id2idx[node1])`
			`batch2.append(self.id2idx[node2])`

			`feed_dict = dict()`
			`feed_dict.update({self.placeholders['batch_size'] : len(batch_edges)})`
			`feed_dict.update({self.placeholders['batch1']: batch1})`
			`feed_dict.update({self.placeholders['batch2']: batch2})`

			`return feed_dict`

			`def next_minibatch_feed_dict(self):`
			`start = self.batch_num * self.batch_size`
			`self.batch_num += 1`
			`batch_edges = self.train_edges[start : start + self.batch_size]`
			`return self.batch_feed_dict(batch_edges)`

ppi eval 2017-10-14 04:29:31 +08:00			`def num_training_batches(self):`
			`return len(self.train_edges) // self.batch_size + 1`

Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00			`def val_feed_dict(self, size=None):`
			`edge_list = self.val_edges`
			`if size is None:`
			`return self.batch_feed_dict(edge_list)`
			`else:`
			`ind = np.random.permutation(len(edge_list))`
			`val_edges = [edge_list[i] for i in ind[:min(size, len(ind))]]`
			`return self.batch_feed_dict(val_edges)`

			`def incremental_val_feed_dict(self, size, iter_num):`
			`edge_list = self.val_edges`
			`val_edges = edge_list[iter_num*size:min((iter_num+1)*size,`
			`len(edge_list))]`
			`return self.batch_feed_dict(val_edges), (iter_num+1)*size >= len(self.val_edges), val_edges`

			`def incremental_embed_feed_dict(self, size, iter_num):`
			`node_list = self.nodes`
			`val_nodes = node_list[iter_num*size:min((iter_num+1)*size,`
			`len(node_list))]`
			`val_edges = [(n,n) for n in val_nodes]`
			`return self.batch_feed_dict(val_edges), (iter_num+1)*size >= len(node_list), val_edges`

			`def label_val(self):`
			`train_edges = []`
			`val_edges = []`
Python 3 support. 2017-10-12 05:05:36 +08:00			`for n1, n2 in self.G.edges():`
Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00			`if (self.G.node[n1]['val'] or self.G.node[n1]['test']`
			`or self.G.node[n2]['val'] or self.G.node[n2]['test']):`
			`val_edges.append((n1,n2))`
			`else:`
			`train_edges.append((n1,n2))`
			`return train_edges, val_edges`

			`def shuffle(self):`
			`""" Re-shuffle the training set.`
			`Also reset the batch number.`
			`"""`
			`self.train_edges = np.random.permutation(self.train_edges)`
			`self.nodes = np.random.permutation(self.nodes)`
			`self.batch_num = 0`

			`class NodeMinibatchIterator(object):`

			`"""`
			`This minibatch iterator iterates over nodes for supervised learning.`
Cleaning up comments etc. 2017-05-31 21:39:04 +08:00
			`G -- networkx graph`
			`id2idx -- dict mapping node ids to integer values indexing feature tensor`
			`placeholders -- standard tensorflow placeholders object for feeding`
			`label_map -- map from node ids to class values (integer or list)`
			`num_classes -- number of output classes`
			`batch_size -- size of the minibatches`
			`max_degree -- maximum size of the downsampled adjacency lists`
Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00			`"""`
			`def __init__(self, G, id2idx,`
Cleaning up comments etc. 2017-05-31 21:39:04 +08:00			`placeholders, label_map, num_classes,`
Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00			`batch_size=100, max_degree=25,`
			`**kwargs):`

			`self.G = G`
			`self.nodes = G.nodes()`
			`self.id2idx = id2idx`
			`self.placeholders = placeholders`
			`self.batch_size = batch_size`
			`self.max_degree = max_degree`
			`self.batch_num = 0`
			`self.label_map = label_map`
			`self.num_classes = num_classes`

			`self.adj, self.deg = self.construct_adj()`
			`self.test_adj = self.construct_test_adj()`

Python 3 support. 2017-10-12 05:05:36 +08:00			`self.val_nodes = [n for n in self.G.nodes() if self.G.node[n]['val']]`
			`self.test_nodes = [n for n in self.G.nodes() if self.G.node[n]['test']]`
Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00
			`self.no_train_nodes_set = set(self.val_nodes + self.test_nodes)`
			`self.train_nodes = set(G.nodes()).difference(self.no_train_nodes_set)`
			`# don't train on nodes that only have edges to test set`
			`self.train_nodes = [n for n in self.train_nodes if self.deg[id2idx[n]] > 0]`

			`def _make_label_vec(self, node):`
			`label = self.label_map[node]`
			`if isinstance(label, list):`
			`label_vec = np.array(label)`
			`else:`
			`label_vec = np.zeros((self.num_classes))`
			`class_ind = self.label_map[node]`
			`label_vec[class_ind] = 1`
			`return label_vec`

			`def construct_adj(self):`
			`adj = len(self.id2idx)*np.ones((len(self.id2idx)+1, self.max_degree))`
			`deg = np.zeros((len(self.id2idx),))`

			`for nodeid in self.G.nodes():`
			`if self.G.node[nodeid]['test'] or self.G.node[nodeid]['val']:`
			`continue`
			`neighbors = np.array([self.id2idx[neighbor]`
			`for neighbor in self.G.neighbors(nodeid)`
			`if (not self.G[nodeid][neighbor]['train_removed'])])`
			`deg[self.id2idx[nodeid]] = len(neighbors)`
			`if len(neighbors) == 0:`
			`continue`
			`if len(neighbors) > self.max_degree:`
			`neighbors = np.random.choice(neighbors, self.max_degree, replace=False)`
			`elif len(neighbors) < self.max_degree:`
			`neighbors = np.random.choice(neighbors, self.max_degree, replace=True)`
			`adj[self.id2idx[nodeid], :] = neighbors`
			`return adj, deg`

			`def construct_test_adj(self):`
			`adj = len(self.id2idx)*np.ones((len(self.id2idx)+1, self.max_degree))`
			`for nodeid in self.G.nodes():`
			`neighbors = np.array([self.id2idx[neighbor]`
			`for neighbor in self.G.neighbors(nodeid)])`
			`if len(neighbors) == 0:`
			`continue`
			`if len(neighbors) > self.max_degree:`
			`neighbors = np.random.choice(neighbors, self.max_degree, replace=False)`
			`elif len(neighbors) < self.max_degree:`
			`neighbors = np.random.choice(neighbors, self.max_degree, replace=True)`
			`adj[self.id2idx[nodeid], :] = neighbors`
			`return adj`

			`def end(self):`
			`return self.batch_num * self.batch_size > len(self.train_nodes) - self.batch_size`

			`def batch_feed_dict(self, batch_nodes, val=False):`
			`batch1id = batch_nodes`
			`batch1 = [self.id2idx[n] for n in batch1id]`

			`labels = np.vstack([self._make_label_vec(node) for node in batch1id])`
			`feed_dict = dict()`
			`feed_dict.update({self.placeholders['batch_size'] : len(batch1)})`
			`feed_dict.update({self.placeholders['batch']: batch1})`
			`feed_dict.update({self.placeholders['labels']: labels})`

			`return feed_dict, labels`

			`def node_val_feed_dict(self, size=None, test=False):`
			`if test:`
			`val_nodes = self.test_nodes`
			`else:`
			`val_nodes = self.val_nodes`
			`if not size is None:`
			`val_nodes = np.random.choice(val_nodes, size, replace=True)`
			`# add a dummy neighbor`
			`ret_val = self.batch_feed_dict(val_nodes)`
			`return ret_val[0], ret_val[1]`

			`def incremental_node_val_feed_dict(self, size, iter_num, test=False):`
			`if test:`
			`val_nodes = self.test_nodes`
			`else:`
			`val_nodes = self.val_nodes`
			`val_node_subset = val_nodes[iter_num*size:min((iter_num+1)*size,`
			`len(val_nodes))]`

			`# add a dummy neighbor`
			`ret_val = self.batch_feed_dict(val_node_subset)`
			`return ret_val[0], ret_val[1], (iter_num+1)*size >= len(val_nodes), val_node_subset`

ppi eval 2017-10-14 04:29:31 +08:00			`def num_training_batches(self):`
			`return len(self.train_nodes) // self.batch_size + 1`

Initial commit of cleaned repo. 2017-05-29 23:35:30 +08:00			`def next_minibatch_feed_dict(self):`
			`start = self.batch_num * self.batch_size`
			`self.batch_num += 1`
			`batch_nodes = self.train_nodes[start : start + self.batch_size]`
			`return self.batch_feed_dict(batch_nodes)`

			`def incremental_embed_feed_dict(self, size, iter_num):`
			`node_list = self.nodes`
			`val_nodes = node_list[iter_num*size:min((iter_num+1)*size,`
			`len(node_list))]`
			`return self.batch_feed_dict(val_nodes), (iter_num+1)*size >= len(node_list), val_nodes`

			`def shuffle(self):`
			`""" Re-shuffle the training set.`
			`Also reset the batch number.`
			`"""`
			`self.train_nodes = np.random.permutation(self.train_nodes)`
			`self.batch_num = 0`