from collections import namedtuple import tensorflow as tf import math import graphsage.layers as layers import graphsage.metrics as metrics from .prediction import BipartiteEdgePredLayer from .aggregators import MeanAggregator, PoolingAggregator, SeqAggregator, GCNAggregator, TwoLayerPoolingAggregator flags = tf.app.flags FLAGS = flags.FLAGS # DISCLAIMER: # Boilerplate parts of this code file were originally forked from # https://github.com/tkipf/gcn # which itself was very inspired by the keras package # (A full license with proper attributions will be provided in the # public repo of this code base) class Model(object): def __init__(self, **kwargs): allowed_kwargs = {'name', 'logging', 'model_size'} for kwarg in kwargs.keys(): assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg name = kwargs.get('name') if not name: name = self.__class__.__name__.lower() self.name = name logging = kwargs.get('logging', False) self.logging = logging self.vars = {} self.placeholders = {} self.layers = [] self.activations = [] self.inputs = None self.outputs = None self.loss = 0 self.accuracy = 0 self.optimizer = None self.opt_op = None def _build(self): raise NotImplementedError def build(self): """ Wrapper for _build() """ with tf.variable_scope(self.name): self._build() # Build sequential layer model self.activations.append(self.inputs) for layer in self.layers: hidden = layer(self.activations[-1]) self.activations.append(hidden) self.outputs = self.activations[-1] # Store model variables for easy access variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name) self.vars = {var.name: var for var in variables} # Build metrics self._loss() self._accuracy() self.opt_op = self.optimizer.minimize(self.loss) def predict(self): pass def _loss(self): raise NotImplementedError def _accuracy(self): raise NotImplementedError def save(self, sess=None): if not sess: raise AttributeError("TensorFlow session not provided.") saver = tf.train.Saver(self.vars) save_path = saver.save(sess, "tmp/%s.ckpt" % self.name) print("Model saved in file: %s" % save_path) def load(self, sess=None): if not sess: raise AttributeError("TensorFlow session not provided.") saver = tf.train.Saver(self.vars) save_path = "tmp/%s.ckpt" % self.name saver.restore(sess, save_path) print("Model restored from file: %s" % save_path) class MLP(Model): def __init__(self, placeholders, dims, categorical=True, **kwargs): super(MLP, self).__init__(**kwargs) self.dims = dims self.input_dim = dims[0] self.output_dim = dims[-1] self.placeholders = placeholders self.categorical = categorical self.inputs = placeholders['features'] self.labels = placeholders['labels'] self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) self.build() def _loss(self): # Weight decay loss for var in self.layers[0].vars.values(): self.loss += FLAGS.weight_decay * tf.nn.l2_loss(var) # Cross entropy error if self.categorical: self.loss += metrics.masked_softmax_cross_entropy(self.outputs, self.placeholders['labels'], self.placeholders['labels_mask']) # L2 else: diff = self.labels - self.outputs self.loss += tf.reduce_sum(tf.sqrt(tf.reduce_sum(diff * diff, axis=1))) def _accuracy(self): if self.categorical: self.accuracy = metrics.masked_accuracy(self.outputs, self.placeholders['labels'], self.placeholders['labels_mask']) def _build(self): self.layers.append(layers.Dense(input_dim=self.input_dim, output_dim=self.dims[1], act=tf.nn.relu, dropout=self.placeholders['dropout'], sparse_inputs=False, logging=self.logging)) self.layers.append(layers.Dense(input_dim=self.dims[1], output_dim=self.output_dim, act=lambda x: x, dropout=self.placeholders['dropout'], logging=self.logging)) def predict(self): return tf.nn.softmax(self.outputs) class GeneralizedModel(Model): """ Base class for models that aren't constructed from traditional, sequential layers. Subclasses must set self.outputs in _build method (Removes the layers idiom from build method of the Model class) """ def __init__(self, **kwargs): super(GeneralizedModel, self).__init__(**kwargs) def build(self): """ Wrapper for _build() """ with tf.variable_scope(self.name): self._build() # Store model variables for easy access variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name) self.vars = {var.name: var for var in variables} # Build metrics self._loss() self._accuracy() self.opt_op = self.optimizer.minimize(self.loss) # SAGEInfo is a namedtuple that specifies the parameters # of the recursive sampled GCN layers SAGEInfo = namedtuple("SAGEInfo", ['layer_name', # name of the layer (to get feature embedding etc.) 'neigh_sampler', # callable neigh_sampler constructor 'num_samples', 'output_dim' # the output (i.e., hidden) dimension ]) class SampleAndAggregate(GeneralizedModel): """ Implementation of a standard 2-step graph convolutional network Uses random sampling on neighborhoods """ def __init__(self, placeholders, features, adj, degrees, layer_infos, concat=True, aggregator_type="mean", model_size="small", **kwargs): ''' Args: - layer_infos: List of SGCInfo namedtuples that describe the parameters of all the recursive layers. See SGCInfo definition above. ''' super(SampleAndAggregate, self).__init__(**kwargs) if aggregator_type == "mean": self.aggregator_cls = MeanAggregator elif aggregator_type == "seq": self.aggregator_cls = SeqAggregator elif aggregator_type == "pool": self.aggregator_cls = PoolingAggregator elif aggregator_type == "pool_2": self.aggregator_cls = TwoLayerPoolingAggregator elif aggregator_type == "gcn": self.aggregator_cls = GCNAggregator else: raise Exception("Unknown aggregator: ", self.aggregator_cls) # get info from placeholders... self.inputs1 = placeholders["batch1"] self.inputs2 = placeholders["batch2"] self.model_size = model_size self.adj_info = adj self.features = tf.Variable(tf.constant(features, dtype=tf.float32), trainable=False) self.degrees = degrees self.concat = concat self.dims = [features.shape[1]] self.dims.extend([layer_infos[i].output_dim for i in range(len(layer_infos))]) self.batch_size = placeholders["batch_size"] self.placeholders = placeholders self.layer_infos = layer_infos self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) self.build() def sample(self, inputs, layer_infos, batch_size=None): """ Sample neighbors to be the supportive fields for multi-layer convolutions. Args: inputs: batch inputs batch_size: the number of inputs (different for batch inputs and negative samples). """ if batch_size is None: batch_size = self.batch_size samples = [inputs] # size of convolution support at each layer per node support_size = 1 support_sizes = [support_size] for k in range(len(layer_infos)): t = len(layer_infos) - k - 1 support_size *= layer_infos[t].num_samples sampler = layer_infos[t].neigh_sampler node = sampler((samples[k], layer_infos[t].num_samples)) samples.append(tf.reshape(node, [support_size * batch_size,])) support_sizes.append(support_size) return samples, support_sizes def aggregate(self, samples, input_features, dims, num_samples, support_sizes, batch_size=None, aggregators=None, name=None, concat=False, model_size="small"): """ At each layer, aggregate hidden representations of neighbors to compute the hidden representations at next layer. Args: samples: a list of samples of variable hops away for convolving at each layer of the network. Length is the number of layers + 1. Each is a vector of node indices. input_features: the input features for each sample of various hops away. dims: a list of dimensions of the hidden representations from the input layer to the final layer. Length is the number of layers + 1. num_samples: list of number of samples for each layer. support_sizes: the number of nodes to gather information from for each layer. batch_size: the number of inputs (different for batch inputs and negative samples). Returns: The hidden representation at the final layer for all nodes in batch """ if batch_size is None: batch_size = self.batch_size # length: number of layers + 1 hidden = [tf.nn.embedding_lookup(input_features, node_samples) for node_samples in samples] new_agg = aggregators is None if new_agg: aggregators = [] for layer in range(len(num_samples)): if new_agg: dim_mult = 2 if concat and (layer != 0) else 1 # aggregator at current layer if layer == len(num_samples) - 1: aggregator = self.aggregator_cls(dim_mult*dims[layer], dims[layer+1], act=lambda x : x, dropout=self.placeholders['dropout'], name=name, concat=concat, model_size=model_size) else: aggregator = self.aggregator_cls(dim_mult*dims[layer], dims[layer+1], dropout=self.placeholders['dropout'], name=name, concat=concat, model_size=model_size) aggregators.append(aggregator) else: aggregator = aggregators[layer] # hidden representation at current layer for all support nodes that are various hops away next_hidden = [] # as layer increases, the number of support nodes needed decreases for hop in range(len(num_samples) - layer): dim_mult = 2 if concat and (layer != 0) else 1 neigh_dims = [batch_size * support_sizes[hop], num_samples[len(num_samples) - hop - 1], dim_mult*dims[layer]] h = aggregator((hidden[hop], tf.reshape(hidden[hop + 1], neigh_dims))) next_hidden.append(h) hidden = next_hidden return hidden[0], aggregators def _build(self): labels = tf.reshape( tf.cast(self.placeholders['batch2'], dtype=tf.int64), [self.batch_size, 1]) self.neg_samples, _, _ = (tf.nn.fixed_unigram_candidate_sampler( true_classes=labels, num_true=1, num_sampled=FLAGS.neg_sample_size, unique=False, range_max=len(self.degrees), distortion=0.75, unigrams=self.degrees.tolist())) # perform "convolution" samples1, support_sizes1 = self.sample(self.inputs1, self.layer_infos) samples2, support_sizes2 = self.sample(self.inputs2, self.layer_infos) num_samples = [layer_info.num_samples for layer_info in self.layer_infos] self.outputs1, self.aggregators = self.aggregate(samples1, [self.features], self.dims, num_samples, support_sizes1, concat=self.concat, model_size=self.model_size) self.outputs2, _ = self.aggregate(samples2, [self.features], self.dims, num_samples, support_sizes2, aggregators=self.aggregators, concat=self.concat, model_size=self.model_size) neg_samples, neg_support_sizes = self.sample(self.neg_samples, self.layer_infos, FLAGS.neg_sample_size) self.neg_outputs, _ = self.aggregate(neg_samples, [self.features], self.dims, num_samples, neg_support_sizes, batch_size=FLAGS.neg_sample_size, aggregators=self.aggregators, concat=self.concat, model_size=self.model_size) dim_mult = 2 if self.concat else 1 self.link_pred_layer = BipartiteEdgePredLayer(dim_mult*self.dims[-1], dim_mult*self.dims[-1], self.placeholders, act=tf.nn.sigmoid, bilinear_weights=False, name='edge_predict') self.outputs1 = tf.nn.l2_normalize(self.outputs1, 1) self.outputs2 = tf.nn.l2_normalize(self.outputs2, 1) self.neg_outputs = tf.nn.l2_normalize(self.neg_outputs, 1) def build(self): self._build() # TF graph management self._loss() self._accuracy() self.loss = self.loss / tf.cast(self.batch_size, tf.float32) grads_and_vars = self.optimizer.compute_gradients(self.loss) clipped_grads_and_vars = [(tf.clip_by_value(grad, -5.0, 5.0) if grad is not None else None, var) for grad, var in grads_and_vars] self.grad, _ = clipped_grads_and_vars[0] self.opt_op = self.optimizer.apply_gradients(clipped_grads_and_vars) def _loss(self): for aggregator in self.aggregators: for var in aggregator.vars.values(): self.loss += FLAGS.weight_decay * tf.nn.l2_loss(var) self.loss = self.link_pred_layer.loss(self.outputs1, self.outputs2, self.neg_outputs) self.loss = self.loss / tf.cast(self.batch_size, tf.float32) tf.summary.scalar('loss', self.loss) def _accuracy(self): # shape: [batch_size] aff = self.link_pred_layer.affinity(self.outputs1, self.outputs2) # shape : [batch_size x num_neg_samples] self.neg_aff = self.link_pred_layer.neg_cost(self.outputs1, self.neg_outputs) self.neg_aff = tf.reshape(self.neg_aff, [self.batch_size, FLAGS.neg_sample_size]) _aff = tf.expand_dims(aff, axis=1) self.aff_all = tf.concat(axis=1, values=[self.neg_aff, _aff]) size = tf.shape(self.aff_all)[1] _, indices_of_ranks = tf.nn.top_k(self.aff_all, k=size) _, self.ranks = tf.nn.top_k(-indices_of_ranks, k=size) self.mrr = tf.reduce_mean(tf.div(1.0, tf.cast(self.ranks[:, -1] + 1, tf.float32))) tf.summary.scalar('mrr', self.mrr) class Node2VecModel(GeneralizedModel): def __init__(self, placeholders, dict_size, degrees, name=None, nodevec_dim=50, lr=0.001, **kwargs): """ Simple version of Node2Vec algorithm. Args: dict_size1: the total number of nodes in set1. dict_size2: the total number of nodes in set2. nodevec_dim: dimension of the vector representation of node. lr: learning rate of optimizer. """ super(Node2VecModel, self).__init__(**kwargs) self.placeholders = placeholders self.degrees = degrees self.inputs1 = placeholders["batch1"] self.inputs2 = placeholders["batch2"] self.batch_size = placeholders['batch_size'] self.hidden_dim = nodevec_dim # following the tensorflow word2vec tutorial self.target_embeds = tf.Variable( tf.random_uniform([dict_size, nodevec_dim], -1, 1), name="target_embeds") self.context_embeds = tf.Variable( tf.truncated_normal([dict_size, nodevec_dim], stddev=1.0 / math.sqrt(nodevec_dim)), name="context_embeds") self.context_bias = tf.Variable( tf.zeros([dict_size]), name="context_bias") self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr) self.build() def _build(self): labels = tf.reshape( tf.cast(self.placeholders['batch2'], dtype=tf.int64), [self.batch_size, 1]) self.neg_samples, _, _ = (tf.nn.fixed_unigram_candidate_sampler( true_classes=labels, num_true=1, num_sampled=FLAGS.neg_sample_size, unique=True, range_max=len(self.degrees), distortion=0.75, unigrams=self.degrees.tolist())) self.outputs1 = tf.nn.embedding_lookup(self.target_embeds, self.inputs1) self.outputs2 = tf.nn.embedding_lookup(self.context_embeds, self.inputs2) self.outputs2_bias = tf.nn.embedding_lookup(self.context_bias, self.inputs2) self.neg_outputs = tf.nn.embedding_lookup(self.context_embeds, self.neg_samples) self.neg_outputs_bias = tf.nn.embedding_lookup(self.context_bias, self.neg_samples) self.link_pred_layer = BipartiteEdgePredLayer(self.hidden_dim, self.hidden_dim, self.placeholders, bilinear_weights=False) def build(self): self._build() # TF graph management self._loss() self._minimize() self._accuracy() def _minimize(self): self.opt_op = self.optimizer.minimize(self.loss) def _loss(self): aff = tf.reduce_sum(tf.multiply(self.outputs1, self.outputs2), 1) + self.outputs2_bias neg_aff = tf.matmul(self.outputs2, tf.transpose(self.neg_outputs)) + self.neg_outputs_bias true_xent = tf.nn.sigmoid_cross_entropy_with_logits( labels=tf.ones_like(aff), logits=aff) negative_xent = tf.nn.sigmoid_cross_entropy_with_logits( labels=tf.zeros_like(neg_aff), logits=neg_aff) loss = tf.reduce_sum(true_xent) + tf.reduce_sum(negative_xent) self.loss = loss / tf.cast(self.batch_size, tf.float32) tf.summary.scalar('loss', self.loss) def _accuracy(self): # shape: [batch_size] aff = self.link_pred_layer.affinity(self.outputs1, self.outputs2) # shape : [batch_size x num_neg_samples] self.neg_aff = self.link_pred_layer.neg_cost(self.outputs1, self.neg_outputs) self.neg_aff = tf.reshape(self.neg_aff, [self.batch_size, FLAGS.neg_sample_size]) _aff = tf.expand_dims(aff, axis=1) self.aff_all = tf.concat(axis=1, values=[self.neg_aff, _aff]) size = tf.shape(self.aff_all)[1] _, indices_of_ranks = tf.nn.top_k(self.aff_all, k=size) _, self.ranks = tf.nn.top_k(-indices_of_ranks, k=size) self.mrr = tf.reduce_mean(tf.div(1.0, tf.cast(self.ranks[:, -1] + 1, tf.float32))) tf.summary.scalar('mrr', self.mrr)