Source code for pysgmcmc.samplers.svgd

import tensorflow as tf
from pysgmcmc.tensor_utils import pdist, squareform, median
from pysgmcmc.stepsize_schedules import ConstantStepsizeSchedule
from pysgmcmc.samplers.base_classes import MCMCSampler


# XXX: Interface needs to change more: particles should be List[List[tensorflow.Variable]]
# where each inner list is one guess of a network.
# This would enable the bnn code to change such that SVGD becomes applicable
# to our BNN.


class SVGDSampler(MCMCSampler):
    """ Stein Variational Gradient Descent Sampler.

        See [1] for more details on Stein Variational Gradient Descent.

        [1] Q. Liu, D. Wang
            In Advances in Neural Information Processing Systems 29 (2016).
            `Stein Variational Gradient Descent:
            A General Purpose Bayesian Inference Algorithm.
            <https://arxiv.org/pdf/1608.04471>`_
    """
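    # For a target density `p` and kernel `k`, SVGD transports each
    # particle `x` along the empirical update direction from [1],
    #
    #   phi(x) = 1/n * sum_j [ k(x_j, x) * grad_{x_j} log p(x_j)
    #                          + grad_{x_j} k(x_j, x) ],
    #
    # i.e. a kernel-weighted gradient step towards regions of high density
    # plus a repulsive term that keeps the particles from collapsing onto
    # a single mode.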
    def __init__(self, particles, cost_fun, batch_generator=None,
                 stepsize_schedule=ConstantStepsizeSchedule(0.1),
                 alpha=0.9, fudge_factor=1e-6,
                 session=tf.get_default_session(), dtype=tf.float64,
                 seed=None):
        """ Initialize the sampler parameters and set up a tensorflow.Graph
            for later queries.

        Parameters
        ----------
        particles : List[tensorflow.Variable]
            List of particles each representing a (different) guess of the
            target parameters of this sampler.

        cost_fun : callable
            Function that takes `params` of *one* particle as input and
            returns a 1-d `tensorflow.Tensor` that contains the cost-value.
            Frequently denoted as `U` in the literature.

        batch_generator : iterable, optional
            Iterable which returns dictionaries to feed into
            tensorflow.Session.run() calls to evaluate the cost function.
            Defaults to `None` which indicates that no batches shall be fed.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use in our samplers.
            See also: `pysgmcmc.stepsize_schedules`

        alpha : float, optional
            Decay rate of the exponential moving average of squared update
            directions used for adagrad-style preconditioning.
            Defaults to `0.9`.

        fudge_factor : float, optional
            Small constant added to the preconditioning denominator to
            avoid division by zero.
            Defaults to `1e-6`.

        session : tensorflow.Session, optional
            Session object which knows about the external part of the graph
            (which defines `Cost`, and possibly batches).
            Used internally to evaluate (burn-in/sample) the sampler.

        dtype : tensorflow.DType, optional
            Type of elements of `tensorflow.Tensor` objects used in this
            sampler. Defaults to `tensorflow.float64`.

        seed : int, optional
            Random seed to use. Defaults to `None`.

        See Also
        ----------
        pysgmcmc.samplers.base_classes.MCMCSampler:
            Base class for `SVGDSampler` that specifies how actual sampling
            is performed (using the iterator protocol,
            e.g. `next(sampler)`).

        """
        assert isinstance(alpha, (int, float))
        assert isinstance(fudge_factor, (int, float))
        assert callable(cost_fun)

        self.particles = tf.stack(particles)

        def cost_fun_wrapper(params):
            # Evaluate the cost of every particle; the `params` argument
            # passed in by the base class is ignored in favor of the
            # stacked particles.
            return tf.map_fn(
                lambda particle: cost_fun(particle), self.particles
            )
        cost_fun_wrapper.__name__ = cost_fun.__name__

        super().__init__(
            params=particles,
            cost_fun=cost_fun_wrapper,
            batch_generator=batch_generator,
            session=session,
            seed=seed,
            dtype=dtype,
            stepsize_schedule=stepsize_schedule
        )

        fudge_factor = tf.constant(
            fudge_factor, dtype=self.dtype, name="fudge_factor"
        )

        self.epsilon = tf.Variable(
            stepsize_schedule.initial_value, dtype=self.dtype,
            name="stepsize"
        )

        self.n_particles = tf.cast(self.particles.shape[0], self.dtype)

        historical_grad = tf.get_variable(
            "historical_grad", self.particles.shape, dtype=dtype,
            initializer=tf.zeros_initializer()
        )

        self.session.run(
            tf.variables_initializer([historical_grad, self.epsilon])
        )

        # Gradient of the potential `U = -log p` for each particle.
        lnpgrad = tf.squeeze(tf.gradients(self.cost, self.particles))

        kernel_matrix, kernel_gradients = self.svgd_kernel(self.particles)

        # `tf.matmul(kernel_matrix, lnpgrad)` is the *negated*
        # kernel-weighted gradient of `log p`, so the repulsive
        # `kernel_gradients` term must be subtracted here; descending
        # along `grad_theta` below then realizes the SVGD update from [1].
        grad_theta = tf.divide(
            tf.matmul(kernel_matrix, lnpgrad) - kernel_gradients,
            self.n_particles
        )

        # Adagrad-style preconditioning with an exponential moving average
        # of squared update directions.
        historical_grad_t = tf.assign(
            historical_grad,
            alpha * historical_grad + (1. - alpha) * (grad_theta ** 2)
        )

        adj_grad = tf.divide(
            grad_theta, fudge_factor + tf.sqrt(historical_grad_t)
        )

        for i, param in enumerate(self.params):
            self.theta_t[i] = tf.assign_sub(
                param, self.epsilon * adj_grad[i]
            )
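    # The update direction `grad_theta` constructed in `__init__` is
    # preconditioned adagrad-style before it is applied, i.e.
    #
    #   v_t = alpha * v_{t-1} + (1 - alpha) * grad_theta ** 2
    #   x_t = x_{t-1} - epsilon * grad_theta / (fudge_factor + sqrt(v_t)),
    #
    # so `alpha` controls how quickly the moving average of squared update
    # directions decays and `fudge_factor` guards against division by zero.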
    def svgd_kernel(self, particles):
        """ Calculate a kernel matrix with corresponding derivatives
            for the given `particles`.

            Uses a RBF kernel `k(x, x') = exp(-||x - x'||^2 / (2 * h ** 2))`
            whose bandwidth `h` is chosen with the median heuristic of [1]:
            `h ** 2 = median(||x - x'||^2) / (2 * log(n_particles + 1))`.

        Parameters
        ----------
        particles : tensorflow.Tensor
            Stacked particle positions, with one particle per row.

        Returns
        ----------
        kernel_matrix : tf.Tensor
            Pairwise kernel values, `kernel_matrix[i, j] = k(x_i, x_j)`.

        kernel_gradients : tf.Tensor
            Repulsive term of the SVGD update; row `i` contains
            `sum_j grad_{x_j} k(x_j, x_i)`.
        """
        euclidean_distances = pdist(particles)
        pairwise_distances = squareform(euclidean_distances) ** 2

        # Median heuristic for the kernel bandwidth.
        h = tf.sqrt(
            0.5 * median(pairwise_distances) / tf.log(self.n_particles + 1.)
        )

        kernel_matrix = tf.exp(-pairwise_distances / h ** 2 / 2)

        kernel_sum = tf.reduce_sum(kernel_matrix, axis=1)

        kernel_gradients = tf.add(
            -tf.matmul(kernel_matrix, particles),
            tf.multiply(particles, tf.expand_dims(kernel_sum, axis=1))
        )

        return kernel_matrix, kernel_gradients / (h ** 2)
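    # For illustration, the same kernel computation in plain NumPy. This is
    # a sketch only (it assumes `numpy` and `scipy` are available) and is
    # not used by the sampler.
    @staticmethod
    def _svgd_kernel_numpy_sketch(particles):
        """ NumPy mirror of `svgd_kernel` for a 2-d array `particles` with
            one particle per row. Returns the kernel matrix and the
            corresponding kernel gradients. """
        import numpy as np
        from scipy.spatial.distance import pdist as np_pdist
        from scipy.spatial.distance import squareform as np_squareform

        n_particles = particles.shape[0]
        pairwise_distances = np_squareform(np_pdist(particles)) ** 2

        # Median heuristic: h ** 2 = median(d ** 2) / (2 * log(n + 1)).
        h_squared = (
            0.5 * np.median(pairwise_distances) / np.log(n_particles + 1.)
        )

        kernel_matrix = np.exp(-pairwise_distances / (2. * h_squared))

        # Row `i` contains `sum_j k(x_i, x_j) * (x_i - x_j) / h ** 2`,
        # i.e. the repulsive term `sum_j grad_{x_j} k(x_j, x_i)`.
        kernel_sum = kernel_matrix.sum(axis=1)
        kernel_gradients = (
            -kernel_matrix.dot(particles)
            + particles * kernel_sum[:, None]
        ) / h_squared

        return kernel_matrix, kernel_gradients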
    # XXX: Probably unnecessary. Changes should happen toplevel.
    # However, using this to test *just* the svgd implementation and make
    # it conform to a list-of-lists interface still seems reasonable.
    # Later: make BNN use multiple get_net calls to get variables
    # and extract appropriate groups from tf.trainable_variables
    # (use scope prefix).
    def _duplicate_variables(self, variables, duplicate_index):
        duplicate = []
        for var in variables:
            name = var.name.split(":")[0] + "_" + str(duplicate_index)
            dup_var = tf.get_variable(
                name, initializer=var.initializer._inputs[1]
            )
            # session.run(tf.variables_initializer([dup_var]))
            duplicate.append(dup_var)
        return duplicate
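

if __name__ == "__main__":
    # Minimal smoke-test sketch, not part of the library: run SVGD on a
    # standard normal target whose potential is `U(x) = 0.5 * ||x||^2`.
    # Assumes that iterating the sampler (see `MCMCSampler`) yields
    # `(sample, cost)` tuples.
    def toy_cost_function(particle):
        return 0.5 * tf.reduce_sum(particle ** 2)

    toy_particles = [
        tf.get_variable(
            "particle_{}".format(index), shape=(2,), dtype=tf.float64,
            initializer=tf.random_normal_initializer()
        )
        for index in range(20)
    ]

    with tf.Session() as session:
        sampler = SVGDSampler(
            particles=toy_particles,
            cost_fun=toy_cost_function,
            session=session
        )
        # Blunt but safe: (re-)initialize all variables, including the
        # particles themselves, before sampling.
        session.run(tf.global_variables_initializer())

        for _ in range(100):
            sample, cost = next(sampler)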