Source code for pysgmcmc.samplers.relativistic_sghmc

# vim:foldmethod=marker
import tensorflow as tf
from pysgmcmc.samplers.base_classes import MCMCSampler
from pysgmcmc.stepsize_schedules import ConstantStepsizeSchedule

from pysgmcmc.tensor_utils import (
    vectorize, unvectorize
)

from arspy.ars import adaptive_rejection_sampling


[docs]class RelativisticSGHMCSampler(MCMCSampler): """ Relativistic Stochastic Gradient Hamiltonian Monte-Carlo Sampler. See [1] for more details on Relativistic SGHMC. [1] X. Lu, V. Perrone, L. Hasenclever, Y. W. Teh, S. J. Vollmer In Proceedings of the 20 th International Conference on Artificial Intelligence and Statistics (AISTATS) 2017\n `Relativistic Monte Carlo <http://proceedings.mlr.press/v54/lu17b/lu17b.pdf>`_ """
[docs] def __init__(self, params, cost_fun, batch_generator=None, stepsize_schedule=ConstantStepsizeSchedule(0.001), mass=1.0, speed_of_light=1.0, D=1.0, Bhat=0.0, session=tf.get_default_session(), dtype=tf.float64, seed=None): """ Initialize the sampler parameters and set up a tensorflow.Graph for later queries. Parameters ---------- params : list of tensorflow.Variable objects Target parameters for which we want to sample new values. Cost : tensorflow.Tensor 1-d Cost tensor that depends on `params`. Frequently denoted as U(theta) in literature. batch_generator : BatchGenerator, optional Iterable which returns dictionaries to feed into tensorflow.Session.run() calls to evaluate the cost function. Defaults to `None` which indicates that no batches shall be fed. stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule Iterator class that produces a stream of stepsize values that we can use in our samplers. See also: `pysgmcmc.stepsize_schedules` mass : float, optional mass constant. Defaults to `1.0`. speed_of_light : float, optional "Speed of light" constant. TODO EXTEND DOKU Defaults to `1.0`. D : float, optional Diffusion constant. Defaults to `1.0`. Bhat : float, optional TODO: Documentation session : tensorflow.Session, optional Session object which knows about the external part of the graph (which defines `Cost`, and possibly batches). Used internally to evaluate (burn-in/sample) the sampler. dtype : tensorflow.DType, optional Type of elements of `tensorflow.Tensor` objects used in this sampler. Defaults to `tensorflow.float64`. seed : int, optional Random seed to use. Defaults to `None`. See Also ---------- pysgmcmc.sampling.MCMCSampler: Base class for `RelativisticSGHMCSampler` that specifies how actual sampling is performed (using iterator protocol, e.g. `next(sampler)`). """ # Set up MCMCSampler base class: # initialize member variables common to all samplers # and run initializers for all uninitialized variables in `params` # (to avoid errors in the graph definitions below). super().__init__( params=params, cost_fun=cost_fun, batch_generator=batch_generator, stepsize_schedule=stepsize_schedule, seed=seed, dtype=dtype, session=session ) # Use `-self.Cost` since the rest of the implementation expects # a log likelihood (instead of the *negative* log likelihood that # we normally use as costs) grads = [ vectorize(gradient) for gradient in tf.gradients(-self.cost, params) ] D = tf.constant(D, dtype=dtype) b_hat = tf.constant(Bhat, dtype=dtype) momentum = [ tf.Variable(momentum_sample, dtype=dtype) for momentum_sample in _sample_relativistic_momentum( m=mass, c=speed_of_light, n_params=len(self.params), seed=self.seed ) ] # In internal implementation, stick to mathematical formulas. # For users, prefer readability. m = tf.constant(mass, dtype=dtype) c = tf.constant(speed_of_light, dtype=dtype) for i, (param, grad) in enumerate(zip(params, grads)): vectorized_param = self.vectorized_params[i] p_grad = self.epsilon * momentum[i] / (m * tf.sqrt(momentum[i] * momentum[i] / (tf.square(m) * tf.square(c)) + 1)) n = tf.sqrt(self.epsilon * (2 * D - self.epsilon * b_hat)) * tf.random_normal(shape=vectorized_param.shape, dtype=dtype, seed=seed) momentum_t = tf.assign_add( momentum[i], tf.reshape(self.epsilon * grad + n - D * p_grad, momentum[i].shape) ) p_grad_new = self.epsilon * momentum_t / (m * tf.sqrt(momentum_t * momentum_t / (tf.square(m) * tf.square(c)) + 1)) vectorized_theta_t = tf.assign_add( vectorized_param, tf.reshape(p_grad_new, vectorized_param.shape) ) self.theta_t[i] = tf.assign( param, unvectorize(vectorized_theta_t, original_shape=param.shape) )
[docs]def _sample_relativistic_momentum(m, c, n_params, bounds=(float("-inf"), float("inf")), seed=None): """ Use adaptive rejection sampling (here: provided by external library `ARSpy`) to sample initial values for relativistic momentum `p`. The relativistic momentum variable in Relativistic MCMC has (marginal) distribution .. math:: \\propto e^{-K(p)} where :math:`K(p)` is the relativistic kinetic energy. This distribution is a multivariate generalisation of the symmetric hyperbolic distribution, which cannot easily be sampled directly. Therefore we resort to *adaptive rejection sampling* to generate our samples and initialize our momentum terms properly. See `the paper "Relativistic Monte Carlo" <http://proceedings.mlr.press/v54/lu17b/lu17b.pdf#page=2>`_ for more information on Relativistic Hamiltonian Dynamics. See `Generalized hyperbolic distribution <https://en.wikipedia.org/wiki/Generalised_hyperbolic_distribution>`_ for more information on our target distribution. Parameters ---------- m : float Mass constant used for sampling. c : float Speed of light constant used for sampling. n_params : int Number of target parameters of the target log pdf to sample from. bounds : Tuple[float, float], optional Adaptive rejection sampling bounds to use during sampling. Defaults to `(float("-inf"), float("inf"))`, i.e. unbounded adaptive rejection sampling. seed : int Random seed to use for adaptive rejection sampling. Returns ---------- momentum_samples : list Samples used to initialize our samplers momentum variables. Examples ---------- Drawing 10 momentum values for 10 target parameters via (unbounded) adaptive rejection sampling: >>> n_params = 10 >>> momentum_values = _sample_relativistic_momentum(m=1.0, c=1.0, n_params=n_params) >>> len(momentum_values) == n_params True See also ---------- `ARSpy`: Our external dependency that handles adaptive rejection sampling. Available `here <https://github.com/MFreidank/pyars>`_. """ # XXX: Remove when more is supported, currently only floats for mass # and c are. assert isinstance(m, float) assert isinstance(c, float) def generate_relativistic_logpdf(m, c): def relativistic_log_pdf(p): """ Logarithm of pdf of (multivariate) generalized hyperbolic distribution. """ from numpy import sqrt return -m * c ** 2 * sqrt(p ** 2 / (m ** 2 * c ** 2) + 1.) return relativistic_log_pdf momentum_log_pdf = generate_relativistic_logpdf(m=m, c=c) return adaptive_rejection_sampling( logpdf=momentum_log_pdf, a=-10.0, b=10.0, domain=bounds, n_samples=n_params, seed=seed )