Source code for pysgmcmc.samplers.relativistic_sghmc

# vim:foldmethod=marker
import tensorflow as tf
from pysgmcmc.samplers.base_classes import MCMCSampler
from pysgmcmc.stepsize_schedules import ConstantStepsizeSchedule

from pysgmcmc.tensor_utils import (
    vectorize, unvectorize
)

from arspy.ars import adaptive_rejection_sampling


[docs]class RelativisticSGHMCSampler(MCMCSampler):
    """ Relativistic Stochastic Gradient Hamiltonian Monte-Carlo Sampler.

        See [1] for more details on Relativistic SGHMC.

        [1] X. Lu, V. Perrone, L. Hasenclever, Y. W. Teh, S. J. Vollmer
            In Proceedings of the 20 th International Conference on Artificial Intelligence and Statistics (AISTATS) 2017\n
            `Relativistic Monte Carlo <http://proceedings.mlr.press/v54/lu17b/lu17b.pdf>`_

    """

[docs]    def __init__(self, params, cost_fun, batch_generator=None,
                 stepsize_schedule=ConstantStepsizeSchedule(0.001),
                 mass=1.0, speed_of_light=1.0, D=1.0, Bhat=0.0,
                 session=tf.get_default_session(), dtype=tf.float64, seed=None):
        """ Initialize the sampler parameters and set up a tensorflow.Graph
            for later queries.

        Parameters
        ----------
        params : list of tensorflow.Variable objects
            Target parameters for which we want to sample new values.

        Cost : tensorflow.Tensor
            1-d Cost tensor that depends on `params`.
            Frequently denoted as U(theta) in literature.

        batch_generator : BatchGenerator, optional
            Iterable which returns dictionaries to feed into
            tensorflow.Session.run() calls to evaluate the cost function.
            Defaults to `None` which indicates that no batches shall be fed.

        stepsize_schedule : pysgmcmc.stepsize_schedules.StepsizeSchedule
            Iterator class that produces a stream of stepsize values that
            we can use in our samplers.
            See also: `pysgmcmc.stepsize_schedules`

        mass : float, optional
            mass constant.
            Defaults to `1.0`.

        speed_of_light : float, optional
            "Speed of light" constant. TODO EXTEND DOKU
            Defaults to `1.0`.

        D : float, optional
            Diffusion constant.
            Defaults to `1.0`.

        Bhat : float, optional
            TODO: Documentation

        session : tensorflow.Session, optional
            Session object which knows about the external part of the graph
            (which defines `Cost`, and possibly batches).
            Used internally to evaluate (burn-in/sample) the sampler.

        dtype : tensorflow.DType, optional
            Type of elements of `tensorflow.Tensor` objects used in this sampler.
            Defaults to `tensorflow.float64`.

        seed : int, optional
            Random seed to use.
            Defaults to `None`.

        See Also
        ----------
        pysgmcmc.sampling.MCMCSampler:
            Base class for `RelativisticSGHMCSampler` that specifies how
            actual sampling is performed (using iterator protocol,
            e.g. `next(sampler)`).

        """

        # Set up MCMCSampler base class:
        # initialize member variables common to all samplers
        # and run initializers for all uninitialized variables in `params`
        # (to avoid errors in the graph definitions below).
        super().__init__(
            params=params, cost_fun=cost_fun, batch_generator=batch_generator,
            stepsize_schedule=stepsize_schedule,
            seed=seed, dtype=dtype, session=session
        )

        # Use `-self.Cost` since the rest of the implementation expects
        # a log likelihood (instead of the *negative* log likelihood that
        # we normally use as costs)
        grads = [
            vectorize(gradient)
            for gradient in tf.gradients(-self.cost, params)
        ]

        D = tf.constant(D, dtype=dtype)
        b_hat = tf.constant(Bhat, dtype=dtype)

        momentum = [
            tf.Variable(momentum_sample, dtype=dtype)
            for momentum_sample in _sample_relativistic_momentum(
                m=mass, c=speed_of_light, n_params=len(self.params), seed=self.seed
            )
        ]

        # In internal implementation, stick to mathematical formulas.
        # For users, prefer readability.
        m = tf.constant(mass, dtype=dtype)
        c = tf.constant(speed_of_light, dtype=dtype)

        for i, (param, grad) in enumerate(zip(params, grads)):
            vectorized_param = self.vectorized_params[i]

            p_grad = self.epsilon * momentum[i] / (m * tf.sqrt(momentum[i] * momentum[i] / (tf.square(m) * tf.square(c)) + 1))

            n = tf.sqrt(self.epsilon * (2 * D - self.epsilon * b_hat)) * tf.random_normal(shape=vectorized_param.shape, dtype=dtype, seed=seed)
            momentum_t = tf.assign_add(
                momentum[i],
                tf.reshape(self.epsilon * grad + n - D * p_grad, momentum[i].shape)
            )

            p_grad_new = self.epsilon * momentum_t / (m * tf.sqrt(momentum_t * momentum_t / (tf.square(m) * tf.square(c)) + 1))
            vectorized_theta_t = tf.assign_add(
                vectorized_param,
                tf.reshape(p_grad_new, vectorized_param.shape)
            )

            self.theta_t[i] = tf.assign(
                param,
                unvectorize(vectorized_theta_t, original_shape=param.shape)
            )


[docs]def _sample_relativistic_momentum(m, c, n_params,
                                  bounds=(float("-inf"), float("inf")),
                                  seed=None):
    """
    Use adaptive rejection sampling (here: provided by external library `ARSpy`)
    to sample initial values for relativistic momentum `p`.
    The relativistic momentum variable in Relativistic MCMC has (marginal)
    distribution
    .. math:: \\propto e^{-K(p)}
    where :math:`K(p)` is the relativistic kinetic energy.
    This distribution is a multivariate generalisation of the symmetric
    hyperbolic distribution, which cannot easily be sampled directly.
    Therefore we resort to *adaptive rejection sampling* to generate our samples
    and initialize our momentum terms properly.

    See `the paper "Relativistic Monte Carlo" <http://proceedings.mlr.press/v54/lu17b/lu17b.pdf#page=2>`_ for more information on Relativistic Hamiltonian Dynamics.

    See `Generalized hyperbolic distribution <https://en.wikipedia.org/wiki/Generalised_hyperbolic_distribution>`_ for more information on our target distribution.

    Parameters
    ----------
    m : float
        Mass constant used for sampling.

    c : float
        Speed of light constant used for sampling.

    n_params : int
        Number of target parameters of the target log pdf to sample from.

    bounds : Tuple[float, float], optional
        Adaptive rejection sampling bounds to use during sampling.
        Defaults to `(float("-inf"), float("inf"))`, i.e. unbounded
        adaptive rejection sampling.

    seed : int
        Random seed to use for adaptive rejection sampling.

    Returns
    ----------
    momentum_samples : list
        Samples used to initialize our samplers momentum variables.

    Examples
    ----------

    Drawing 10 momentum values for 10 target parameters via (unbounded)
    adaptive rejection sampling:

    >>> n_params = 10
    >>> momentum_values = _sample_relativistic_momentum(m=1.0, c=1.0, n_params=n_params)
    >>> len(momentum_values) == n_params
    True

    See also
    ----------
    `ARSpy`: Our external dependency that handles adaptive rejection sampling.
             Available `here <https://github.com/MFreidank/pyars>`_.

    """
    # XXX: Remove when more is supported, currently only floats for mass
    # and c are.
    assert isinstance(m, float)
    assert isinstance(c, float)

    def generate_relativistic_logpdf(m, c):
        def relativistic_log_pdf(p):
            """
            Logarithm of pdf of (multivariate) generalized
            hyperbolic distribution.
            """
            from numpy import sqrt
            return -m * c ** 2 * sqrt(p ** 2 / (m ** 2 * c ** 2) + 1.)
        return relativistic_log_pdf

    momentum_log_pdf = generate_relativistic_logpdf(m=m, c=c)
    return adaptive_rejection_sampling(
        logpdf=momentum_log_pdf, a=-10.0, b=10.0,
        domain=bounds, n_samples=n_params,
        seed=seed
    )