Source code for pysgmcmc.models.bayesian_neural_network

# vim:foldmethod=marker
from collections import OrderedDict
from itertools import islice
import logging
import typing

import numpy as np
import torch
import torch.nn as nn
from torch.utils import data as data_utils

from pysgmcmc.models.architectures import simple_tanh_network
from pysgmcmc.data.utils import (
    infinite_dataloader,
    zero_mean_unit_var_normalization,
    zero_mean_unit_var_unnormalization
)
from pysgmcmc.optimizers import get_optimizer
from pysgmcmc.optimizers.sghmc import SGHMC
from pysgmcmc.models.losses import NegativeLogLikelihood, get_loss, to_bayesian_loss
from pysgmcmc.progressbar import TrainingProgressbar
from pysgmcmc.torch_utils import get_name


class BayesianNeuralNetwork(object):
    def __init__(self,
                 network_architecture=simple_tanh_network,
                 batch_size=20,
                 normalize_input: bool=True,
                 normalize_output: bool=True,
                 num_steps: int=13000,
                 burn_in_steps: int=3000,
                 keep_every: int=100,
                 loss=NegativeLogLikelihood,
                 metrics=(nn.MSELoss,),
                 logging_configuration: typing.Dict[str, typing.Any]={
                     "level": logging.INFO, "datefmt": "y/m/d"
                 },
                 optimizer=SGHMC,
                 **optimizer_kwargs) -> None:
        """ Bayesian Neural Network for regression problems.

        Bayesian Neural Networks use Bayesian methods to estimate the posterior
        distribution of a neural network's weights. This also allows predicting
        uncertainties for test points, which makes Bayesian Neural Networks
        suitable for Bayesian optimization.
        This module uses stochastic gradient MCMC methods to sample
        from the posterior distribution.

        See [1] for more details.

        [1] J. T. Springenberg, A. Klein, S. Falkner, F. Hutter
            Bayesian Optimization with Robust Bayesian Neural Networks.
            In Advances in Neural Information Processing Systems 29 (2016).

        Parameters
        ----------
        network_architecture : pysgmcmc.torch_typing.NetworkFactory, optional
            Function mapping integer input dimensionality to an (initialized)
            `torch.nn.Module`.
        batch_size : int, optional
            Number of training datapoints per mini-batch.
            Defaults to `20`.
        normalize_input : bool, optional
            Specifies if inputs should be normalized to zero mean and unit variance.
        normalize_output : bool, optional
            Specifies whether outputs should be normalized to zero mean and unit
            variance during training (predictions are unnormalized again).
        num_steps : int, optional
            Total number of training iterations to perform, including burn-in.
            In total, roughly `(num_steps - burn_in_steps) // keep_every`
            network weights will be sampled. Defaults to `13000`.
        burn_in_steps : int, optional
            Number of burn-in steps to perform.
            This value is passed to the given `optimizer` if it supports special
            burn-in specific behavior.
            Networks sampled during burn-in are discarded.
            Defaults to `3000`.
        keep_every : int, optional
            Number of sampling steps (after burn-in) to perform before keeping
            a sample. In total, roughly `(num_steps - burn_in_steps) // keep_every`
            network weights will be sampled. Defaults to `100`.
        loss : pysgmcmc.torch_typing.TorchLoss, optional
            Loss to use.
            Default: `pysgmcmc.models.losses.NegativeLogLikelihood`
        metrics : typing.Iterable, optional
            Additional loss classes reported as metrics during training.
            Defaults to `(torch.nn.MSELoss,)`.
        logging_configuration : typing.Dict[str, typing.Any], optional
            Configuration for Python's `logging` module to use.
            Specifying `"level"` as `logging.INFO` or lower in this dictionary
            enables displaying a progressbar for training.
            If no `"level"` is specified, `logging.INFO` is assumed as default choice.
            Defaults to `{"level": logging.INFO, "datefmt": "y/m/d"}`.
        optimizer : `torch.optim.Optimizer`, optional
            Function that returns a `torch.optim.optimizer.Optimizer` subclass.
            Defaults to `pysgmcmc.optimizers.sghmc.SGHMC`.

        """
        assert burn_in_steps >= 0, "Invalid value for amount of burn-in steps -- cannot be negative."
        assert keep_every >= 1, "Invalid value for `keep_every`. Specify how many sampling steps to perform before keeping a sample."
        assert num_steps > burn_in_steps + keep_every, "Not even a single network would be sampled."
        assert batch_size >= 1, "Invalid batch size. Batches must contain at least a single sample."
        assert isinstance(logging_configuration, dict), "Given configuration for logging module must be a dictionary."
        assert callable(optimizer)
        assert callable(loss)

        self.batch_size = batch_size
        self.num_steps = num_steps
        self.num_burn_in_steps = burn_in_steps
        self.loss = loss
        self.metrics = metrics
        self.keep_every = keep_every
        self.normalize_input = normalize_input
        self.normalize_output = normalize_output

        self.optimizer = optimizer
        self.optimizer_kwargs = optimizer_kwargs

        self.network_architecture = network_architecture

        self.sampled_weights = []  # type: typing.List[typing.Tuple[np.ndarray]]

        logging.basicConfig(**logging_configuration)

        if "level" not in logging_configuration:
            logging.warning(
                "No level specified in 'logging_configuration' argument.\n"
                "Falling back to 'logging.INFO'."
            )

        self.debug_level = logging_configuration.get("level", logging.INFO)
        logging.info("Performing %d iterations" % (self.num_steps))

        self.use_progressbar = self.debug_level <= logging.INFO

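    # Illustrative note (not from the original source): keyword arguments collected
    # in `**optimizer_kwargs` are forwarded verbatim to `get_optimizer` in `train`,
    # so sampler hyperparameters can be fixed at construction time, e.g.
    #     bnn = BayesianNeuralNetwork(optimizer=SGHMC, lr=1e-2)
    # assuming the chosen optimizer accepts an `lr` argument.
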
    def _keep_sample(self, step: int) -> bool:
        """ Determine if the network weight sample recorded at `step` should be stored.
            Samples are recorded after burn-in (`step >= self.num_burn_in_steps`),
            and only every `self.keep_every`-th step.

        Parameters
        ----------
        step : int
            Current iteration count.

        Returns
        ----------
        should_keep : bool
            Sentinel that is `True` if and only if network weights
            should be stored at `step`.

        """
        if step < self.num_burn_in_steps:
            logging.debug("Skipping burn-in sample, step = %d" % step)
            return False
        sample_t = step - self.num_burn_in_steps
        return sample_t % self.keep_every == 0

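    # With the defaults (burn_in_steps=3000, keep_every=100), `_keep_sample` returns
    # True at steps 3000, 3100, 3200, ..., so roughly
    # (num_steps - burn_in_steps) // keep_every weight samples are kept in total.
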
    @property
    def network_weights(self) -> np.ndarray:
        """ Extract current network weight values as `np.ndarray`.

        Returns
        ----------
        weight_values : np.ndarray
            Numpy array containing current network weight values.

        """
        return tuple(
            np.asarray(torch.tensor(parameter.data).numpy())
            for parameter in self.model.parameters()
        )

    @network_weights.setter
    def network_weights(self, weights: typing.List[np.ndarray]) -> None:
        """ Assign new `weights` to our neural network's parameters.

        Parameters
        ----------
        weights : typing.List[np.ndarray]
            List of weight values to assign.
            Individual list elements must have shapes that match
            the network parameters with the same index in `self.network_weights`.

        Examples
        ----------
        This serves as a handy bridge between our pytorch parameters
        and corresponding values for them represented as numpy arrays:

        >>> import numpy as np
        >>> bnn = BayesianNeuralNetwork()
        >>> input_dimensionality = 1
        >>> bnn.model = bnn.network_architecture(input_dimensionality)
        >>> dummy_weights = [np.random.rand(*parameter.shape) for parameter in bnn.model.parameters()]
        >>> bnn.network_weights = dummy_weights
        >>> np.allclose(bnn.network_weights, dummy_weights)
        True

        """
        logging.debug("Assigning new network weights: %s" % str(weights))

        for parameter, sample in zip(self.model.parameters(), weights):
            parameter.copy_(torch.from_numpy(sample))

    def train(self, x_train: np.ndarray, y_train: np.ndarray):
        """ Train a BNN using input datapoints `x_train` with corresponding labels `y_train`.

        Parameters
        ----------
        x_train : numpy.ndarray (N, D)
            Input training datapoints.
        y_train : numpy.ndarray (N,)
            Input training labels.

        """
        logging.debug("Training started.")
        logging.debug("Clearing list of sampled weights.")
        self.sampled_weights.clear()

        num_datapoints, input_dimensionality = x_train.shape
        logging.debug(
            "Processing %d training datapoints "
            "with %d dimensions each." % (num_datapoints, input_dimensionality)
        )

        x_train_ = np.asarray(x_train)

        if self.normalize_input:
            logging.debug(
                "Normalizing training datapoints to "
                "zero mean and unit variance."
            )
            x_train_, self.x_mean, self.x_std = zero_mean_unit_var_normalization(x_train)

        y_train_ = np.asarray(y_train)

        if self.normalize_output:
            logging.debug("Normalizing training labels to zero mean and unit variance.")
            y_train_, self.y_mean, self.y_std = zero_mean_unit_var_normalization(y_train)

        train_loader = infinite_dataloader(
            data_utils.DataLoader(
                data_utils.TensorDataset(
                    torch.from_numpy(x_train_).float(),
                    torch.from_numpy(y_train_).float()
                ),
                batch_size=self.batch_size
            )
        )

        try:
            architecture_name = self.network_architecture.__name__
        except AttributeError:
            architecture_name = str(self.network_architecture)
        logging.debug("Using network architecture: %s" % architecture_name)

        self.model = self.network_architecture(
            input_dimensionality=input_dimensionality
        )

        try:
            optimizer_name = self.optimizer.__name__
        except AttributeError:
            optimizer_name = str(self.optimizer)
        logging.debug("Using optimizer: %s" % optimizer_name)

        optimizer = get_optimizer(
            optimizer_cls=self.optimizer,
            parameters=self.model.parameters(),
            num_datapoints=num_datapoints,
            **self.optimizer_kwargs
        )

        loss_function = get_loss(
            self.loss,
            parameters=self.model.parameters(),
            num_datapoints=num_datapoints,
            size_average=True
        )

        if self.use_progressbar:
            logging.info(
                "Progress bar enabled. To disable pass "
                "`logging_configuration={'level': logging.WARN}`."
            )

            losses = OrderedDict(((get_name(self.loss), loss_function),))
            losses.update(
                (get_name(metric), to_bayesian_loss(metric)())
                for metric in self.metrics
            )

            batch_generator = TrainingProgressbar(
                iterable=islice(enumerate(train_loader), self.num_steps),
                losses=losses,
                total=self.num_steps,
                bar_format="{n_fmt}/{total_fmt}[{bar}] - {remaining} - {postfix}"
            )
        else:
            batch_generator = islice(enumerate(train_loader), self.num_steps)

        for epoch, (x_batch, y_batch) in batch_generator:
            optimizer.zero_grad()
            loss = loss_function(input=self.model(x_batch), target=y_batch)

            loss.backward()
            optimizer.step()

            if self.use_progressbar:
                predictions = self.model(x_batch)
                batch_generator.update(
                    predictions=predictions, y_batch=y_batch, epoch=epoch
                )

            if self._keep_sample(epoch):
                logging.debug("Recording sample, epoch = %d " % (epoch))
                weights = self.network_weights
                logging.debug("Sampled weights:\n%s" % str(weights))

                self.sampled_weights.append(weights)

        self.is_trained = True
        return self

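    # Each `optimizer.step()` above performs one stochastic gradient MCMC update;
    # whenever `_keep_sample` returns True, the current `network_weights` snapshot
    # is appended to `self.sampled_weights` and later reused in `predict`.
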
    # Predict {{{ #
    def predict(self, x_test: np.ndarray, return_individual_predictions: bool=False):
        """ Predict mean and variance of the target for test datapoints `x_test`.

        Parameters
        ----------
        x_test : numpy.ndarray (N, D)
            Input test datapoints.
        return_individual_predictions : bool, optional
            If `True`, also return the predictions of each sampled network.
            Defaults to `False`.

        Returns
        ----------
        mean_prediction : numpy.ndarray (N,)
            Mean of the predictions of all sampled networks.
        variance_prediction : numpy.ndarray (N,)
            Variance (uncertainty) of the predictions of all sampled networks.

        """
        logging.debug("Predicting started.")
        x_test_ = np.asarray(x_test)

        logging.debug(
            "Processing %d test datapoints "
            "with %d dimensions each." % x_test_.shape
        )

        if self.normalize_input:
            logging.debug(
                "Normalizing test datapoints to "
                "zero mean and unit variance."
            )
            x_test_, *_ = zero_mean_unit_var_normalization(x_test, self.x_mean, self.x_std)

        def network_predict(x_test_, weights):
            logging.debug(
                "Predicting on data:\n%s Using weights:\n%s" % (
                    str(x_test_), str(weights)
                )
            )

            with torch.no_grad():
                self.network_weights = weights
                return self.model(torch.from_numpy(x_test_).float()).numpy()[:, 0]

        logging.debug("Predicting with %d networks." % len(self.sampled_weights))
        network_outputs = [
            network_predict(x_test_, weights=weights)
            for weights in self.sampled_weights
        ]

        mean_prediction = np.mean(network_outputs, axis=0)
        variance_prediction = np.mean((network_outputs - mean_prediction) ** 2, axis=0)

        if self.normalize_output:
            logging.debug("Unnormalizing predictions.")
            logging.debug(
                "Mean of network predictions "
                "before unnormalization:\n%s" % str(mean_prediction)
            )
            logging.debug(
                "Variance/Uncertainty of network predictions "
                "before unnormalization:\n%s" % str(variance_prediction)
            )

            mean_prediction = zero_mean_unit_var_unnormalization(
                mean_prediction, self.y_mean, self.y_std
            )
            variance_prediction *= self.y_std ** 2

            logging.debug(
                "Mean of network predictions "
                "after unnormalization:\n%s" % str(mean_prediction)
            )
            logging.debug(
                "Variance/Uncertainty of network predictions "
                "after unnormalization:\n%s" % str(variance_prediction)
            )

        if return_individual_predictions:
            return mean_prediction, variance_prediction, network_outputs
        return mean_prediction, variance_prediction
    # }}} Predict #
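

# A minimal usage sketch (not part of the original module), assuming the default
# `simple_tanh_network` architecture and SGHMC sampler: fit a noisy sinc function
# and query the predictive mean and variance. `num_steps`, `burn_in_steps` and
# `keep_every` are reduced here purely to keep the example fast.
if __name__ == "__main__":
    rng = np.random.RandomState(1)
    x_train = rng.uniform(0, 6, size=(200, 1))
    y_train = np.sinc(x_train[:, 0]) + rng.normal(0, 0.05, size=200)

    bnn = BayesianNeuralNetwork(num_steps=4000, burn_in_steps=2000, keep_every=50)
    bnn.train(x_train, y_train)

    x_test = np.linspace(0, 6, 100)[:, None]
    mean_prediction, variance_prediction = bnn.predict(x_test)
    # Both arrays have shape (100,); `np.sqrt(variance_prediction)` gives the
    # model's predictive standard deviation (uncertainty) at each test point.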