Merge pull request #323 from SheffieldML/stochastics

[stochastics] update for new stochastic iptimizers in gpy
This commit is contained in:
Max Zwiessele 2016-03-08 16:26:39 +00:00
commit af76126ef1
7 changed files with 188 additions and 24 deletions

View file

@ -30,6 +30,8 @@ install:
- source install_retry.sh
- pip install codecov
- pip install pypandoc
- pip install git+git://github.com/BRML/climin.git
- pip install autograd
- python setup.py develop
script:

View file

@ -1,5 +1,8 @@
from paramz.optimization import stochastics, Optimizer
from paramz.optimization import Optimizer
from . import stochastics
from paramz.optimization import *
import sys
sys.modules['GPy.inference.optimization.stochastics'] = stochastics
sys.modules['GPy.inference.optimization.Optimizer'] = Optimizer

View file

@ -0,0 +1,119 @@
#===============================================================================
# Copyright (c) 2015, Max Zwiessele
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of paramax nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#===============================================================================
class StochasticStorage(object):
'''
This is a container for holding the stochastic parameters,
such as subset indices or step length and so on.
self.d has to be a list of lists:
[dimension indices, nan indices for those dimensions]
so that the minibatches can be used as efficiently as possible.
'''
def __init__(self, model):
"""
Initialize this stochastic container using the given model
"""
def do_stochastics(self):
"""
Update the internal state to the next batch of the stochastic
descent algorithm.
"""
pass
def reset(self):
"""
Reset the state of this stochastics generator.
"""
class SparseGPMissing(StochasticStorage):
def __init__(self, model, batchsize=1):
"""
Here we want to loop over all dimensions everytime.
Thus, we can just make sure the loop goes over self.d every
time. We will try to get batches which look the same together
which speeds up calculations significantly.
"""
import numpy as np
self.Y = model.Y_normalized
bdict = {}
#For N > 1000 array2string default crops
opt = np.get_printoptions()
np.set_printoptions(threshold=np.inf)
for d in range(self.Y.shape[1]):
inan = np.isnan(self.Y)[:, d]
arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
np.set_printoptions(**opt)
self.d = bdict.values()
class SparseGPStochastics(StochasticStorage):
"""
For the sparse gp we need to store the dimension we are in,
and the indices corresponding to those
"""
def __init__(self, model, batchsize=1, missing_data=True):
self.batchsize = batchsize
self.output_dim = model.Y.shape[1]
self.Y = model.Y_normalized
self.missing_data = missing_data
self.reset()
self.do_stochastics()
def do_stochastics(self):
import numpy as np
if self.batchsize == 1:
self.current_dim = (self.current_dim+1)%self.output_dim
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
else:
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
bdict = {}
if self.missing_data:
opt = np.get_printoptions()
np.set_printoptions(threshold=np.inf)
for d in self.d:
inan = np.isnan(self.Y[:, d])
arr_str = np.array2string(inan,np.inf, 0,True, '',formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
np.set_printoptions(**opt)
self.d = bdict.values()
else:
self.d = [[self.d, None]]
def reset(self):
self.current_dim = -1
self.d = None

View file

@ -99,6 +99,9 @@ class Stationary(Kern):
@Cache_this(limit=3, ignore_args=())
def dK_dr_via_X(self, X, X2):
"""
compute the derivative of K wrt X going through X
"""
#a convenience function, so we can cache dK_dr
return self.dK_dr(self._scaled_dist(X, X2))

View file

@ -40,12 +40,13 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1]
if X_variance == False:
if X_variance is False:
self.logger.info('no variance on X, activating sparse GPLVM')
X = Param("latent space", X)
elif X_variance is None:
self.logger.info("initializing latent space variance ~ uniform(0,.1)")
X_variance = np.random.uniform(0,.1,X.shape)
else:
if X_variance is None:
self.logger.info("initializing latent space variance ~ uniform(0,.1)")
X_variance = np.random.uniform(0,.1,X.shape)
self.variational_prior = NormalPrior()
X = NormalPosterior(X, X_variance)
@ -71,13 +72,13 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
self.X = X
self.link_parameter(self.X, 0)
def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form."""
X.mean.gradient, X.variance.gradient = X_grad
#def set_X_gradients(self, X, X_grad):
# """Set the gradients of the posterior distribution of X in its specific form."""
# X.mean.gradient, X.variance.gradient = X_grad
def get_X_gradients(self, X):
"""Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient
#def get_X_gradients(self, X):
# """Get the gradients of the posterior distribution of X in its specific form."""
# return X.mean.gradient, X.variance.gradient
def _outer_values_update(self, full_values):
"""
@ -122,7 +123,7 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
if self.missing_data or not self.stochastics:
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
elif self.stochastics:
else: #self.stochastics is given:
d = self.output_dim
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d

View file

@ -41,6 +41,7 @@ class SparseGPMiniBatch(SparseGP):
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False,
missing_data=False, stochastic=False, batchsize=1):
self._update_stochastics = False
# pick a sensible inference method
if inference_method is None:
@ -73,7 +74,14 @@ class SparseGPMiniBatch(SparseGP):
logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0)
self.posterior = None
def optimize(self, optimizer=None, start=None, **kwargs):
try:
self._update_stochastics = True
SparseGP.optimize(self, optimizer=optimizer, start=start, **kwargs)
finally:
self._update_stochastics = False
def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior)
@ -226,16 +234,16 @@ class SparseGPMiniBatch(SparseGP):
woodbury_inv = self.posterior._woodbury_inv
woodbury_vector = self.posterior._woodbury_vector
if not self.stochastics:
m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
message = m_f(-1)
print(message, end=' ')
#if not self.stochastics:
# m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
# message = m_f(-1)
# print(message, end=' ')
for d, ninan in self.stochastics.d:
if not self.stochastics:
print(' '*(len(message)) + '\r', end=' ')
message = m_f(d)
print(message, end=' ')
#if not self.stochastics:
# print(' '*(len(message)) + '\r', end=' ')
# message = m_f(d)
# print(message, end=' ')
psi0ni = self.psi0[ninan]
psi1ni = self.psi1[ninan]
@ -262,8 +270,8 @@ class SparseGPMiniBatch(SparseGP):
woodbury_vector[:, d] = posterior.woodbury_vector
self._log_marginal_likelihood += log_marginal_likelihood
if not self.stochastics:
print('')
#if not self.stochastics:
# print('')
if self.posterior is None:
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
@ -314,6 +322,8 @@ class SparseGPMiniBatch(SparseGP):
if self.missing_data:
self._outer_loop_for_missing_data()
elif self.stochastics:
if self._update_stochastics:
self.stochastics.do_stochastics()
self._outer_loop_without_missing_data()
else:
self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)

View file

@ -54,7 +54,7 @@ class BGPLVMTest(unittest.TestCase):
def test_lik_comparisons_m0_s0(self):
# Test if the different implementations give the exact same likelihood as the full model.
# All of the following settings should give the same likelihood and gradients as the full model:
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=False)
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, X_variance=self.m_full.X.variance.values, missing_data=False, stochastic=False)
m[:] = self.m_full[:]
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
@ -124,6 +124,32 @@ class SparseGPMinibatchTest(unittest.TestCase):
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
assert(m.checkgrad())
def test_sparsegp_init(self):
# Test if the different implementations give the exact same likelihood as the full model.
# All of the following settings should give the same likelihood and gradients as the full model:
np.random.seed(1234)
Z = self.X[np.random.choice(self.X.shape[0], replace=False, size=10)].copy()
Q = Z.shape[1]
m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=True, stochastic=False)
assert(m.checkgrad())
m.optimize('adadelta', max_iters=10)
assert(m.checkgrad())
m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=True, stochastic=True)
assert(m.checkgrad())
m.optimize('rprop', max_iters=10)
assert(m.checkgrad())
m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=False)
assert(m.checkgrad())
m.optimize('rprop', max_iters=10)
assert(m.checkgrad())
m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=True)
assert(m.checkgrad())
m.optimize('adadelta', max_iters=10)
assert(m.checkgrad())
def test_predict_missing_data(self):
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, X_variance=False, missing_data=True, stochastic=True, batchsize=self.Y.shape[1])
m[:] = self.m_full[:]