From e4a4344334709abcc648acf368d9b1301c7042be Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 8 Mar 2016 09:47:24 +0000 Subject: [PATCH 1/7] [stochastics] update for new stochastic optimizers in gpy --- GPy/inference/optimization/__init__.py | 5 +- GPy/inference/optimization/stochastics.py | 119 ++++++++++++++++++++++ GPy/models/sparse_gp_minibatch.py | 12 ++- GPy/testing/minibatch_tests.py | 18 ++++ 4 files changed, 152 insertions(+), 2 deletions(-) create mode 100644 GPy/inference/optimization/stochastics.py diff --git a/GPy/inference/optimization/__init__.py b/GPy/inference/optimization/__init__.py index a6247d96..2fa96960 100644 --- a/GPy/inference/optimization/__init__.py +++ b/GPy/inference/optimization/__init__.py @@ -1,5 +1,8 @@ -from paramz.optimization import stochastics, Optimizer +from paramz.optimization import Optimizer +from . import stochastics + from paramz.optimization import * import sys + sys.modules['GPy.inference.optimization.stochastics'] = stochastics sys.modules['GPy.inference.optimization.Optimizer'] = Optimizer diff --git a/GPy/inference/optimization/stochastics.py b/GPy/inference/optimization/stochastics.py new file mode 100644 index 00000000..41f5320b --- /dev/null +++ b/GPy/inference/optimization/stochastics.py @@ -0,0 +1,119 @@ +#=============================================================================== +# Copyright (c) 2015, Max Zwiessele +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of paramax nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#=============================================================================== + +class StochasticStorage(object): + ''' + This is a container for holding the stochastic parameters, + such as subset indices or step length and so on. + + self.d has to be a list of lists: + [dimension indices, nan indices for those dimensions] + so that the minibatches can be used as efficiently as possible. + ''' + def __init__(self, model): + """ + Initialize this stochastic container using the given model + """ + + def do_stochastics(self): + """ + Update the internal state to the next batch of the stochastic + descent algorithm. + """ + pass + + def reset(self): + """ + Reset the state of this stochastics generator. + """ + +class SparseGPMissing(StochasticStorage): + def __init__(self, model, batchsize=1): + """ + Here we want to loop over all dimensions everytime. + Thus, we can just make sure the loop goes over self.d every + time. 
We will try to get batches which look the same together + which speeds up calculations significantly. + """ + import numpy as np + self.Y = model.Y_normalized + bdict = {} + #For N > 1000 array2string default crops + opt = np.get_printoptions() + np.set_printoptions(threshold=np.inf) + for d in range(self.Y.shape[1]): + inan = np.isnan(self.Y)[:, d] + arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'}) + try: + bdict[arr_str][0].append(d) + except: + bdict[arr_str] = [[d], ~inan] + np.set_printoptions(**opt) + self.d = bdict.values() + +class SparseGPStochastics(StochasticStorage): + """ + For the sparse gp we need to store the dimension we are in, + and the indices corresponding to those + """ + def __init__(self, model, batchsize=1, missing_data=True): + self.batchsize = batchsize + self.output_dim = model.Y.shape[1] + self.Y = model.Y_normalized + self.missing_data = missing_data + self.reset() + self.do_stochastics() + + def do_stochastics(self): + import numpy as np + if self.batchsize == 1: + self.current_dim = (self.current_dim+1)%self.output_dim + self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]] + else: + self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False) + bdict = {} + if self.missing_data: + opt = np.get_printoptions() + np.set_printoptions(threshold=np.inf) + for d in self.d: + inan = np.isnan(self.Y[:, d]) + arr_str = np.array2string(inan,np.inf, 0,True, '',formatter={'bool':lambda x: '1' if x else '0'}) + try: + bdict[arr_str][0].append(d) + except: + bdict[arr_str] = [[d], ~inan] + np.set_printoptions(**opt) + self.d = bdict.values() + else: + self.d = [[self.d, None]] + + def reset(self): + self.current_dim = -1 + self.d = None diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index 6afb19e9..92a340f5 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ 
-41,6 +41,7 @@ class SparseGPMiniBatch(SparseGP): def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None, normalizer=False, missing_data=False, stochastic=False, batchsize=1): + self._update_stochastics = False # pick a sensible inference method if inference_method is None: @@ -73,7 +74,14 @@ class SparseGPMiniBatch(SparseGP): logger.info("Adding Z as parameter") self.link_parameter(self.Z, index=0) self.posterior = None - + + def optimize(self, optimizer=None, start=None, **kwargs): + try: + self._update_stochastics = True + SparseGP.optimize(self, optimizer=optimizer, start=start, **kwargs) + finally: + self._update_stochastics = False + def has_uncertain_inputs(self): return isinstance(self.X, VariationalPosterior) @@ -314,6 +322,8 @@ class SparseGPMiniBatch(SparseGP): if self.missing_data: self._outer_loop_for_missing_data() elif self.stochastics: + if self._update_stochastics: + self.stochastics.do_stochastics() self._outer_loop_without_missing_data() else: self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata) diff --git a/GPy/testing/minibatch_tests.py b/GPy/testing/minibatch_tests.py index 6dd1db22..a5e9a884 100644 --- a/GPy/testing/minibatch_tests.py +++ b/GPy/testing/minibatch_tests.py @@ -124,6 +124,24 @@ class SparseGPMinibatchTest(unittest.TestCase): np.testing.assert_allclose(m.gradient, self.m_full.gradient) assert(m.checkgrad()) + def test_sparsegp_init(self): + # Test if the different implementations give the exact same likelihood as the full model. 
+ # All of the following settings should give the same likelihood and gradients as the full model: + np.random.seed(1234) + Z = self.X[np.random.choice(self.X.shape[0], replace=False, size=10)].copy() + Q = Z.shape[1] + m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=True, stochastic=False) + assert(m.checkgrad()) + + m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=True, stochastic=True) + assert(m.checkgrad()) + + m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=False) + assert(m.checkgrad()) + + m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=True) + assert(m.checkgrad()) + def test_predict_missing_data(self): m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, X_variance=False, missing_data=True, stochastic=True, batchsize=self.Y.shape[1]) m[:] = self.m_full[:] From eaf20a952edbd0986b948882a454679adce9a242 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 8 Mar 2016 10:01:07 +0000 Subject: [PATCH 2/7] [stochastics] added optimization for a few runs --- GPy/testing/minibatch_tests.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/GPy/testing/minibatch_tests.py b/GPy/testing/minibatch_tests.py index a5e9a884..7b39af95 100644 --- a/GPy/testing/minibatch_tests.py +++ b/GPy/testing/minibatch_tests.py @@ -132,15 +132,23 @@ class SparseGPMinibatchTest(unittest.TestCase): Q = Z.shape[1] m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), 
GPy.likelihoods.Gaussian(), missing_data=True, stochastic=False) assert(m.checkgrad()) + m.optimize(max_iters=10) + assert(m.checkgrad()) m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=True, stochastic=True) assert(m.checkgrad()) + m.optimize(max_iters=10) + assert(m.checkgrad()) m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=False) assert(m.checkgrad()) + m.optimize(max_iters=10) + assert(m.checkgrad()) m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=True) assert(m.checkgrad()) + m.optimize(max_iters=10) + assert(m.checkgrad()) def test_predict_missing_data(self): m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, X_variance=False, missing_data=True, stochastic=True, batchsize=self.Y.shape[1]) From 88a9b92c80402f1288e9192a6987acd8562d199b Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 8 Mar 2016 10:23:05 +0000 Subject: [PATCH 3/7] [climin] added tests and install directions for travis --- .travis.yml | 1 + GPy/testing/minibatch_tests.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0e9efae1..f4c38549 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,6 +30,7 @@ install: - source install_retry.sh - pip install codecov - pip install pypandoc +- pip install git+git://github.com/BRML/climin.git - python setup.py develop script: diff --git a/GPy/testing/minibatch_tests.py b/GPy/testing/minibatch_tests.py index 7b39af95..d217cb16 100644 --- a/GPy/testing/minibatch_tests.py +++ b/GPy/testing/minibatch_tests.py @@ -132,22 +132,22 @@ class SparseGPMinibatchTest(unittest.TestCase): Q = 
Z.shape[1] m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=True, stochastic=False) assert(m.checkgrad()) - m.optimize(max_iters=10) + m.optimize('adadelta', max_iters=10) assert(m.checkgrad()) m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=True, stochastic=True) assert(m.checkgrad()) - m.optimize(max_iters=10) + m.optimize('rprop', max_iters=10) assert(m.checkgrad()) m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=False) assert(m.checkgrad()) - m.optimize(max_iters=10) + m.optimize('rprop', max_iters=10) assert(m.checkgrad()) m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=True) assert(m.checkgrad()) - m.optimize(max_iters=10) + m.optimize('adadelta', max_iters=10) assert(m.checkgrad()) def test_predict_missing_data(self): From 67043e83d71f0411b6a4ab4629d181a4158e54f2 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 8 Mar 2016 11:19:50 +0000 Subject: [PATCH 4/7] [minibatch] added coverage for branching, spotted bug in X_variance --- GPy/models/bayesian_gplvm_minibatch.py | 23 ++++++++++++----------- GPy/testing/minibatch_tests.py | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py index cc17e9ee..2a457a21 100644 --- a/GPy/models/bayesian_gplvm_minibatch.py +++ b/GPy/models/bayesian_gplvm_minibatch.py @@ -40,12 +40,13 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): Z = np.random.permutation(X.copy())[:num_inducing] assert Z.shape[1] == X.shape[1] - if 
X_variance == False: + if X_variance is False: self.logger.info('no variance on X, activating sparse GPLVM') X = Param("latent space", X) - elif X_variance is None: - self.logger.info("initializing latent space variance ~ uniform(0,.1)") - X_variance = np.random.uniform(0,.1,X.shape) + else: + if X_variance is None: + self.logger.info("initializing latent space variance ~ uniform(0,.1)") + X_variance = np.random.uniform(0,.1,X.shape) self.variational_prior = NormalPrior() X = NormalPosterior(X, X_variance) @@ -71,13 +72,13 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): self.X = X self.link_parameter(self.X, 0) - def set_X_gradients(self, X, X_grad): - """Set the gradients of the posterior distribution of X in its specific form.""" - X.mean.gradient, X.variance.gradient = X_grad + #def set_X_gradients(self, X, X_grad): + # """Set the gradients of the posterior distribution of X in its specific form.""" + # X.mean.gradient, X.variance.gradient = X_grad - def get_X_gradients(self, X): - """Get the gradients of the posterior distribution of X in its specific form.""" - return X.mean.gradient, X.variance.gradient + #def get_X_gradients(self, X): + # """Get the gradients of the posterior distribution of X in its specific form.""" + # return X.mean.gradient, X.variance.gradient def _outer_values_update(self, full_values): """ @@ -122,7 +123,7 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch): if self.missing_data or not self.stochastics: self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X) - elif self.stochastics: + else: #self.stochastics is given: d = self.output_dim self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d diff --git a/GPy/testing/minibatch_tests.py b/GPy/testing/minibatch_tests.py index d217cb16..fbf12939 100644 --- a/GPy/testing/minibatch_tests.py +++ b/GPy/testing/minibatch_tests.py @@ -54,7 +54,7 @@ class BGPLVMTest(unittest.TestCase): def 
test_lik_comparisons_m0_s0(self): # Test if the different implementations give the exact same likelihood as the full model. # All of the following settings should give the same likelihood and gradients as the full model: - m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=False) + m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, X_variance=self.m_full.X.variance.values, missing_data=False, stochastic=False) m[:] = self.m_full[:] np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7) np.testing.assert_allclose(m.gradient, self.m_full.gradient) From eeb2733059fa53ac18eca9ed07f9c8b8224109ef Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 8 Mar 2016 12:04:24 +0000 Subject: [PATCH 5/7] [sparse gp] commented out print statements, which are never used --- GPy/models/sparse_gp_minibatch.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index 92a340f5..d1c252f8 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -234,16 +234,16 @@ class SparseGPMiniBatch(SparseGP): woodbury_inv = self.posterior._woodbury_inv woodbury_vector = self.posterior._woodbury_vector - if not self.stochastics: - m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim) - message = m_f(-1) - print(message, end=' ') + #if not self.stochastics: + # m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim) + # message = m_f(-1) + # print(message, end=' ') for d, ninan in self.stochastics.d: - if not self.stochastics: - print(' '*(len(message)) + '\r', end=' ') - message = m_f(d) - print(message, end=' ') + #if not self.stochastics: + # print(' '*(len(message)) + '\r', end=' ') + # message = m_f(d) + # print(message, end=' ') psi0ni = self.psi0[ninan] psi1ni = 
self.psi1[ninan] @@ -270,8 +270,8 @@ class SparseGPMiniBatch(SparseGP): woodbury_vector[:, d] = posterior.woodbury_vector self._log_marginal_likelihood += log_marginal_likelihood - if not self.stochastics: - print('') + #if not self.stochastics: + # print('') if self.posterior is None: self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, From 51af6a236a97613cb36866a5506ebfa8b0e1b8bd Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 8 Mar 2016 14:59:28 +0000 Subject: [PATCH 6/7] [autograd] added install instr for autograd --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index f4c38549..b236d515 100644 --- a/.travis.yml +++ b/.travis.yml @@ -31,6 +31,7 @@ install: - pip install codecov - pip install pypandoc - pip install git+git://github.com/BRML/climin.git +- pip install autograd - python setup.py develop script: From fede8055267251405c7d13516986153d514a7ae0 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 8 Mar 2016 14:59:53 +0000 Subject: [PATCH 7/7] [kern] added doc string --- GPy/kern/src/stationary.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py index 5451c7a3..286edcc2 100644 --- a/GPy/kern/src/stationary.py +++ b/GPy/kern/src/stationary.py @@ -99,6 +99,9 @@ class Stationary(Kern): @Cache_this(limit=3, ignore_args=()) def dK_dr_via_X(self, X, X2): + """ + compute the derivative of K wrt X going through X + """ #a convenience function, so we can cache dK_dr return self.dK_dr(self._scaled_dist(X, X2))