Merge pull request #323 from SheffieldML/stochastics

[stochastics] update for new stochastic optimizers in gpy
2026-05-27 14:25:16 +02:00 · 2016-03-08 16:26:39 +00:00 · 2016-03-08 16:26:39 +00:00 · af76126ef1
commit af76126ef1
parent c72d085e2b fede805526
7 changed files with 188 additions and 24 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -30,6 +30,8 @@ install:
 - source install_retry.sh
 - pip install codecov
 - pip install pypandoc
 - pip install git+git://github.com/BRML/climin.git
 - pip install autograd
 - python setup.py develop
 script:
--- a/GPy/inference/optimization/init.py
+++ b/GPy/inference/optimization/init.py
@ -1,5 +1,8 @@
-from paramz.optimization import stochastics, Optimizer
+from paramz.optimization import Optimizer
 from . import stochastics
 from paramz.optimization import *
 import sys
 sys.modules['GPy.inference.optimization.stochastics'] = stochastics
 sys.modules['GPy.inference.optimization.Optimizer'] = Optimizer
--- a/GPy/inference/optimization/stochastics.py
+++ b/GPy/inference/optimization/stochastics.py
@ -0,0 +1,119 @@
 #===============================================================================
 # Copyright (c) 2015, Max Zwiessele
 # All rights reserved.
 # 
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
 # 
 # * Redistributions of source code must retain the above copyright notice, this
 #   list of conditions and the following disclaimer.
 # 
 # * Redistributions in binary form must reproduce the above copyright notice,
 #   this list of conditions and the following disclaimer in the documentation
 #   and/or other materials provided with the distribution.
 # 
 # * Neither the name of paramax nor the names of its
 #   contributors may be used to endorse or promote products derived from
 #   this software without specific prior written permission.
 # 
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #===============================================================================
 class StochasticStorage(object):
    '''
    Container for the stochastic state of a minibatch model, such as
    the subset of output dimensions currently in use.
    self.d has to be a list of lists:
    [dimension indices, boolean mask of the non-nan rows for those dimensions]
    so that the minibatches can be used as efficiently as possible.
    '''
    def __init__(self, model):
        """
        Initialize this stochastic container using the given model
        """
    def do_stochastics(self):
        """
        Update the internal state to the next batch of the stochastic
        descent algorithm.
        """
        pass
    def reset(self):
        """
        Reset the state of this stochastics generator.
        """
 def _group_dims_by_missing_pattern(Y, dims):
    """
    Group the output dimensions in dims by their pattern of missing values.

    :param Y: 2d array of outputs, indexed as Y[:, d]
    :param dims: iterable of column indices of Y to group
    :returns: list of [dimension indices, mask] pairs in order of first
        appearance, where mask is True for every row which is not nan in
        the grouped columns
    """
    import numpy as np
    groups = {}
    for d in dims:
        inan = np.isnan(Y[:, d])
        # The raw bytes of the boolean column identify the pattern exactly,
        # without touching numpy's global print options.
        key = inan.tobytes()
        if key in groups:
            groups[key][0].append(d)
        else:
            groups[key] = [[d], ~inan]
    return list(groups.values())
 class SparseGPMissing(StochasticStorage):
    def __init__(self, model, batchsize=1):
        """
        Here we want to loop over all dimensions everytime.
        Thus, we can just make sure the loop goes over self.d every
        time. We will try to get batches which look the same together
        which speeds up calculations significantly.

        :param model: object providing the outputs as model.Y_normalized
        :param batchsize: not used by this class
        """
        self.Y = model.Y_normalized
        self.d = _group_dims_by_missing_pattern(self.Y, range(self.Y.shape[1]))
 class SparseGPStochastics(StochasticStorage):
    """
    For the sparse gp we need to store the dimension we are in,
    and the indices corresponding to those
    """
    def __init__(self, model, batchsize=1, missing_data=True):
        """
        :param model: object providing model.Y and model.Y_normalized
        :param batchsize: number of output dimensions per batch
        :param missing_data: whether to store a mask of the non-nan rows
            with every group of dimensions (None is stored otherwise)
        """
        self.batchsize = batchsize
        self.output_dim = model.Y.shape[1]
        self.Y = model.Y_normalized
        self.missing_data = missing_data
        self.reset()
        self.do_stochastics()
    def do_stochastics(self):
        """
        Advance to the next batch of output dimensions and store it in self.d.

        With batchsize == 1 the dimensions are visited in cyclic order,
        otherwise batchsize distinct dimensions are drawn at random.
        """
        import numpy as np
        if self.batchsize == 1:
            self.current_dim = (self.current_dim+1)%self.output_dim
            dims = [self.current_dim]
        else:
            dims = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
        if self.missing_data:
            # Same convention for every batch size: the mask marks the rows
            # which are observed (not nan), as in SparseGPMissing.
            self.d = _group_dims_by_missing_pattern(self.Y, dims)
        else:
            self.d = [[dims, None]]
    def reset(self):
        """
        Forget the current batch, so that the next call to do_stochastics
        starts again from dimension 0 when batchsize == 1.
        """
        self.current_dim = -1
        self.d = None
--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@ -99,6 +99,9 @@ class Stationary(Kern):
    @Cache_this(limit=3, ignore_args=())
    def dK_dr_via_X(self, X, X2):
        """
        Evaluate dK_dr at the scaled distance between X and X2.

        Convenience wrapper which takes the inputs rather than r, so
        that the result of dK_dr can be cached on (X, X2).
        """
        scaled_distance = self._scaled_dist(X, X2)
        return self.dK_dr(scaled_distance)
--- a/GPy/models/bayesian_gplvm_minibatch.py
+++ b/GPy/models/bayesian_gplvm_minibatch.py
@ -40,12 +40,13 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
            Z = np.random.permutation(X.copy())[:num_inducing]
        assert Z.shape[1] == X.shape[1]
-        if X_variance == False:
+        if X_variance is False:
            self.logger.info('no variance on X, activating sparse GPLVM')
            X = Param("latent space", X)
-        elif X_variance is None:
+        else:
-            self.logger.info("initializing latent space variance ~ uniform(0,.1)")
+            if X_variance is None:
-            X_variance = np.random.uniform(0,.1,X.shape)
+                self.logger.info("initializing latent space variance ~ uniform(0,.1)")
                X_variance = np.random.uniform(0,.1,X.shape)
            self.variational_prior = NormalPrior()
            X = NormalPosterior(X, X_variance)
@ -71,13 +72,13 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
        self.X = X
        self.link_parameter(self.X, 0)
-    def set_X_gradients(self, X, X_grad):
+    #def set_X_gradients(self, X, X_grad):
-        """Set the gradients of the posterior distribution of X in its specific form."""
+    #    """Set the gradients of the posterior distribution of X in its specific form."""
-        X.mean.gradient, X.variance.gradient = X_grad
+    #    X.mean.gradient, X.variance.gradient = X_grad
-    def get_X_gradients(self, X):
+    #def get_X_gradients(self, X):
-        """Get the gradients of the posterior distribution of X in its specific form."""
+    #    """Get the gradients of the posterior distribution of X in its specific form."""
-        return X.mean.gradient, X.variance.gradient
+    #    return X.mean.gradient, X.variance.gradient
    def _outer_values_update(self, full_values):
        """
@ -122,7 +123,7 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
            if self.missing_data or not self.stochastics:
                self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
-            elif self.stochastics:
+            else: #self.stochastics is given:
                d = self.output_dim
                self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d
--- a/GPy/models/sparse_gp_minibatch.py
+++ b/GPy/models/sparse_gp_minibatch.py
@ -41,6 +41,7 @@ class SparseGPMiniBatch(SparseGP):
    def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
                 name='sparse gp', Y_metadata=None, normalizer=False,
                 missing_data=False, stochastic=False, batchsize=1):
        self._update_stochastics = False
        # pick a sensible inference method
        if inference_method is None:
@ -74,6 +75,13 @@ class SparseGPMiniBatch(SparseGP):
        self.link_parameter(self.Z, index=0)
        self.posterior = None
    def optimize(self, optimizer=None, start=None, **kwargs):
        """
        Run SparseGP.optimize with the _update_stochastics flag raised.

        The flag is set to True for the duration of the optimization and
        set back to False afterwards, also when the optimization raises.
        All arguments are handed on to SparseGP.optimize unchanged.
        """
        self._update_stochastics = True
        try:
            SparseGP.optimize(self, optimizer=optimizer, start=start, **kwargs)
        finally:
            # always lower the flag again, whatever happened above
            self._update_stochastics = False
    def has_uncertain_inputs(self):
        """
        Return True if self.X is a VariationalPosterior, False otherwise.
        """
        inputs = self.X
        return isinstance(inputs, VariationalPosterior)
@ -226,16 +234,16 @@ class SparseGPMiniBatch(SparseGP):
            woodbury_inv = self.posterior._woodbury_inv
            woodbury_vector = self.posterior._woodbury_vector
-        if not self.stochastics:
+        #if not self.stochastics:
-            m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
+        #    m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
-            message = m_f(-1)
+        #    message = m_f(-1)
-            print(message, end=' ')
+        #    print(message, end=' ')
        for d, ninan in self.stochastics.d:
-            if not self.stochastics:
+            #if not self.stochastics:
-                print(' '*(len(message)) + '\r', end=' ')
+            #    print(' '*(len(message)) + '\r', end=' ')
-                message = m_f(d)
+            #    message = m_f(d)
-                print(message, end=' ')
+            #    print(message, end=' ')
            psi0ni = self.psi0[ninan]
            psi1ni = self.psi1[ninan]
@ -262,8 +270,8 @@ class SparseGPMiniBatch(SparseGP):
            woodbury_vector[:, d] = posterior.woodbury_vector
            self._log_marginal_likelihood += log_marginal_likelihood
-        if not self.stochastics:
+        #if not self.stochastics:
-            print('')
+        #    print('')
        if self.posterior is None:
            self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
@ -314,6 +322,8 @@ class SparseGPMiniBatch(SparseGP):
        if self.missing_data:
            self._outer_loop_for_missing_data()
        elif self.stochastics:
            if self._update_stochastics:
                self.stochastics.do_stochastics()
            self._outer_loop_without_missing_data()
        else:
            self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
--- a/GPy/testing/minibatch_tests.py
+++ b/GPy/testing/minibatch_tests.py
@ -54,7 +54,7 @@ class BGPLVMTest(unittest.TestCase):
    def test_lik_comparisons_m0_s0(self):
        # Test if the different implementations give the exact same likelihood as the full model.
        # All of the following settings should give the same likelihood and gradients as the full model:
-        m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=False)
+        m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, X_variance=self.m_full.X.variance.values, missing_data=False, stochastic=False)
        m[:] = self.m_full[:]
        np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
        np.testing.assert_allclose(m.gradient, self.m_full.gradient)
@ -124,6 +124,32 @@ class SparseGPMinibatchTest(unittest.TestCase):
        np.testing.assert_allclose(m.gradient, self.m_full.gradient)
        assert(m.checkgrad())
    def test_sparsegp_init(self):
        # Build a SparseGPMiniBatch for every combination of missing_data and
        # stochastic, and check the gradients both before and after a short
        # optimization run.
        np.random.seed(1234)
        inducing_rows = np.random.choice(self.X.shape[0], replace=False, size=10)
        Z = self.X[inducing_rows].copy()
        Q = Z.shape[1]
        # (missing_data, stochastic, optimizer), in the order they are run
        settings = (
            (True, False, 'adadelta'),
            (True, True, 'rprop'),
            (False, False, 'rprop'),
            (False, True, 'adadelta'),
        )
        for missing_data, stochastic, optimizer in settings:
            # fresh kernel and likelihood for every model
            kernel = GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q)
            m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, kernel, GPy.likelihoods.Gaussian(), missing_data=missing_data, stochastic=stochastic)
            assert(m.checkgrad())
            m.optimize(optimizer, max_iters=10)
            assert(m.checkgrad())
    def test_predict_missing_data(self):
        m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, X_variance=False, missing_data=True, stochastic=True, batchsize=self.Y.shape[1])
        m[:] = self.m_full[:]