From e4a4344334709abcc648acf368d9b1301c7042be Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Tue, 8 Mar 2016 09:47:24 +0000
Subject: [PATCH 1/7] [stochastics] update for new stochastic optimizers in GPy

---
 GPy/inference/optimization/__init__.py    |   5 +-
 GPy/inference/optimization/stochastics.py | 119 ++++++++++++++++++++++
 GPy/models/sparse_gp_minibatch.py         |  12 ++-
 GPy/testing/minibatch_tests.py            |  18 ++++
 4 files changed, 152 insertions(+), 2 deletions(-)
 create mode 100644 GPy/inference/optimization/stochastics.py

diff --git a/GPy/inference/optimization/__init__.py b/GPy/inference/optimization/__init__.py
index a6247d96..2fa96960 100644
--- a/GPy/inference/optimization/__init__.py
+++ b/GPy/inference/optimization/__init__.py
@@ -1,5 +1,8 @@
-from paramz.optimization import stochastics, Optimizer
+from paramz.optimization import Optimizer
+from . import stochastics
+
 from paramz.optimization import *
 import sys
+
 sys.modules['GPy.inference.optimization.stochastics'] = stochastics
 sys.modules['GPy.inference.optimization.Optimizer'] = Optimizer
diff --git a/GPy/inference/optimization/stochastics.py b/GPy/inference/optimization/stochastics.py
new file mode 100644
index 00000000..41f5320b
--- /dev/null
+++ b/GPy/inference/optimization/stochastics.py
@@ -0,0 +1,119 @@
+#===============================================================================
+# Copyright (c) 2015, Max Zwiessele
+# All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+# 
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# 
+# * Neither the name of paramax nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#===============================================================================
+
+class StochasticStorage(object):
+    '''
+    This is a container for holding the stochastic parameters,
+    such as subset indices or step length and so on.
+
+    self.d has to be a list of lists:
+    [dimension indices, nan indices for those dimensions]
+    so that the minibatches can be used as efficiently as possible.
+    '''
+    def __init__(self, model):
+        """
+        Initialize this stochastic container using the given model
+        """
+
+    def do_stochastics(self):
+        """
+        Update the internal state to the next batch of the stochastic
+        descent algorithm.
+        """
+        pass
+
+    def reset(self):
+        """
+        Reset the state of this stochastics generator.
+        """
+
+class SparseGPMissing(StochasticStorage):
+    def __init__(self, model, batchsize=1):
+        """
+        Here we want to loop over all dimensions every time.
+        Thus, we can just make sure the loop goes over self.d every
+        time. We will try to get batches which look the same together
+        which speeds up calculations significantly.
+        """
+        import numpy as np
+        self.Y = model.Y_normalized
+        bdict = {}
+        # For N > 1000 array2string crops its output by default, so lift the print threshold
+        opt = np.get_printoptions()
+        np.set_printoptions(threshold=np.inf)
+        for d in range(self.Y.shape[1]):
+            inan = np.isnan(self.Y)[:, d]
+            arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
+            try:
+                bdict[arr_str][0].append(d)
+            except:
+                bdict[arr_str] = [[d], ~inan]
+        np.set_printoptions(**opt)
+        self.d = bdict.values()
+
+class SparseGPStochastics(StochasticStorage):
+    """
+    For the sparse gp we need to store the dimension we are in,
+    and the indices corresponding to those
+    """
+    def __init__(self, model, batchsize=1, missing_data=True):
+        self.batchsize = batchsize
+        self.output_dim = model.Y.shape[1]
+        self.Y = model.Y_normalized
+        self.missing_data = missing_data
+        self.reset()
+        self.do_stochastics()
+
+    def do_stochastics(self):
+        import numpy as np
+        if self.batchsize == 1:
+            self.current_dim = (self.current_dim+1)%self.output_dim
+            self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
+        else:
+            self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
+            bdict = {}
+            if self.missing_data:
+                opt = np.get_printoptions()
+                np.set_printoptions(threshold=np.inf)
+                for d in self.d:
+                    inan = np.isnan(self.Y[:, d])
+                    arr_str = np.array2string(inan,np.inf, 0,True, '',formatter={'bool':lambda x: '1' if x else '0'})
+                    try:
+                        bdict[arr_str][0].append(d)
+                    except:
+                        bdict[arr_str] = [[d], ~inan]
+                np.set_printoptions(**opt)
+                self.d = bdict.values()
+            else:
+                self.d = [[self.d, None]]
+
+    def reset(self):
+        self.current_dim = -1
+        self.d = None
diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py
index 6afb19e9..92a340f5 100644
--- a/GPy/models/sparse_gp_minibatch.py
+++ b/GPy/models/sparse_gp_minibatch.py
@@ -41,6 +41,7 @@ class SparseGPMiniBatch(SparseGP):
     def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
                  name='sparse gp', Y_metadata=None, normalizer=False,
                  missing_data=False, stochastic=False, batchsize=1):
+        self._update_stochastics = False
 
         # pick a sensible inference method
         if inference_method is None:
@@ -73,7 +74,14 @@ class SparseGPMiniBatch(SparseGP):
         logger.info("Adding Z as parameter")
         self.link_parameter(self.Z, index=0)
         self.posterior = None
-
+        
+    def optimize(self, optimizer=None, start=None, **kwargs):
+        try:
+            self._update_stochastics = True
+            SparseGP.optimize(self, optimizer=optimizer, start=start, **kwargs)
+        finally:
+            self._update_stochastics = False
+            
     def has_uncertain_inputs(self):
         return isinstance(self.X, VariationalPosterior)
 
@@ -314,6 +322,8 @@ class SparseGPMiniBatch(SparseGP):
         if self.missing_data:
             self._outer_loop_for_missing_data()
         elif self.stochastics:
+            if self._update_stochastics:
+                self.stochastics.do_stochastics()
             self._outer_loop_without_missing_data()
         else:
             self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
diff --git a/GPy/testing/minibatch_tests.py b/GPy/testing/minibatch_tests.py
index 6dd1db22..a5e9a884 100644
--- a/GPy/testing/minibatch_tests.py
+++ b/GPy/testing/minibatch_tests.py
@@ -124,6 +124,24 @@ class SparseGPMinibatchTest(unittest.TestCase):
         np.testing.assert_allclose(m.gradient, self.m_full.gradient)
         assert(m.checkgrad())
 
+    def test_sparsegp_init(self):
+        # Test if the different implementations give the exact same likelihood as the full model.
+        # All of the following settings should give the same likelihood and gradients as the full model:
+        np.random.seed(1234)
+        Z = self.X[np.random.choice(self.X.shape[0], replace=False, size=10)].copy()
+        Q = Z.shape[1]
+        m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=True, stochastic=False)
+        assert(m.checkgrad())
+
+        m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=True, stochastic=True)
+        assert(m.checkgrad())
+        
+        m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=False)
+        assert(m.checkgrad())
+        
+        m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=True)
+        assert(m.checkgrad())
+
     def test_predict_missing_data(self):
         m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, X_variance=False, missing_data=True, stochastic=True, batchsize=self.Y.shape[1])
         m[:] = self.m_full[:]

From eaf20a952edbd0986b948882a454679adce9a242 Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Tue, 8 Mar 2016 10:01:07 +0000
Subject: [PATCH 2/7] [stochastics] added optimization for a few runs

---
 GPy/testing/minibatch_tests.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/GPy/testing/minibatch_tests.py b/GPy/testing/minibatch_tests.py
index a5e9a884..7b39af95 100644
--- a/GPy/testing/minibatch_tests.py
+++ b/GPy/testing/minibatch_tests.py
@@ -132,15 +132,23 @@ class SparseGPMinibatchTest(unittest.TestCase):
         Q = Z.shape[1]
         m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=True, stochastic=False)
         assert(m.checkgrad())
+        m.optimize(max_iters=10)
+        assert(m.checkgrad())
 
         m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=True, stochastic=True)
         assert(m.checkgrad())
+        m.optimize(max_iters=10)
+        assert(m.checkgrad())
         
         m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=False)
         assert(m.checkgrad())
+        m.optimize(max_iters=10)
+        assert(m.checkgrad())
         
         m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=True)
         assert(m.checkgrad())
+        m.optimize(max_iters=10)
+        assert(m.checkgrad())
 
     def test_predict_missing_data(self):
         m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, X_variance=False, missing_data=True, stochastic=True, batchsize=self.Y.shape[1])

From 88a9b92c80402f1288e9192a6987acd8562d199b Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Tue, 8 Mar 2016 10:23:05 +0000
Subject: [PATCH 3/7] [climin] added tests and install directions for travis

---
 .travis.yml                    | 1 +
 GPy/testing/minibatch_tests.py | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 0e9efae1..f4c38549 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -30,6 +30,7 @@ install:
 - source install_retry.sh
 - pip install codecov
 - pip install pypandoc
+- pip install git+git://github.com/BRML/climin.git
 - python setup.py develop
 
 script:
diff --git a/GPy/testing/minibatch_tests.py b/GPy/testing/minibatch_tests.py
index 7b39af95..d217cb16 100644
--- a/GPy/testing/minibatch_tests.py
+++ b/GPy/testing/minibatch_tests.py
@@ -132,22 +132,22 @@ class SparseGPMinibatchTest(unittest.TestCase):
         Q = Z.shape[1]
         m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=True, stochastic=False)
         assert(m.checkgrad())
-        m.optimize(max_iters=10)
+        m.optimize('adadelta', max_iters=10)
         assert(m.checkgrad())
 
         m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=True, stochastic=True)
         assert(m.checkgrad())
-        m.optimize(max_iters=10)
+        m.optimize('rprop', max_iters=10)
         assert(m.checkgrad())
         
         m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=False)
         assert(m.checkgrad())
-        m.optimize(max_iters=10)
+        m.optimize('rprop', max_iters=10)
         assert(m.checkgrad())
         
         m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(self.X, self.Y, Z, GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q), GPy.likelihoods.Gaussian(), missing_data=False, stochastic=True)
         assert(m.checkgrad())
-        m.optimize(max_iters=10)
+        m.optimize('adadelta', max_iters=10)
         assert(m.checkgrad())
 
     def test_predict_missing_data(self):

From 67043e83d71f0411b6a4ab4629d181a4158e54f2 Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Tue, 8 Mar 2016 11:19:50 +0000
Subject: [PATCH 4/7] [minibatch] added coverage for branching, spotted bug in
 X_variance

---
 GPy/models/bayesian_gplvm_minibatch.py | 23 ++++++++++++-----------
 GPy/testing/minibatch_tests.py         |  2 +-
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/GPy/models/bayesian_gplvm_minibatch.py b/GPy/models/bayesian_gplvm_minibatch.py
index cc17e9ee..2a457a21 100644
--- a/GPy/models/bayesian_gplvm_minibatch.py
+++ b/GPy/models/bayesian_gplvm_minibatch.py
@@ -40,12 +40,13 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
             Z = np.random.permutation(X.copy())[:num_inducing]
         assert Z.shape[1] == X.shape[1]
 
-        if X_variance == False:
+        if X_variance is False:
             self.logger.info('no variance on X, activating sparse GPLVM')
             X = Param("latent space", X)
-        elif X_variance is None:
-            self.logger.info("initializing latent space variance ~ uniform(0,.1)")
-            X_variance = np.random.uniform(0,.1,X.shape)
+        else:
+            if X_variance is None:
+                self.logger.info("initializing latent space variance ~ uniform(0,.1)")
+                X_variance = np.random.uniform(0,.1,X.shape)
             self.variational_prior = NormalPrior()
             X = NormalPosterior(X, X_variance)
 
@@ -71,13 +72,13 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
         self.X = X
         self.link_parameter(self.X, 0)
 
-    def set_X_gradients(self, X, X_grad):
-        """Set the gradients of the posterior distribution of X in its specific form."""
-        X.mean.gradient, X.variance.gradient = X_grad
+    #def set_X_gradients(self, X, X_grad):
+    #    """Set the gradients of the posterior distribution of X in its specific form."""
+    #    X.mean.gradient, X.variance.gradient = X_grad
 
-    def get_X_gradients(self, X):
-        """Get the gradients of the posterior distribution of X in its specific form."""
-        return X.mean.gradient, X.variance.gradient
+    #def get_X_gradients(self, X):
+    #    """Get the gradients of the posterior distribution of X in its specific form."""
+    #    return X.mean.gradient, X.variance.gradient
 
     def _outer_values_update(self, full_values):
         """
@@ -122,7 +123,7 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
 
             if self.missing_data or not self.stochastics:
                 self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
-            elif self.stochastics:
+            else: #self.stochastics is given:
                 d = self.output_dim
                 self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d
 
diff --git a/GPy/testing/minibatch_tests.py b/GPy/testing/minibatch_tests.py
index d217cb16..fbf12939 100644
--- a/GPy/testing/minibatch_tests.py
+++ b/GPy/testing/minibatch_tests.py
@@ -54,7 +54,7 @@ class BGPLVMTest(unittest.TestCase):
     def test_lik_comparisons_m0_s0(self):
         # Test if the different implementations give the exact same likelihood as the full model.
         # All of the following settings should give the same likelihood and gradients as the full model:
-        m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=False)
+        m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, X_variance=self.m_full.X.variance.values, missing_data=False, stochastic=False)
         m[:] = self.m_full[:]
         np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
         np.testing.assert_allclose(m.gradient, self.m_full.gradient)

From eeb2733059fa53ac18eca9ed07f9c8b8224109ef Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Tue, 8 Mar 2016 12:04:24 +0000
Subject: [PATCH 5/7] [sparse gp] commented out print statements, which are
 never used

---
 GPy/models/sparse_gp_minibatch.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py
index 92a340f5..d1c252f8 100644
--- a/GPy/models/sparse_gp_minibatch.py
+++ b/GPy/models/sparse_gp_minibatch.py
@@ -234,16 +234,16 @@ class SparseGPMiniBatch(SparseGP):
             woodbury_inv = self.posterior._woodbury_inv
             woodbury_vector = self.posterior._woodbury_vector
 
-        if not self.stochastics:
-            m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
-            message = m_f(-1)
-            print(message, end=' ')
+        #if not self.stochastics:
+        #    m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
+        #    message = m_f(-1)
+        #    print(message, end=' ')
 
         for d, ninan in self.stochastics.d:
-            if not self.stochastics:
-                print(' '*(len(message)) + '\r', end=' ')
-                message = m_f(d)
-                print(message, end=' ')
+            #if not self.stochastics:
+            #    print(' '*(len(message)) + '\r', end=' ')
+            #    message = m_f(d)
+            #    print(message, end=' ')
 
             psi0ni = self.psi0[ninan]
             psi1ni = self.psi1[ninan]
@@ -270,8 +270,8 @@ class SparseGPMiniBatch(SparseGP):
             woodbury_vector[:, d] = posterior.woodbury_vector
             self._log_marginal_likelihood += log_marginal_likelihood
 
-        if not self.stochastics:
-            print('')
+        #if not self.stochastics:
+        #    print('')
 
         if self.posterior is None:
             self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,

From 51af6a236a97613cb36866a5506ebfa8b0e1b8bd Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Tue, 8 Mar 2016 14:59:28 +0000
Subject: [PATCH 6/7] [autograd] added install instructions for autograd

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index f4c38549..b236d515 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,6 +31,7 @@ install:
 - pip install codecov
 - pip install pypandoc
 - pip install git+git://github.com/BRML/climin.git
+- pip install autograd
 - python setup.py develop
 
 script:

From fede8055267251405c7d13516986153d514a7ae0 Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Tue, 8 Mar 2016 14:59:53 +0000
Subject: [PATCH 7/7] [kern] added doc string

---
 GPy/kern/src/stationary.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py
index 5451c7a3..286edcc2 100644
--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@@ -99,6 +99,9 @@ class Stationary(Kern):
 
     @Cache_this(limit=3, ignore_args=())
     def dK_dr_via_X(self, X, X2):
+        """
+        compute the derivative of K wrt r, where r is the scaled distance between X and X2
+        """
         #a convenience function, so we can cache dK_dr
         return self.dK_dr(self._scaled_dist(X, X2))