From c14aef948ea4f1e2dd663a70dfa2cd30ae89bf6a Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Wed, 3 Dec 2014 17:18:22 -0800
Subject: [PATCH 01/13] fixed minor bug in sparse gp minibatch

---
 GPy/models/sparse_gp_minibatch.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py
index ec2e28f5..f5119e48 100644
--- a/GPy/models/sparse_gp_minibatch.py
+++ b/GPy/models/sparse_gp_minibatch.py
@@ -47,10 +47,11 @@ Created on 3 Nov 2014
     def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
                  name='sparse gp', Y_metadata=None, normalizer=False,
                  missing_data=False, stochastic=False, batchsize=1):
-        #pick a sensible inference method
+        
+        # pick a sensible inference method
         if inference_method is None:
             if isinstance(likelihood, likelihoods.Gaussian):
-                inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1])
+                inference_method = var_dtc.VarDTC(limit=1 if not missing_data else Y.shape[1])
             else:
                 #inference_method = ??
                 raise NotImplementedError, "what to do what to do?"

From ecf463e88631bcc3ef3c4eb608fdedd0c81edbcc Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Thu, 4 Dec 2014 14:21:50 +0000
Subject: [PATCH 02/13] implement update_gradients_diag for MLP kernel

---
 GPy/kern/_src/mlp.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/GPy/kern/_src/mlp.py b/GPy/kern/_src/mlp.py
index badbd60d..16e84363 100644
--- a/GPy/kern/_src/mlp.py
+++ b/GPy/kern/_src/mlp.py
@@ -79,8 +79,14 @@ class MLP(Kern):
                              + 2*self.bias_variance + 2.))*base_cov_grad).sum()
 
     def update_gradients_diag(self, X):
-        raise NotImplementedError, "TODO"
-
+        self._K_diag_computations(X)
+        self.variance.gradient = np.sum(self._K_diag_dvar*dL_dKdiag)
+        
+        base = four_over_tau*self.variance/np.sqrt(1-self._K_diag_asin_arg*self._K_diag_asin_arg)
+        base_cov_grad = base*dL_dKdiag/np.square(self._K_diag_denom)
+        
+        self.weight_variance.gradient = (base_cov_grad*np.square(X).sum(axis=1)).sum()
+        self.bias_variance.gradient = base_cov_grad.sum()
 
     def gradients_X(self, dL_dK, X, X2):
         """Derivative of the covariance matrix with respect to X"""

From bd1fb56e6c58eaf348322df4283e2ad8bfafad04 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi <nicolo.fusi@gmail.com>
Date: Fri, 2 Jan 2015 15:07:19 -0800
Subject: [PATCH 03/13] re-implemented warpedGP for new release of GPy

---
 GPy/kern/__init__.py          |  2 +-
 GPy/models/warped_gp.py       | 90 ++++++++++++++++-------------------
 GPy/util/warping_functions.py | 54 +++++++++++++--------
 3 files changed, 76 insertions(+), 70 deletions(-)

diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index c400277c..7a7c7ad8 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -1,7 +1,7 @@
 from _src.kern import Kern
 from _src.rbf import RBF
 from _src.linear import Linear, LinearFull
-from _src.static import Bias, White
+from _src.static import Bias, White, Fixed
 from _src.brownian import Brownian
 from _src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
 from _src.mlp import MLP
diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py
index 4b982ed2..5bc9a417 100644
--- a/GPy/models/warped_gp.py
+++ b/GPy/models/warped_gp.py
@@ -1,7 +1,6 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-
 import numpy as np
 from ..util.warping_functions import *
 from ..core import GP
@@ -10,14 +9,16 @@ from GPy.util.warping_functions import TanhWarpingFunction_d
 from GPy import kern
 
 class WarpedGP(GP):
-    def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalize_X=False, normalize_Y=False):
+    def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3):
 
         if kernel is None:
-            kernel = kern.rbf(X.shape[1])
+            kernel = kern.RBF(X.shape[1])
 
         if warping_function == None:
             self.warping_function = TanhWarpingFunction_d(warping_terms)
             self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1,) * 1)
+        else:
+            self.warping_function = warping_function
 
         self.scale_data = False
         if self.scale_data:
@@ -25,10 +26,10 @@ class WarpedGP(GP):
         self.has_uncertain_inputs = False
         self.Y_untransformed = Y.copy()
         self.predict_in_warped_space = False
-        likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
+        likelihood = likelihoods.Gaussian()
 
-        GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
-        self._set_params(self._get_params())
+        GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel)
+        self.link_parameter(self.warping_function)
 
     def _scale_data(self, Y):
         self._Ymax = Y.max()
@@ -38,62 +39,55 @@ class WarpedGP(GP):
     def _unscale_data(self, Y):
         return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
 
-    def _set_params(self, x):
-        self.warping_params = x[:self.warping_function.num_parameters]
-        Y = self.transform_data()
-        self.likelihood.set_data(Y)
-        GP._set_params(self, x[self.warping_function.num_parameters:].copy())
+    def parameters_changed(self):
+        self.Y[:] = self.transform_data()
+        super(WarpedGP, self).parameters_changed()
 
-    def _get_params(self):
-        return np.hstack((self.warping_params.flatten().copy(), GP._get_params(self).copy()))
+        Kiy = self.posterior.woodbury_vector.flatten()
 
-    def _get_param_names(self):
-        warping_names = self.warping_function._get_param_names()
-        param_names = GP._get_param_names(self)
-        return warping_names + param_names
-
-    def transform_data(self):
-        Y = self.warping_function.f(self.Y_untransformed.copy(), self.warping_params).copy()
-        return Y
-
-    def log_likelihood(self):
-        ll = GP.log_likelihood(self)
-        jacobian = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
-        return ll + np.log(jacobian).sum()
-
-    def _log_likelihood_gradients(self):
-        ll_grads = GP._log_likelihood_gradients(self)
-        alpha = np.dot(self.Ki, self.likelihood.Y.flatten())
-        warping_grads = self.warping_function_gradients(alpha)
-
-        warping_grads = np.append(warping_grads[:, :-1].flatten(), warping_grads[0, -1])
-        return np.hstack((warping_grads.flatten(), ll_grads.flatten()))
-
-    def warping_function_gradients(self, Kiy):
-        grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params)
-        grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params,
+        grad_y = self.warping_function.fgrad_y(self.Y_untransformed)
+        grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed,
                                                                  return_covar_chain=True)
         djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
         dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
 
-        return -dquad_dpsi + djac_dpsi
+        warping_grads = -dquad_dpsi + djac_dpsi
+
+        self.warping_function.psi.gradient[:] = warping_grads[:, :-1]
+        self.warping_function.d.gradient[:] = warping_grads[0, -1]
+
+
+    def transform_data(self):
+        Y = self.warping_function.f(self.Y_untransformed.copy()).copy()
+        return Y
+
+    def log_likelihood(self):
+        ll = GP.log_likelihood(self)
+        jacobian = self.warping_function.fgrad_y(self.Y_untransformed)
+        return ll + np.log(jacobian).sum()
 
     def plot_warping(self):
-        self.warping_function.plot(self.warping_params, self.Y_untransformed.min(), self.Y_untransformed.max())
+        self.warping_function.plot(self.Y_untransformed.min(), self.Y_untransformed.max())
 
-    def predict(self, Xnew, which_parts='all', full_cov=False, pred_init=None):
+    def predict(self, Xnew, which_parts='all', pred_init=None):
         # normalize X values
-        Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
-        mu, var = GP._raw_predict(self, Xnew, full_cov=full_cov, which_parts=which_parts)
+        # Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
+        mu, var = GP._raw_predict(self, Xnew)
 
         # now push through likelihood
-        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
+        mean, var = self.likelihood.predictive_values(mu, var)
 
         if self.predict_in_warped_space:
-            mean = self.warping_function.f_inv(mean, self.warping_params, y=pred_init)
-            var = self.warping_function.f_inv(var, self.warping_params)
+            mean = self.warping_function.f_inv(mean,  y=pred_init)
+            var = self.warping_function.f_inv(var)
 
         if self.scale_data:
             mean = self._unscale_data(mean)
-        
-        return mean, var, _025pm, _975pm
+
+        return mean, var
+
+if __name__ == '__main__':
+    X = np.random.randn(100, 1)
+    Y = np.sin(X) + np.random.randn(100, 1)*0.05
+
+    m = WarpedGP(X, Y)
diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py
index a0a385e0..a7547be6 100644
--- a/GPy/util/warping_functions.py
+++ b/GPy/util/warping_functions.py
@@ -1,17 +1,18 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-
 import numpy as np
+from GPy.core.parameterization import Parameterized, Param
+from ..core.parameterization.transformations import Logexp
 
-class WarpingFunction(object):
+class WarpingFunction(Parameterized):
     """
     abstract function for warping
     z = f(y)
     """
 
-    def __init__(self):
-        raise NotImplementedError
+    def __init__(self, name):
+        super(WarpingFunction, self).__init__(name=name)
 
     def f(self,y,psi):
         """function transformation
@@ -34,9 +35,10 @@ class WarpingFunction(object):
     def _get_param_names(self):
         raise NotImplementedError
 
-    def plot(self, psi, xmin, xmax):
+    def plot(self,  xmin, xmax):
+        psi = self.psi
         y = np.arange(xmin, xmax, 0.01)
-        f_y = self.f(y, psi)
+        f_y = self.f(y)
         from matplotlib import pyplot as plt
         plt.figure()
         plt.plot(y, f_y)
@@ -50,6 +52,7 @@ class TanhWarpingFunction(WarpingFunction):
         """n_terms specifies the number of tanh terms to be used"""
         self.n_terms = n_terms
         self.num_parameters = 3 * self.n_terms
+        super(TanhWarpingFunction, self).__init__(name='warp_tanh')
 
     def f(self,y,psi):
         """
@@ -163,8 +166,18 @@ class TanhWarpingFunction_d(WarpingFunction):
         """n_terms specifies the number of tanh terms to be used"""
         self.n_terms = n_terms
         self.num_parameters = 3 * self.n_terms + 1
+        self.psi = np.ones((self.n_terms, 3))
 
-    def f(self,y,psi):
+        super(TanhWarpingFunction_d, self).__init__(name='warp_tanh')
+        self.psi = Param('psi', self.psi)
+        self.psi[:, :2].constrain_positive()
+
+        self.d = Param('%s' % ('d'), 1.0, Logexp())
+        self.link_parameter(self.psi)
+        self.link_parameter(self.d)
+
+
+    def f(self,y):
         """
         Transform y with f using parameter vector psi
         psi = [[a,b,c]]
@@ -175,9 +188,9 @@ class TanhWarpingFunction_d(WarpingFunction):
         #1. check that number of params is consistent
         # assert psi.shape[0] == self.n_terms, 'inconsistent parameter dimensions'
         # assert psi.shape[1] == 4, 'inconsistent parameter dimensions'
-        mpsi = psi.copy()
-        d = psi[-1]
-        mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
+
+        d = self.d
+        mpsi = self.psi
 
         #3. transform data
         z = d*y.copy()
@@ -187,7 +200,7 @@ class TanhWarpingFunction_d(WarpingFunction):
         return z
 
 
-    def f_inv(self, z, psi, max_iterations=1000, y=None):
+    def f_inv(self, z, max_iterations=1000, y=None):
         """
         calculate the numerical inverse of f
 
@@ -198,12 +211,12 @@ class TanhWarpingFunction_d(WarpingFunction):
         z = z.copy()
         if y is None:
             y = np.ones_like(z)
-            
+
         it = 0
         update = np.inf
 
         while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations):
-            update = (self.f(y, psi) - z)/self.fgrad_y(y, psi)
+            update = (self.f(y) - z)/self.fgrad_y(y)
             y -= update
             it += 1
         if it == max_iterations:
@@ -212,7 +225,7 @@ class TanhWarpingFunction_d(WarpingFunction):
         return y
 
 
-    def fgrad_y(self, y, psi, return_precalc = False):
+    def fgrad_y(self, y,return_precalc = False):
         """
         gradient of f w.r.t to y ([N x 1])
 
@@ -221,9 +234,8 @@ class TanhWarpingFunction_d(WarpingFunction):
         """
 
 
-        mpsi = psi.copy()
-        d = psi[-1]
-        mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
+        d = self.d
+        mpsi = self.psi
 
         # vectorized version
 
@@ -240,7 +252,7 @@ class TanhWarpingFunction_d(WarpingFunction):
         return GRAD
 
 
-    def fgrad_y_psi(self, y, psi, return_covar_chain = False):
+    def fgrad_y_psi(self, y, return_covar_chain = False):
         """
         gradient of f w.r.t to y and psi
 
@@ -248,10 +260,10 @@ class TanhWarpingFunction_d(WarpingFunction):
 
         """
 
-        mpsi = psi.copy()
-        mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
 
-        w, s, r, d = self.fgrad_y(y, psi, return_precalc = True)
+        mpsi = self.psi
+
+        w, s, r, d = self.fgrad_y(y, return_precalc = True)
 
         gradients = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4))
         for i in range(len(mpsi)):

From 1d2cbfe44a9f78a1206fdd2366a36e7cad562bd1 Mon Sep 17 00:00:00 2001
From: Daniel Beck <Daniel Beck>
Date: Fri, 6 Feb 2015 19:39:46 +1100
Subject: [PATCH 04/13] first attempt

---
 GPy/kern/_src/prod.py       | 15 +++++++++++++--
 GPy/testing/kernel_tests.py | 15 +++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py
index dd9a5fe4..e3776838 100644
--- a/GPy/kern/_src/prod.py
+++ b/GPy/kern/_src/prod.py
@@ -5,6 +5,7 @@ import numpy as np
 from kern import CombinationKernel
 from ...util.caching import Cache_this
 import itertools
+import operator
 
 class Prod(CombinationKernel):
     """
@@ -42,9 +43,19 @@ class Prod(CombinationKernel):
         return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
 
     def update_gradients_full(self, dL_dK, X, X2=None):
+        np.seterr(invalid='raise')
         k = self.K(X,X2)*dL_dK
-        for p in self.parts:
-            p.update_gradients_full(k/p.K(X,X2),X,X2)
+        try:
+            for p in self.parts:
+                p.update_gradients_full(k/p.K(X,X2),X,X2)
+        except FloatingPointError:
+            np.seterr(invalid='warn')
+            print "Gradient warning: falling back to slow version due to zero-valued kernel"
+            for combination in itertools.combinations(self.parts, len(self.parts) - 1):
+                prod = reduce(operator.mul, [p.K(X, X2) for p in combination])
+                to_update = list(set(self.parts) - set(combination))[0]
+                to_update.update_gradients_full(dL_dK * prod, X, X2)
+
 
     def update_gradients_diag(self, dL_dKdiag, X):
         k = self.Kdiag(X)*dL_dKdiag
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index c1bb9265..387047b6 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -400,12 +400,27 @@ class Coregionalize_weave_test(unittest.TestCase):
     #reset the weave state for any other tests
     GPy.util.config.config.set('weave', 'working', 'False')
 
+class KernelTestsProductWithZeroValues(unittest.TestCase):
+
+    def test_zero_valued_kernel(self):
+        X = np.array([[0,1],[1,0]])
+        Y = np.array([[1],[10]])
+        lin = GPy.kern.Linear(2)
+        bias = GPy.kern.Bias(2)
+        k = lin * bias
+        #k = lin
+        m = GPy.models.GPRegression(X, Y, kernel=k)
+        #m['mul.bias.variance'].constrain_fixed(0)
+        m.optimize(messages=False)
 
 
 
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
     unittest.main()
+    #suite = unittest.TestLoader().loadTestsFromTestCase(KernelTestsProductWithZeroValues)
+    #unittest.TextTestRunner().run(suite)
+
 #     np.random.seed(0)
 #     N0 = 3
 #     N1 = 9

From 8b4274339ad034aeede9b926ac47bad89ae2f397 Mon Sep 17 00:00:00 2001
From: Daniel Beck <Daniel Beck>
Date: Mon, 9 Feb 2015 09:28:53 +1100
Subject: [PATCH 05/13] added decorator that changes numpy invalid op warning
 to exception

---
 GPy/kern/_src/prod.py       | 20 +++++++++++++++++---
 GPy/testing/kernel_tests.py |  7 ++++---
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py
index e3776838..4f9f5ea6 100644
--- a/GPy/kern/_src/prod.py
+++ b/GPy/kern/_src/prod.py
@@ -7,6 +7,19 @@ from ...util.caching import Cache_this
 import itertools
 import operator
 
+
+def numpy_invalid_op_as_exception(func):
+    """
+    A decorator that allows catching numpy invalid operations
+    as exceptions (the default behaviour is raising warnings).
+    """
+    def func_wrapper(*args, **kwargs):
+        np.seterr(invalid='raise')
+        func(*args, **kwargs)
+        np.seterr(invalid='warn')
+    return func_wrapper
+
+
 class Prod(CombinationKernel):
     """
     Computes the product of 2 kernels
@@ -42,15 +55,14 @@ class Prod(CombinationKernel):
             which_parts = self.parts
         return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
 
+    @numpy_invalid_op_as_exception
     def update_gradients_full(self, dL_dK, X, X2=None):
-        np.seterr(invalid='raise')
         k = self.K(X,X2)*dL_dK
         try:
             for p in self.parts:
                 p.update_gradients_full(k/p.K(X,X2),X,X2)
         except FloatingPointError:
-            np.seterr(invalid='warn')
-            print "Gradient warning: falling back to slow version due to zero-valued kernel"
+            #print "WARNING: gradient calculation falling back to slow version due to zero-valued kernel"
             for combination in itertools.combinations(self.parts, len(self.parts) - 1):
                 prod = reduce(operator.mul, [p.K(X, X2) for p in combination])
                 to_update = list(set(self.parts) - set(combination))[0]
@@ -75,3 +87,5 @@ class Prod(CombinationKernel):
         for p in self.parts:
             target += p.gradients_X_diag(k/p.Kdiag(X),X)
         return target
+
+
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index 387047b6..ac6d7ab4 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -408,10 +408,11 @@ class KernelTestsProductWithZeroValues(unittest.TestCase):
         lin = GPy.kern.Linear(2)
         bias = GPy.kern.Bias(2)
         k = lin * bias
-        #k = lin
         m = GPy.models.GPRegression(X, Y, kernel=k)
-        #m['mul.bias.variance'].constrain_fixed(0)
-        m.optimize(messages=False)
+        try:
+            m.optimize()
+        except np.linalg.LinAlgError:
+            self.fail("Zero-valued kernel raised exception!")
 
 
 

From d6a56a6f0bf234f4c8c9f9f1b595ff5c9305bed0 Mon Sep 17 00:00:00 2001
From: Daniel Beck <Daniel Beck>
Date: Mon, 9 Feb 2015 09:35:32 +1100
Subject: [PATCH 06/13] changed operator.mul to np.multiply for consistency

---
 GPy/kern/_src/prod.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py
index 4f9f5ea6..241c2448 100644
--- a/GPy/kern/_src/prod.py
+++ b/GPy/kern/_src/prod.py
@@ -5,7 +5,6 @@ import numpy as np
 from kern import CombinationKernel
 from ...util.caching import Cache_this
 import itertools
-import operator
 
 
 def numpy_invalid_op_as_exception(func):
@@ -64,7 +63,7 @@ class Prod(CombinationKernel):
         except FloatingPointError:
             #print "WARNING: gradient calculation falling back to slow version due to zero-valued kernel"
             for combination in itertools.combinations(self.parts, len(self.parts) - 1):
-                prod = reduce(operator.mul, [p.K(X, X2) for p in combination])
+                prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
                 to_update = list(set(self.parts) - set(combination))[0]
                 to_update.update_gradients_full(dL_dK * prod, X, X2)
 

From fc8705104b05cadc772307536f06f6de803c72bc Mon Sep 17 00:00:00 2001
From: Daniel Beck <Daniel Beck>
Date: Mon, 9 Feb 2015 09:41:21 +1100
Subject: [PATCH 07/13] a cleaner test

---
 GPy/kern/_src/prod.py       | 1 -
 GPy/testing/kernel_tests.py | 8 +++-----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py
index 241c2448..5e4c0d29 100644
--- a/GPy/kern/_src/prod.py
+++ b/GPy/kern/_src/prod.py
@@ -61,7 +61,6 @@ class Prod(CombinationKernel):
             for p in self.parts:
                 p.update_gradients_full(k/p.K(X,X2),X,X2)
         except FloatingPointError:
-            #print "WARNING: gradient calculation falling back to slow version due to zero-valued kernel"
             for combination in itertools.combinations(self.parts, len(self.parts) - 1):
                 prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
                 to_update = list(set(self.parts) - set(combination))[0]
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index ac6d7ab4..f9d90607 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -408,11 +408,9 @@ class KernelTestsProductWithZeroValues(unittest.TestCase):
         lin = GPy.kern.Linear(2)
         bias = GPy.kern.Bias(2)
         k = lin * bias
-        m = GPy.models.GPRegression(X, Y, kernel=k)
-        try:
-            m.optimize()
-        except np.linalg.LinAlgError:
-            self.fail("Zero-valued kernel raised exception!")
+        k.update_gradients_full(1, X)
+        self.assertFalse(np.isnan(k['linear.variances'].gradient),
+                         "Gradient resulted in NaN")
 
 
 

From 98c743d157f2954226b0ef5b6d3d1817f28e67f6 Mon Sep 17 00:00:00 2001
From: Daniel Beck <Daniel Beck>
Date: Mon, 9 Feb 2015 10:02:26 +1100
Subject: [PATCH 08/13] test + code change in gradients_X

---
 GPy/kern/_src/prod.py       | 15 +++++++++++----
 GPy/testing/kernel_tests.py | 22 ++++++++++++----------
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py
index 5e4c0d29..a3b49973 100644
--- a/GPy/kern/_src/prod.py
+++ b/GPy/kern/_src/prod.py
@@ -14,8 +14,9 @@ def numpy_invalid_op_as_exception(func):
     """
     def func_wrapper(*args, **kwargs):
         np.seterr(invalid='raise')
-        func(*args, **kwargs)
+        result = func(*args, **kwargs)
         np.seterr(invalid='warn')
+        return result
     return func_wrapper
 
 
@@ -66,17 +67,23 @@ class Prod(CombinationKernel):
                 to_update = list(set(self.parts) - set(combination))[0]
                 to_update.update_gradients_full(dL_dK * prod, X, X2)
 
-
     def update_gradients_diag(self, dL_dKdiag, X):
         k = self.Kdiag(X)*dL_dKdiag
         for p in self.parts:
             p.update_gradients_diag(k/p.Kdiag(X),X)
 
+    @numpy_invalid_op_as_exception            
     def gradients_X(self, dL_dK, X, X2=None):
         target = np.zeros(X.shape)
         k = self.K(X,X2)*dL_dK
-        for p in self.parts:
-            target += p.gradients_X(k/p.K(X,X2),X,X2)
+        try:
+            for p in self.parts:
+                target += p.gradients_X(k/p.K(X,X2),X,X2)
+        except FloatingPointError:
+            for combination in itertools.combinations(self.parts, len(self.parts) - 1):
+                prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
+                to_update = list(set(self.parts) - set(combination))[0]
+                target += to_update.gradients_X(dL_dK * prod, X, X2)
         return target
 
     def gradients_X_diag(self, dL_dKdiag, X):
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index f9d90607..415cc7eb 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -400,25 +400,27 @@ class Coregionalize_weave_test(unittest.TestCase):
     #reset the weave state for any other tests
     GPy.util.config.config.set('weave', 'working', 'False')
 
+
 class KernelTestsProductWithZeroValues(unittest.TestCase):
 
-    def test_zero_valued_kernel(self):
-        X = np.array([[0,1],[1,0]])
-        Y = np.array([[1],[10]])
-        lin = GPy.kern.Linear(2)
-        bias = GPy.kern.Bias(2)
-        k = lin * bias
-        k.update_gradients_full(1, X)
-        self.assertFalse(np.isnan(k['linear.variances'].gradient),
+    def setUp(self):
+        self.X = np.array([[0,1],[1,0]])
+        self.k = GPy.kern.Linear(2) * GPy.kern.Bias(2)
+
+    def test_zero_valued_kernel_full(self):
+        self.k.update_gradients_full(1, self.X)
+        self.assertFalse(np.isnan(self.k['linear.variances'].gradient),
                          "Gradient resulted in NaN")
 
+    def test_zero_valued_kernel_gradients_X(self):
+        target = self.k.gradients_X(1, self.X)
+        self.assertFalse(np.any(np.isnan(target)),
+                         "Gradient resulted in NaN")
 
 
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
     unittest.main()
-    #suite = unittest.TestLoader().loadTestsFromTestCase(KernelTestsProductWithZeroValues)
-    #unittest.TextTestRunner().run(suite)
 
 #     np.random.seed(0)
 #     N0 = 3

From 952851de88c2a0054502c2fa0b98109ee867ecde Mon Sep 17 00:00:00 2001
From: Alan Saul <alan.daniel.saul@gmail.com>
Date: Mon, 9 Feb 2015 19:35:46 +0000
Subject: [PATCH 09/13] Bug in linalg jitchol!!!

---
 GPy/testing/linalg_test.py | 35 +++++++++++++++++++++++++++++++++++
 GPy/util/linalg.py         | 12 ++++++------
 2 files changed, 41 insertions(+), 6 deletions(-)
 create mode 100644 GPy/testing/linalg_test.py

diff --git a/GPy/testing/linalg_test.py b/GPy/testing/linalg_test.py
new file mode 100644
index 00000000..b734f6af
--- /dev/null
+++ b/GPy/testing/linalg_test.py
@@ -0,0 +1,35 @@
+import numpy as np
+import scipy as sp
+from ..util.linalg import jitchol
+
+class LinalgTests(np.testing.TestCase):
+    def setUp(self):
+        #Create PD matrix
+        A = np.random.randn(20,100)
+        self.A = A.dot(A.T)
+        #compute Eigdecomp
+        vals, vectors = np.linalg.eig(self.A)
+        #Set smallest eigenval to be negative with 5 rounds worth of jitter
+        vals[vals.argmin()] = 0
+        default_jitter = 1e-6*np.mean(vals)
+        vals[vals.argmin()] = -default_jitter*(10**3.5)
+        self.A_corrupt = (vectors * vals).dot(vectors.T)
+
+    def test_jitchol_success(self):
+        """
+        Expect 5 rounds of jitter to be added and for the recovered matrix to be
+        identical to the corrupted matrix apart from the jitter added to the diagonal
+        """
+        L = jitchol(self.A_corrupt, maxtries=5)
+        A_new = L.dot(L.T)
+        diff = A_new - self.A_corrupt
+        np.testing.assert_allclose(diff, np.eye(A_new.shape[0])*np.diag(diff).mean(), atol=1e-13)
+
+    def test_jitchol_failure(self):
+        try:
+            """ Expecting an exception to be thrown as we expect it to require
+            5 rounds of jitter to be added to enforce PDness"""
+            jitchol(self.A_corrupt, maxtries=4)
+            return False
+        except sp.linalg.LinAlgError:
+            return True
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index dffd438a..2c02357c 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -82,6 +82,7 @@ def force_F_ordered(A):
 
 #         return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1)
 
+
 def jitchol(A, maxtries=5):
     A = np.ascontiguousarray(A)
     L, info = lapack.dpotrf(A, lower=1)
@@ -92,13 +93,16 @@ def jitchol(A, maxtries=5):
         if np.any(diagA <= 0.):
             raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
         jitter = diagA.mean() * 1e-6
-        while maxtries > 0 and np.isfinite(jitter):
+        num_tries = 0
+        while num_tries < maxtries and np.isfinite(jitter):
             try:
+                print jitter
                 L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
+                return L
             except:
                 jitter *= 10
             finally:
-                maxtries -= 1
+                num_tries += 1
         raise linalg.LinAlgError, "not positive definite, even with jitter."
     import traceback
     try: raise
@@ -108,10 +112,6 @@ def jitchol(A, maxtries=5):
     import ipdb;ipdb.set_trace()
     return L
 
-
-
-
-
 # def dtrtri(L, lower=1):
 #     """
 #     Wrapper for lapack dtrtri function

From ae5d70b063536cf41452892b8e8adc13b01cdab4 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Sun, 15 Feb 2015 19:24:51 +0000
Subject: [PATCH 10/13] add mcmc into inference import

---
 GPy/inference/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GPy/inference/__init__.py b/GPy/inference/__init__.py
index f1ffd595..7b1307e3 100644
--- a/GPy/inference/__init__.py
+++ b/GPy/inference/__init__.py
@@ -1,2 +1,3 @@
 import latent_function_inference
-import optimization
\ No newline at end of file
+import optimization
+import mcmc

From c5c8b8341c1908b62a93e144143a91ad2cb10f08 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Tue, 17 Feb 2015 10:48:26 +0000
Subject: [PATCH 11/13] A temporary fix for the problem of the model sometimes
 not being updated.

---
 GPy/core/gp.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 25066381..3252ac08 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -124,6 +124,7 @@ class GP(Model):
             else:
                 self.X = ObsAr(X)
         self.update_model(True)
+        self._trigger_params_changed()
 
     def set_X(self,X):
         """

From 7ad275ce8a81a0b6b61f3ae0c17090ce58f6b731 Mon Sep 17 00:00:00 2001
From: mellorjc <mojoeschmoe@gmail.com>
Date: Thu, 19 Feb 2015 11:31:46 +0000
Subject: [PATCH 12/13] matplotlib interactive mode only in IPython

Enable matplotlib interactive mode only when running under IPython, so that scripts that plot from plain Python behave normally.
---
 GPy/plotting/matplot_dep/maps.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/GPy/plotting/matplot_dep/maps.py b/GPy/plotting/matplot_dep/maps.py
index fcb03b38..eef72a6a 100644
--- a/GPy/plotting/matplot_dep/maps.py
+++ b/GPy/plotting/matplot_dep/maps.py
@@ -6,7 +6,11 @@ try:
     from matplotlib.patches import Polygon
     from matplotlib.collections import PatchCollection
     #from matplotlib import cm
-    pb.ion()
+    try:
+        __IPYTHON__
+        pb.ion()
+    except:
+        pass
 except:
     pass
 import re

From f25797cd617655f73b828f803e38ccc3e7144e60 Mon Sep 17 00:00:00 2001
From: mellorjc <mojoeschmoe@gmail.com>
Date: Thu, 19 Feb 2015 11:45:57 +0000
Subject: [PATCH 13/13] catch only a specific error

catch only NameError, rather than everything.
---
 GPy/plotting/matplot_dep/maps.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/plotting/matplot_dep/maps.py b/GPy/plotting/matplot_dep/maps.py
index eef72a6a..a651f34d 100644
--- a/GPy/plotting/matplot_dep/maps.py
+++ b/GPy/plotting/matplot_dep/maps.py
@@ -9,7 +9,7 @@ try:
     try:
         __IPYTHON__
         pb.ion()
-    except:
+    except NameError:
         pass
 except:
     pass