From c14aef948ea4f1e2dd663a70dfa2cd30ae89bf6a Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Wed, 3 Dec 2014 17:18:22 -0800
Subject: [PATCH 001/166] fixed minor bug in sparse gp minibatch

---
 GPy/models/sparse_gp_minibatch.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py
index ec2e28f5..f5119e48 100644
--- a/GPy/models/sparse_gp_minibatch.py
+++ b/GPy/models/sparse_gp_minibatch.py
@@ -47,10 +47,11 @@ Created on 3 Nov 2014
     def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp',
                  Y_metadata=None, normalizer=False, missing_data=False, stochastic=False, batchsize=1):
-        #pick a sensible inference method
+
+        # pick a sensible inference method
         if inference_method is None:
             if isinstance(likelihood, likelihoods.Gaussian):
-                inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1])
+                inference_method = var_dtc.VarDTC(limit=1 if not missing_data else Y.shape[1])
             else:
                 #inference_method = ??
                 raise NotImplementedError, "what to do what to do?"

From ecf463e88631bcc3ef3c4eb608fdedd0c81edbcc Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Thu, 4 Dec 2014 14:21:50 +0000
Subject: [PATCH 002/166] implement update_gradients_diag for MLP kernel

---
 GPy/kern/_src/mlp.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/GPy/kern/_src/mlp.py b/GPy/kern/_src/mlp.py
index badbd60d..16e84363 100644
--- a/GPy/kern/_src/mlp.py
+++ b/GPy/kern/_src/mlp.py
@@ -79,8 +79,14 @@ class MLP(Kern):
                                + 2*self.bias_variance + 2.))*base_cov_grad).sum()
 
     def update_gradients_diag(self, dL_dKdiag, X):
-        raise NotImplementedError, "TODO"
-
+        self._K_diag_computations(X)
+        self.variance.gradient = np.sum(self._K_diag_dvar*dL_dKdiag)
+
+        base = four_over_tau*self.variance/np.sqrt(1-self._K_diag_asin_arg*self._K_diag_asin_arg)
+        base_cov_grad = base*dL_dKdiag/np.square(self._K_diag_denom)
+
+        self.weight_variance.gradient = (base_cov_grad*np.square(X).sum(axis=1)).sum()
+        self.bias_variance.gradient = base_cov_grad.sum()
 
     def gradients_X(self, dL_dK, X, X2):
        """Derivative of the covariance matrix with respect to X"""

From bd1fb56e6c58eaf348322df4283e2ad8bfafad04 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Fri, 2 Jan 2015 15:07:19 -0800
Subject: [PATCH 003/166] re-implemented warpedGP for new release of GPy

---
 GPy/kern/__init__.py          |  2 +-
 GPy/models/warped_gp.py       | 90 ++++++++++++++++-------------
 GPy/util/warping_functions.py | 54 +++++++++++++--------
 3 files changed, 76 insertions(+), 70 deletions(-)

diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index c400277c..7a7c7ad8 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -1,7 +1,7 @@
 from _src.kern import Kern
 from _src.rbf import RBF
 from _src.linear import Linear, LinearFull
-from _src.static import Bias, White
+from _src.static import Bias, White, Fixed
 from _src.brownian import Brownian
 from _src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
 from _src.mlp import MLP
diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py
index 4b982ed2..5bc9a417 100644
--- a/GPy/models/warped_gp.py
+++ b/GPy/models/warped_gp.py
@@ -1,7 +1,6 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) - import numpy as np from ..util.warping_functions import * from ..core import GP @@ -10,14 +9,16 @@ from GPy.util.warping_functions import TanhWarpingFunction_d from GPy import kern class WarpedGP(GP): - def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalize_X=False, normalize_Y=False): + def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3): if kernel is None: - kernel = kern.rbf(X.shape[1]) + kernel = kern.RBF(X.shape[1]) if warping_function == None: self.warping_function = TanhWarpingFunction_d(warping_terms) self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1,) * 1) + else: + self.warping_function = warping_function self.scale_data = False if self.scale_data: @@ -25,10 +26,10 @@ class WarpedGP(GP): self.has_uncertain_inputs = False self.Y_untransformed = Y.copy() self.predict_in_warped_space = False - likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y) + likelihood = likelihoods.Gaussian() - GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) - self._set_params(self._get_params()) + GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel) + self.link_parameter(self.warping_function) def _scale_data(self, Y): self._Ymax = Y.max() @@ -38,62 +39,55 @@ class WarpedGP(GP): def _unscale_data(self, Y): return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin - def _set_params(self, x): - self.warping_params = x[:self.warping_function.num_parameters] - Y = self.transform_data() - self.likelihood.set_data(Y) - GP._set_params(self, x[self.warping_function.num_parameters:].copy()) + def parameters_changed(self): + self.Y[:] = self.transform_data() + super(WarpedGP, self).parameters_changed() - def _get_params(self): - return np.hstack((self.warping_params.flatten().copy(), GP._get_params(self).copy())) + Kiy = self.posterior.woodbury_vector.flatten() - def _get_param_names(self): - warping_names = self.warping_function._get_param_names() - param_names = GP._get_param_names(self) - return warping_names + param_names - - def transform_data(self): - Y = self.warping_function.f(self.Y_untransformed.copy(), self.warping_params).copy() - return Y - - def log_likelihood(self): - ll = GP.log_likelihood(self) - jacobian = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params) - return ll + np.log(jacobian).sum() - - def _log_likelihood_gradients(self): - ll_grads = GP._log_likelihood_gradients(self) - alpha = np.dot(self.Ki, self.likelihood.Y.flatten()) - warping_grads = self.warping_function_gradients(alpha) - - warping_grads = np.append(warping_grads[:, :-1].flatten(), warping_grads[0, -1]) - return np.hstack((warping_grads.flatten(), ll_grads.flatten())) - - def warping_function_gradients(self, Kiy): - grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params) - grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params, + grad_y = self.warping_function.fgrad_y(self.Y_untransformed) + grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, return_covar_chain=True) djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0) dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0) - return -dquad_dpsi + djac_dpsi + warping_grads = -dquad_dpsi + djac_dpsi + + self.warping_function.psi.gradient[:] = warping_grads[:, :-1] + 
self.warping_function.d.gradient[:] = warping_grads[0, -1] + + + def transform_data(self): + Y = self.warping_function.f(self.Y_untransformed.copy()).copy() + return Y + + def log_likelihood(self): + ll = GP.log_likelihood(self) + jacobian = self.warping_function.fgrad_y(self.Y_untransformed) + return ll + np.log(jacobian).sum() def plot_warping(self): - self.warping_function.plot(self.warping_params, self.Y_untransformed.min(), self.Y_untransformed.max()) + self.warping_function.plot(self.Y_untransformed.min(), self.Y_untransformed.max()) - def predict(self, Xnew, which_parts='all', full_cov=False, pred_init=None): + def predict(self, Xnew, which_parts='all', pred_init=None): # normalize X values - Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale - mu, var = GP._raw_predict(self, Xnew, full_cov=full_cov, which_parts=which_parts) + # Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale + mu, var = GP._raw_predict(self, Xnew) # now push through likelihood - mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov) + mean, var = self.likelihood.predictive_values(mu, var) if self.predict_in_warped_space: - mean = self.warping_function.f_inv(mean, self.warping_params, y=pred_init) - var = self.warping_function.f_inv(var, self.warping_params) + mean = self.warping_function.f_inv(mean, y=pred_init) + var = self.warping_function.f_inv(var) if self.scale_data: mean = self._unscale_data(mean) - - return mean, var, _025pm, _975pm + + return mean, var + +if __name__ == '__main__': + X = np.random.randn(100, 1) + Y = np.sin(X) + np.random.randn(100, 1)*0.05 + + m = WarpedGP(X, Y) diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py index a0a385e0..a7547be6 100644 --- a/GPy/util/warping_functions.py +++ b/GPy/util/warping_functions.py @@ -1,17 +1,18 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) - import numpy as np +from GPy.core.parameterization import Parameterized, Param +from ..core.parameterization.transformations import Logexp -class WarpingFunction(object): +class WarpingFunction(Parameterized): """ abstract function for warping z = f(y) """ - def __init__(self): - raise NotImplementedError + def __init__(self, name): + super(WarpingFunction, self).__init__(name=name) def f(self,y,psi): """function transformation @@ -34,9 +35,10 @@ class WarpingFunction(object): def _get_param_names(self): raise NotImplementedError - def plot(self, psi, xmin, xmax): + def plot(self, xmin, xmax): + psi = self.psi y = np.arange(xmin, xmax, 0.01) - f_y = self.f(y, psi) + f_y = self.f(y) from matplotlib import pyplot as plt plt.figure() plt.plot(y, f_y) @@ -50,6 +52,7 @@ class TanhWarpingFunction(WarpingFunction): """n_terms specifies the number of tanh terms to be used""" self.n_terms = n_terms self.num_parameters = 3 * self.n_terms + super(TanhWarpingFunction, self).__init__(name='warp_tanh') def f(self,y,psi): """ @@ -163,8 +166,18 @@ class TanhWarpingFunction_d(WarpingFunction): """n_terms specifies the number of tanh terms to be used""" self.n_terms = n_terms self.num_parameters = 3 * self.n_terms + 1 + self.psi = np.ones((self.n_terms, 3)) - def f(self,y,psi): + super(TanhWarpingFunction_d, self).__init__(name='warp_tanh') + self.psi = Param('psi', self.psi) + self.psi[:, :2].constrain_positive() + + self.d = Param('%s' % ('d'), 1.0, Logexp()) + self.link_parameter(self.psi) + self.link_parameter(self.d) + + + def f(self,y): """ Transform y with f using parameter vector psi psi = [[a,b,c]] @@ -175,9 +188,9 @@ class TanhWarpingFunction_d(WarpingFunction): #1. check that number of params is consistent # assert psi.shape[0] == self.n_terms, 'inconsistent parameter dimensions' # assert psi.shape[1] == 4, 'inconsistent parameter dimensions' - mpsi = psi.copy() - d = psi[-1] - mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3) + + d = self.d + mpsi = self.psi #3. 
transform data z = d*y.copy() @@ -187,7 +200,7 @@ class TanhWarpingFunction_d(WarpingFunction): return z - def f_inv(self, z, psi, max_iterations=1000, y=None): + def f_inv(self, z, max_iterations=1000, y=None): """ calculate the numerical inverse of f @@ -198,12 +211,12 @@ class TanhWarpingFunction_d(WarpingFunction): z = z.copy() if y is None: y = np.ones_like(z) - + it = 0 update = np.inf while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations): - update = (self.f(y, psi) - z)/self.fgrad_y(y, psi) + update = (self.f(y) - z)/self.fgrad_y(y) y -= update it += 1 if it == max_iterations: @@ -212,7 +225,7 @@ class TanhWarpingFunction_d(WarpingFunction): return y - def fgrad_y(self, y, psi, return_precalc = False): + def fgrad_y(self, y,return_precalc = False): """ gradient of f w.r.t to y ([N x 1]) @@ -221,9 +234,8 @@ class TanhWarpingFunction_d(WarpingFunction): """ - mpsi = psi.copy() - d = psi[-1] - mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3) + d = self.d + mpsi = self.psi # vectorized version @@ -240,7 +252,7 @@ class TanhWarpingFunction_d(WarpingFunction): return GRAD - def fgrad_y_psi(self, y, psi, return_covar_chain = False): + def fgrad_y_psi(self, y, return_covar_chain = False): """ gradient of f w.r.t to y and psi @@ -248,10 +260,10 @@ class TanhWarpingFunction_d(WarpingFunction): """ - mpsi = psi.copy() - mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3) - w, s, r, d = self.fgrad_y(y, psi, return_precalc = True) + mpsi = self.psi + + w, s, r, d = self.fgrad_y(y, return_precalc = True) gradients = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4)) for i in range(len(mpsi)): From 1d2cbfe44a9f78a1206fdd2366a36e7cad562bd1 Mon Sep 17 00:00:00 2001 From: Daniel Beck Date: Fri, 6 Feb 2015 19:39:46 +1100 Subject: [PATCH 004/166] first attempt --- GPy/kern/_src/prod.py | 15 +++++++++++++-- GPy/testing/kernel_tests.py | 15 +++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index dd9a5fe4..e3776838 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -5,6 +5,7 @@ import numpy as np from kern import CombinationKernel from ...util.caching import Cache_this import itertools +import operator class Prod(CombinationKernel): """ @@ -42,9 +43,19 @@ class Prod(CombinationKernel): return reduce(np.multiply, (p.Kdiag(X) for p in which_parts)) def update_gradients_full(self, dL_dK, X, X2=None): + np.seterr(invalid='raise') k = self.K(X,X2)*dL_dK - for p in self.parts: - p.update_gradients_full(k/p.K(X,X2),X,X2) + try: + for p in self.parts: + p.update_gradients_full(k/p.K(X,X2),X,X2) + except FloatingPointError: + np.seterr(invalid='warn') + print "Gradient warning: falling back to slow version due to zero-valued kernel" + for combination in itertools.combinations(self.parts, len(self.parts) - 1): + prod = reduce(operator.mul, [p.K(X, X2) for p in combination]) + to_update = list(set(self.parts) - set(combination))[0] + to_update.update_gradients_full(dL_dK * prod, X, X2) + def update_gradients_diag(self, dL_dKdiag, X): k = self.Kdiag(X)*dL_dKdiag diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index c1bb9265..387047b6 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -400,12 +400,27 @@ class Coregionalize_weave_test(unittest.TestCase): #reset the weave state for any other tests GPy.util.config.config.set('weave', 'working', 'False') +class KernelTestsProductWithZeroValues(unittest.TestCase): + + def 
test_zero_valued_kernel(self): + X = np.array([[0,1],[1,0]]) + Y = np.array([[1],[10]]) + lin = GPy.kern.Linear(2) + bias = GPy.kern.Bias(2) + k = lin * bias + #k = lin + m = GPy.models.GPRegression(X, Y, kernel=k) + #m['mul.bias.variance'].constrain_fixed(0) + m.optimize(messages=False) if __name__ == "__main__": print "Running unit tests, please be (very) patient..." unittest.main() + #suite = unittest.TestLoader().loadTestsFromTestCase(KernelTestsProductWithZeroValues) + #unittest.TextTestRunner().run(suite) + # np.random.seed(0) # N0 = 3 # N1 = 9 From 8b4274339ad034aeede9b926ac47bad89ae2f397 Mon Sep 17 00:00:00 2001 From: Daniel Beck Date: Mon, 9 Feb 2015 09:28:53 +1100 Subject: [PATCH 005/166] added decorator that changes numpy invalid op warning to exception --- GPy/kern/_src/prod.py | 20 +++++++++++++++++--- GPy/testing/kernel_tests.py | 7 ++++--- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index e3776838..4f9f5ea6 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -7,6 +7,19 @@ from ...util.caching import Cache_this import itertools import operator + +def numpy_invalid_op_as_exception(func): + """ + A decorator that allows catching numpy invalid operations + as exceptions (the default behaviour is raising warnings). + """ + def func_wrapper(*args, **kwargs): + np.seterr(invalid='raise') + func(*args, **kwargs) + np.seterr(invalid='warn') + return func_wrapper + + class Prod(CombinationKernel): """ Computes the product of 2 kernels @@ -42,15 +55,14 @@ class Prod(CombinationKernel): which_parts = self.parts return reduce(np.multiply, (p.Kdiag(X) for p in which_parts)) + @numpy_invalid_op_as_exception def update_gradients_full(self, dL_dK, X, X2=None): - np.seterr(invalid='raise') k = self.K(X,X2)*dL_dK try: for p in self.parts: p.update_gradients_full(k/p.K(X,X2),X,X2) except FloatingPointError: - np.seterr(invalid='warn') - print "Gradient warning: falling back to slow version due to zero-valued kernel" + #print "WARNING: gradient calculation falling back to slow version due to zero-valued kernel" for combination in itertools.combinations(self.parts, len(self.parts) - 1): prod = reduce(operator.mul, [p.K(X, X2) for p in combination]) to_update = list(set(self.parts) - set(combination))[0] @@ -75,3 +87,5 @@ class Prod(CombinationKernel): for p in self.parts: target += p.gradients_X_diag(k/p.Kdiag(X),X) return target + + diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 387047b6..ac6d7ab4 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -408,10 +408,11 @@ class KernelTestsProductWithZeroValues(unittest.TestCase): lin = GPy.kern.Linear(2) bias = GPy.kern.Bias(2) k = lin * bias - #k = lin m = GPy.models.GPRegression(X, Y, kernel=k) - #m['mul.bias.variance'].constrain_fixed(0) - m.optimize(messages=False) + try: + m.optimize() + except np.linalg.LinAlgError: + self.fail("Zero-valued kernel raised exception!") From d6a56a6f0bf234f4c8c9f9f1b595ff5c9305bed0 Mon Sep 17 00:00:00 2001 From: Daniel Beck Date: Mon, 9 Feb 2015 09:35:32 +1100 Subject: [PATCH 006/166] changed operator.mul to np.multiply for consistency --- GPy/kern/_src/prod.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index 4f9f5ea6..241c2448 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -5,7 +5,6 @@ import numpy as np from kern import CombinationKernel from ...util.caching import Cache_this import 
itertools -import operator def numpy_invalid_op_as_exception(func): @@ -64,7 +63,7 @@ class Prod(CombinationKernel): except FloatingPointError: #print "WARNING: gradient calculation falling back to slow version due to zero-valued kernel" for combination in itertools.combinations(self.parts, len(self.parts) - 1): - prod = reduce(operator.mul, [p.K(X, X2) for p in combination]) + prod = reduce(np.multiply, [p.K(X, X2) for p in combination]) to_update = list(set(self.parts) - set(combination))[0] to_update.update_gradients_full(dL_dK * prod, X, X2) From fc8705104b05cadc772307536f06f6de803c72bc Mon Sep 17 00:00:00 2001 From: Daniel Beck Date: Mon, 9 Feb 2015 09:41:21 +1100 Subject: [PATCH 007/166] a cleaner test --- GPy/kern/_src/prod.py | 1 - GPy/testing/kernel_tests.py | 8 +++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index 241c2448..5e4c0d29 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -61,7 +61,6 @@ class Prod(CombinationKernel): for p in self.parts: p.update_gradients_full(k/p.K(X,X2),X,X2) except FloatingPointError: - #print "WARNING: gradient calculation falling back to slow version due to zero-valued kernel" for combination in itertools.combinations(self.parts, len(self.parts) - 1): prod = reduce(np.multiply, [p.K(X, X2) for p in combination]) to_update = list(set(self.parts) - set(combination))[0] diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index ac6d7ab4..f9d90607 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -408,11 +408,9 @@ class KernelTestsProductWithZeroValues(unittest.TestCase): lin = GPy.kern.Linear(2) bias = GPy.kern.Bias(2) k = lin * bias - m = GPy.models.GPRegression(X, Y, kernel=k) - try: - m.optimize() - except np.linalg.LinAlgError: - self.fail("Zero-valued kernel raised exception!") + k.update_gradients_full(1, X) + self.assertFalse(np.isnan(k['linear.variances'].gradient), + "Gradient resulted in NaN") From 98c743d157f2954226b0ef5b6d3d1817f28e67f6 Mon Sep 17 00:00:00 2001 From: Daniel Beck Date: Mon, 9 Feb 2015 10:02:26 +1100 Subject: [PATCH 008/166] test + code change in gradients_X --- GPy/kern/_src/prod.py | 15 +++++++++++---- GPy/testing/kernel_tests.py | 22 ++++++++++++---------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index 5e4c0d29..a3b49973 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -14,8 +14,9 @@ def numpy_invalid_op_as_exception(func): """ def func_wrapper(*args, **kwargs): np.seterr(invalid='raise') - func(*args, **kwargs) + result = func(*args, **kwargs) np.seterr(invalid='warn') + return result return func_wrapper @@ -66,17 +67,23 @@ class Prod(CombinationKernel): to_update = list(set(self.parts) - set(combination))[0] to_update.update_gradients_full(dL_dK * prod, X, X2) - def update_gradients_diag(self, dL_dKdiag, X): k = self.Kdiag(X)*dL_dKdiag for p in self.parts: p.update_gradients_diag(k/p.Kdiag(X),X) + @numpy_invalid_op_as_exception def gradients_X(self, dL_dK, X, X2=None): target = np.zeros(X.shape) k = self.K(X,X2)*dL_dK - for p in self.parts: - target += p.gradients_X(k/p.K(X,X2),X,X2) + try: + for p in self.parts: + target += p.gradients_X(k/p.K(X,X2),X,X2) + except FloatingPointError: + for combination in itertools.combinations(self.parts, len(self.parts) - 1): + prod = reduce(np.multiply, [p.K(X, X2) for p in combination]) + to_update = list(set(self.parts) - set(combination))[0] + target += 
to_update.gradients_X(dL_dK * prod, X, X2) return target def gradients_X_diag(self, dL_dKdiag, X): diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index f9d90607..415cc7eb 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -400,25 +400,27 @@ class Coregionalize_weave_test(unittest.TestCase): #reset the weave state for any other tests GPy.util.config.config.set('weave', 'working', 'False') + class KernelTestsProductWithZeroValues(unittest.TestCase): - def test_zero_valued_kernel(self): - X = np.array([[0,1],[1,0]]) - Y = np.array([[1],[10]]) - lin = GPy.kern.Linear(2) - bias = GPy.kern.Bias(2) - k = lin * bias - k.update_gradients_full(1, X) - self.assertFalse(np.isnan(k['linear.variances'].gradient), + def setUp(self): + self.X = np.array([[0,1],[1,0]]) + self.k = GPy.kern.Linear(2) * GPy.kern.Bias(2) + + def test_zero_valued_kernel_full(self): + self.k.update_gradients_full(1, self.X) + self.assertFalse(np.isnan(self.k['linear.variances'].gradient), "Gradient resulted in NaN") + def test_zero_valued_kernel_gradients_X(self): + target = self.k.gradients_X(1, self.X) + self.assertFalse(np.any(np.isnan(target)), + "Gradient resulted in NaN") if __name__ == "__main__": print "Running unit tests, please be (very) patient..." unittest.main() - #suite = unittest.TestLoader().loadTestsFromTestCase(KernelTestsProductWithZeroValues) - #unittest.TextTestRunner().run(suite) # np.random.seed(0) # N0 = 3 From 952851de88c2a0054502c2fa0b98109ee867ecde Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 9 Feb 2015 19:35:46 +0000 Subject: [PATCH 009/166] Bug in linalg jitchol!!! --- GPy/testing/linalg_test.py | 35 +++++++++++++++++++++++++++++++++++ GPy/util/linalg.py | 12 ++++++------ 2 files changed, 41 insertions(+), 6 deletions(-) create mode 100644 GPy/testing/linalg_test.py diff --git a/GPy/testing/linalg_test.py b/GPy/testing/linalg_test.py new file mode 100644 index 00000000..b734f6af --- /dev/null +++ b/GPy/testing/linalg_test.py @@ -0,0 +1,35 @@ +import numpy as np +import scipy as sp +from ..util.linalg import jitchol + +class LinalgTests(np.testing.TestCase): + def setUp(self): + #Create PD matrix + A = np.random.randn(20,100) + self.A = A.dot(A.T) + #compute Eigdecomp + vals, vectors = np.linalg.eig(self.A) + #Set smallest eigenval to be negative with 5 rounds worth of jitter + vals[vals.argmin()] = 0 + default_jitter = 1e-6*np.mean(vals) + vals[vals.argmin()] = -default_jitter*(10**3.5) + self.A_corrupt = (vectors * vals).dot(vectors.T) + + def test_jitchol_success(self): + """ + Expect 5 rounds of jitter to be added and for the recovered matrix to be + identical to the corrupted matrix apart from the jitter added to the diagonal + """ + L = jitchol(self.A_corrupt, maxtries=5) + A_new = L.dot(L.T) + diff = A_new - self.A_corrupt + np.testing.assert_allclose(diff, np.eye(A_new.shape[0])*np.diag(diff).mean(), atol=1e-13) + + def test_jitchol_failure(self): + try: + """ Expecting an exception to be thrown as we expect it to require + 5 rounds of jitter to be added to enforce PDness""" + jitchol(self.A_corrupt, maxtries=4) + return False + except sp.linalg.LinAlgError: + return True diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index dffd438a..2c02357c 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -82,6 +82,7 @@ def force_F_ordered(A): # return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1) + def jitchol(A, maxtries=5): A = np.ascontiguousarray(A) L, info = lapack.dpotrf(A, lower=1) @@ -92,13 +93,16 @@ def jitchol(A, 
maxtries=5):
         if np.any(diagA <= 0.):
             raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
         jitter = diagA.mean() * 1e-6
-        while maxtries > 0 and np.isfinite(jitter):
+        num_tries = 0
+        while num_tries < maxtries and np.isfinite(jitter):
             try:
+                print jitter
                 L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
+                return L
             except:
                 jitter *= 10
             finally:
-                maxtries -= 1
+                num_tries += 1
         raise linalg.LinAlgError, "not positive definite, even with jitter."
     import traceback
     try: raise
@@ -108,10 +112,6 @@ def jitchol(A, maxtries=5):
         import ipdb;ipdb.set_trace()
     return L
-
-
-
-
 # def dtrtri(L, lower=1):
 #     """
 #     Wrapper for lapack dtrtri function

From ae5d70b063536cf41452892b8e8adc13b01cdab4 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Sun, 15 Feb 2015 19:24:51 +0000
Subject: [PATCH 010/166] add mcmc to the inference imports

---
 GPy/inference/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GPy/inference/__init__.py b/GPy/inference/__init__.py
index f1ffd595..7b1307e3 100644
--- a/GPy/inference/__init__.py
+++ b/GPy/inference/__init__.py
@@ -1,2 +1,3 @@
 import latent_function_inference
-import optimization
\ No newline at end of file
+import optimization
+import mcmc

From c5c8b8341c1908b62a93e144143a91ad2cb10f08 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Tue, 17 Feb 2015 10:48:26 +0000
Subject: [PATCH 011/166] A temporary fix for the problem of the model sometimes not being updated.

---
 GPy/core/gp.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 25066381..3252ac08 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -124,6 +124,7 @@ class GP(Model):
         else:
             self.X = ObsAr(X)
         self.update_model(True)
+        self._trigger_params_changed()
 
     def set_X(self,X):
         """

From 7ad275ce8a81a0b6b61f3ae0c17090ce58f6b731 Mon Sep 17 00:00:00 2001
From: mellorjc
Date: Thu, 19 Feb 2015 11:31:46 +0000
Subject: [PATCH 012/166] matplotlib interactive mode only in IPython

Enable interactive mode only in IPython, so that scripts that plot
behave normally when run from plain Python.
---
 GPy/plotting/matplot_dep/maps.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/GPy/plotting/matplot_dep/maps.py b/GPy/plotting/matplot_dep/maps.py
index fcb03b38..eef72a6a 100644
--- a/GPy/plotting/matplot_dep/maps.py
+++ b/GPy/plotting/matplot_dep/maps.py
@@ -6,7 +6,11 @@ try:
     from matplotlib.patches import Polygon
     from matplotlib.collections import PatchCollection
     #from matplotlib import cm
-    pb.ion()
+    try:
+        __IPYTHON__
+        pb.ion()
+    except:
+        pass
 except:
     pass
 import re

From f25797cd617655f73b828f803e38ccc3e7144e60 Mon Sep 17 00:00:00 2001
From: mellorjc
Date: Thu, 19 Feb 2015 11:45:57 +0000
Subject: [PATCH 013/166] catch only a specific error

Catch only NameError, rather than everything.
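The distinction matters because a bare `except:` would also silence
KeyboardInterrupt, SystemExit, and genuine bugs inside the guarded block,
whereas NameError is exactly what probing an undefined name raises outside
IPython. A minimal standalone sketch of the pattern (hypothetical names,
not part of this patch):

    try:
        __IPYTHON__               # builtin injected only by IPython sessions
        interactive = True        # safe to enable pylab interactive mode
    except NameError:             # plain `python script.py`: name undefined
        interactive = False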
--- GPy/plotting/matplot_dep/maps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/plotting/matplot_dep/maps.py b/GPy/plotting/matplot_dep/maps.py index eef72a6a..a651f34d 100644 --- a/GPy/plotting/matplot_dep/maps.py +++ b/GPy/plotting/matplot_dep/maps.py @@ -9,7 +9,7 @@ try: try: __IPYTHON__ pb.ion() - except: + except NameError: pass except: pass From 1bbf58fdcfc174ae822900e30e7910cf6a15165c Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Wed, 25 Feb 2015 21:38:21 +0000 Subject: [PATCH 014/166] Updated README.md to refer to GPy/testing for running the tests --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e98af85..68b66b31 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,7 @@ Ensure nose is installed via pip: Run nosetests from the root directory of the repository: - nosetests -v + nosetests -v GPy/testing or from within IPython From 5e4afb765a2aca96026159ab967a427ab922c919 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 07:10:10 +0000 Subject: [PATCH 015/166] Relative import fixes for Python 3 compatibility --- GPy/__init__.py | 26 ++++++++--------- GPy/core/__init__.py | 16 +++++------ GPy/core/parameterization/__init__.py | 4 +-- GPy/examples/__init__.py | 8 +++--- GPy/inference/__init__.py | 6 ++-- GPy/kern/__init__.py | 36 ++++++++++++------------ GPy/likelihoods/__init__.py | 16 +++++------ GPy/mappings/__init__.py | 6 ++-- GPy/models/__init__.py | 40 +++++++++++++-------------- GPy/plotting/__init__.py | 2 +- GPy/util/__init__.py | 28 +++++++++---------- 11 files changed, 94 insertions(+), 94 deletions(-) diff --git a/GPy/__init__.py b/GPy/__init__.py index 5e091170..26713406 100644 --- a/GPy/__init__.py +++ b/GPy/__init__.py @@ -3,23 +3,23 @@ import warnings warnings.filterwarnings("ignore", category=DeprecationWarning) -import core -from core.parameterization import transformations, priors +from . import core +from .core.parameterization import transformations, priors constraints = transformations -import models -import mappings -import inference -import util -import examples -import likelihoods -import testing +from . import models +from . import mappings +from . import inference +from . import util +from . import examples +from . import likelihoods +from . import testing from numpy.testing import Tester -import kern -import plotting +from . import kern +from . import plotting # Direct imports for convenience: -from core import Model -from core.parameterization import Param, Parameterized, ObsAr +from .core import Model +from .core.parameterization import Param, Parameterized, ObsAr #@nottest try: diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index ebed29bb..142eccbf 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -1,12 +1,12 @@ # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) -from model import * -from parameterization.parameterized import adjust_name_for_printing, Parameterizable -from parameterization.param import Param, ParamConcatenation -from parameterization.observable_array import ObsAr +from .model import * +from .parameterization.parameterized import adjust_name_for_printing, Parameterizable +from .parameterization.param import Param, ParamConcatenation +from .parameterization.observable_array import ObsAr -from gp import GP -from svgp import SVGP -from sparse_gp import SparseGP -from mapping import * +from .gp import GP +from .svgp import SVGP +from .sparse_gp import SparseGP +from .mapping import * diff --git a/GPy/core/parameterization/__init__.py b/GPy/core/parameterization/__init__.py index 8e9aa094..de736671 100644 --- a/GPy/core/parameterization/__init__.py +++ b/GPy/core/parameterization/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from param import Param, ObsAr -from parameterized import Parameterized +from .param import Param, ObsAr +from .parameterized import Parameterized diff --git a/GPy/examples/__init__.py b/GPy/examples/__init__.py index 968333e0..4e9e984e 100644 --- a/GPy/examples/__init__.py +++ b/GPy/examples/__init__.py @@ -1,7 +1,7 @@ # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -import classification -import regression -import dimensionality_reduction -import non_gaussian +from . import classification +from . import regression +from . import dimensionality_reduction +from . import non_gaussian diff --git a/GPy/inference/__init__.py b/GPy/inference/__init__.py index 7b1307e3..c5044582 100644 --- a/GPy/inference/__init__.py +++ b/GPy/inference/__init__.py @@ -1,3 +1,3 @@ -import latent_function_inference -import optimization -import mcmc +from . import latent_function_inference +from . import optimization +from . 
import mcmc diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 718be74f..aaeb99a2 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,20 +1,20 @@ -from _src.kern import Kern -from _src.rbf import RBF -from _src.linear import Linear, LinearFull -from _src.static import Bias, White, Fixed -from _src.brownian import Brownian -from _src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine -from _src.mlp import MLP -from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 -from _src.independent_outputs import IndependentOutputs, Hierarchical -from _src.coregionalize import Coregionalize -from _src.ODE_UY import ODE_UY -from _src.ODE_UYC import ODE_UYC -from _src.ODE_st import ODE_st -from _src.ODE_t import ODE_t -from _src.poly import Poly -from _src.eq_ode2 import EQ_ODE2 +from ._src.kern import Kern +from ._src.rbf import RBF +from ._src.linear import Linear, LinearFull +from ._src.static import Bias, White, Fixed +from ._src.brownian import Brownian +from ._src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine +from ._src.mlp import MLP +from ._src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 +from ._src.independent_outputs import IndependentOutputs, Hierarchical +from ._src.coregionalize import Coregionalize +from ._src.ODE_UY import ODE_UY +from ._src.ODE_UYC import ODE_UYC +from ._src.ODE_st import ODE_st +from ._src.ODE_t import ODE_t +from ._src.poly import Poly +from ._src.eq_ode2 import EQ_ODE2 -from _src.trunclinear import TruncLinear,TruncLinear_inf -from _src.splitKern import SplitKern,DiffGenomeKern +from ._src.trunclinear import TruncLinear,TruncLinear_inf +from ._src.splitKern import SplitKern,DiffGenomeKern diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 28e44541..ef29da08 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -1,8 +1,8 @@ -from bernoulli import Bernoulli -from exponential import Exponential -from gaussian import Gaussian -from gamma import Gamma -from poisson import Poisson -from student_t import StudentT -from likelihood import Likelihood -from mixed_noise import MixedNoise +from .bernoulli import Bernoulli +from .exponential import Exponential +from .gaussian import Gaussian +from .gamma import Gamma +from .poisson import Poisson +from .student_t import StudentT +from .likelihood import Likelihood +from .mixed_noise import MixedNoise diff --git a/GPy/mappings/__init__.py b/GPy/mappings/__init__.py index d331c678..d9c13ad0 100644 --- a/GPy/mappings/__init__.py +++ b/GPy/mappings/__init__.py @@ -1,7 +1,7 @@ # Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernel import Kernel -from linear import Linear -from mlp import MLP +from .kernel import Kernel +from .linear import Linear +from .mlp import MLP #from rbf import RBF diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index c6abb5de..8f8fd838 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -1,23 +1,23 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) -from gp_regression import GPRegression -from gp_classification import GPClassification -from sparse_gp_regression import SparseGPRegression, SparseGPRegressionUncertainInput -from sparse_gp_classification import SparseGPClassification -from gplvm import GPLVM -from bcgplvm import BCGPLVM -from sparse_gplvm import SparseGPLVM -from warped_gp import WarpedGP -from bayesian_gplvm import BayesianGPLVM -from mrd import MRD -from gradient_checker import GradientChecker -from ss_gplvm import SSGPLVM -from gp_coregionalized_regression import GPCoregionalizedRegression -from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression -from gp_heteroscedastic_regression import GPHeteroscedasticRegression -from ss_mrd import SSMRD -from gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression -from gp_var_gauss import GPVariationalGaussianApproximation -from one_vs_all_classification import OneVsAllClassification -from one_vs_all_sparse_classification import OneVsAllSparseClassification +from .gp_regression import GPRegression +from .gp_classification import GPClassification +from .sparse_gp_regression import SparseGPRegression, SparseGPRegressionUncertainInput +from .sparse_gp_classification import SparseGPClassification +from .gplvm import GPLVM +from .bcgplvm import BCGPLVM +from .sparse_gplvm import SparseGPLVM +from .warped_gp import WarpedGP +from .bayesian_gplvm import BayesianGPLVM +from .mrd import MRD +from .gradient_checker import GradientChecker +from .ss_gplvm import SSGPLVM +from .gp_coregionalized_regression import GPCoregionalizedRegression +from .sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression +from .gp_heteroscedastic_regression import GPHeteroscedasticRegression +from .ss_mrd import SSMRD +from .gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression +from .gp_var_gauss import GPVariationalGaussianApproximation +from .one_vs_all_classification import OneVsAllClassification +from .one_vs_all_sparse_classification import OneVsAllSparseClassification diff --git a/GPy/plotting/__init__.py b/GPy/plotting/__init__.py index d3a96914..652bc628 100644 --- a/GPy/plotting/__init__.py +++ b/GPy/plotting/__init__.py @@ -2,6 +2,6 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) try: - import matplot_dep + from . import matplot_dep except (ImportError, NameError): print 'Fail to load GPy.plotting.matplot_dep.' \ No newline at end of file diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index c3edfc48..e8d2456e 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -2,18 +2,18 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -import linalg -import misc -import squashers -import warping_functions -import datasets -import mocap -import decorators -import classification -import subarray_and_sorting -import caching -import diag -import initialization -import multioutput -import linalg_gpu +from . import linalg +from . import misc +from . import squashers +from . import warping_functions +from . import datasets +from . import mocap +from . import decorators +from . import classification +from . import subarray_and_sorting +from . import caching +from . import diag +from . import initialization +from . import multioutput +from . 
import linalg_gpu From 2ca24a88f5431f370907e555f3ab402c71de7e6a Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 07:14:40 +0000 Subject: [PATCH 016/166] More relative import fixes for Python 3 compatibility --- GPy/core/gp.py | 6 +++--- GPy/core/mapping.py | 4 ++-- GPy/core/model.py | 2 +- GPy/core/parameterization/index_operations.py | 2 +- GPy/core/parameterization/lists_and_dicts.py | 2 +- GPy/core/parameterization/observable.py | 2 +- GPy/core/parameterization/observable_array.py | 6 +++--- GPy/core/parameterization/param.py | 8 ++++---- GPy/core/parameterization/parameter_core.py | 14 +++++++------- GPy/core/parameterization/parameterized.py | 4 ++-- GPy/core/parameterization/priors.py | 2 +- GPy/core/parameterization/ties_and_remappings.py | 4 ++-- GPy/core/parameterization/transformations.py | 2 +- GPy/core/parameterization/updateable.py | 2 +- GPy/core/parameterization/variational.py | 6 +++--- GPy/core/sparse_gp.py | 6 +++--- GPy/core/sparse_gp_mpi.py | 2 +- GPy/core/svgp.py | 4 ++-- GPy/likelihoods/bernoulli.py | 4 ++-- GPy/likelihoods/exponential.py | 4 ++-- GPy/likelihoods/gamma.py | 4 ++-- GPy/likelihoods/gaussian.py | 4 ++-- GPy/likelihoods/likelihood.py | 2 +- GPy/likelihoods/mixed_noise.py | 6 +++--- GPy/likelihoods/poisson.py | 4 ++-- GPy/likelihoods/student_t.py | 4 ++-- GPy/util/choleskies.py | 2 +- GPy/util/datasets.py | 2 +- GPy/util/linalg.py | 2 +- GPy/util/ln_diff_erfs.py | 2 +- GPy/util/misc.py | 2 +- 31 files changed, 60 insertions(+), 60 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 3252ac08..5110e9a5 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -4,11 +4,11 @@ import numpy as np import sys from .. import kern -from model import Model -from parameterization import ObsAr +from .model import Model +from .parameterization import ObsAr from .. import likelihoods from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation -from parameterization.variational import VariationalPosterior +from .parameterization.variational import VariationalPosterior import logging from GPy.util.normalizer import MeanNorm diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py index 111fec6f..163db0c9 100644 --- a/GPy/core/mapping.py +++ b/GPy/core/mapping.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import sys -from parameterization import Parameterized +from .parameterization import Parameterized import numpy as np class Mapping(Parameterized): @@ -74,7 +74,7 @@ class Bijective_mapping(Mapping): """Inverse mapping from output domain of the function to the inputs.""" raise NotImplementedError -from model import Model +from .model import Model class Mapping_check_model(Model): """ diff --git a/GPy/core/model.py b/GPy/core/model.py index c63a29e5..8eb34f33 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -5,7 +5,7 @@ from .. 
import likelihoods from ..inference import optimization from ..util.misc import opt_wrapper -from parameterization import Parameterized +from .parameterization import Parameterized import multiprocessing as mp import numpy as np from numpy.linalg.linalg import LinAlgError diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index 61c82da1..e5273e55 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -3,7 +3,7 @@ import numpy from numpy.lib.function_base import vectorize -from lists_and_dicts import IntArrayDict +from .lists_and_dicts import IntArrayDict def extract_properties_to_index(index, props): prop_index = dict() diff --git a/GPy/core/parameterization/lists_and_dicts.py b/GPy/core/parameterization/lists_and_dicts.py index 5afbb8ed..626603ec 100644 --- a/GPy/core/parameterization/lists_and_dicts.py +++ b/GPy/core/parameterization/lists_and_dicts.py @@ -75,7 +75,7 @@ class ObserverList(object): def __str__(self): from . import ObsAr, Param - from parameter_core import Parameterizable + from .parameter_core import Parameterizable ret = [] curr_p = None diff --git a/GPy/core/parameterization/observable.py b/GPy/core/parameterization/observable.py index 8a85c6ca..0836b5d6 100644 --- a/GPy/core/parameterization/observable.py +++ b/GPy/core/parameterization/observable.py @@ -12,7 +12,7 @@ class Observable(object): """ def __init__(self, *args, **kwargs): super(Observable, self).__init__() - from lists_and_dicts import ObserverList + from .lists_and_dicts import ObserverList self.observers = ObserverList() self._update_on = True diff --git a/GPy/core/parameterization/observable_array.py b/GPy/core/parameterization/observable_array.py index 271fe7b9..c6fea497 100644 --- a/GPy/core/parameterization/observable_array.py +++ b/GPy/core/parameterization/observable_array.py @@ -3,8 +3,8 @@ import numpy as np -from parameter_core import Pickleable -from observable import Observable +from .parameter_core import Pickleable +from .observable import Observable class ObsAr(np.ndarray, Pickleable, Observable): """ @@ -39,7 +39,7 @@ class ObsAr(np.ndarray, Pickleable, Observable): return self.view(np.ndarray) def copy(self): - from lists_and_dicts import ObserverList + from .lists_and_dicts import ObserverList memo = {} memo[id(self)] = self memo[id(self.observers)] = ObserverList() diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 1246bc18..fbbb59ed 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -4,8 +4,8 @@ import itertools import numpy np = numpy -from parameter_core import Parameterizable, adjust_name_for_printing, Pickleable -from observable_array import ObsAr +from .parameter_core import Parameterizable, adjust_name_for_printing, Pickleable +from .observable_array import ObsAr ###### printing __constraints_name__ = "Constraint" @@ -156,7 +156,7 @@ class Param(Parameterizable, ObsAr): #=========================================================================== @property def is_fixed(self): - from transformations import __fixed__ + from .transformations import __fixed__ return self.constraints[__fixed__].size == self.size def _get_original(self, param): @@ -313,7 +313,7 @@ class ParamConcatenation(object): See :py:class:`GPy.core.parameter.Param` for more details on constraining. 
""" # self.params = params - from lists_and_dicts import ArrayList + from .lists_and_dicts import ArrayList self.params = ArrayList([]) for p in params: for p in p.flattened_parameters: diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index bee160b2..5baa81c8 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -13,11 +13,11 @@ Observable Pattern for patameterization """ -from transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED +from .transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED import numpy as np import re import logging -from updateable import Updateable +from .updateable import Updateable class HierarchyError(Exception): """ @@ -170,7 +170,7 @@ class Pickleable(object): def __setstate__(self, state): self.__dict__.update(state) - from lists_and_dicts import ObserverList + from .lists_and_dicts import ObserverList self.observers = ObserverList() self._setup_observers() self._optimizer_copy_transformed = False @@ -268,7 +268,7 @@ class Indexable(Nameable, Updateable): def __init__(self, name, default_constraint=None, *a, **kw): super(Indexable, self).__init__(name=name, *a, **kw) self._default_constraint_ = default_constraint - from index_operations import ParameterIndexOperations + from .index_operations import ParameterIndexOperations self.constraints = ParameterIndexOperations() self.priors = ParameterIndexOperations() if self._default_constraint_ is not None: @@ -310,7 +310,7 @@ class Indexable(Nameable, Updateable): that is an int array, containing the indexes for the flattened param inside this parameterized logic. """ - from param import ParamConcatenation + from .param import ParamConcatenation if isinstance(param, ParamConcatenation): return np.hstack((self._raveled_index_for(p) for p in param.params)) return param._raveled_index() + self._offset_for(param) @@ -407,7 +407,7 @@ class Indexable(Nameable, Updateable): repriorized = self.unset_priors() self._add_to_index_operations(self.priors, repriorized, prior, warning) - from domains import _REAL, _POSITIVE, _NEGATIVE + from .domains import _REAL, _POSITIVE, _NEGATIVE if prior.domain is _POSITIVE: self.constrain_positive(warning) elif prior.domain is _NEGATIVE: @@ -536,7 +536,7 @@ class Indexable(Nameable, Updateable): update the constraints and priors view, so that constraining is automized for the parent. 
""" - from index_operations import ParameterIndexOperationsView + from .index_operations import ParameterIndexOperationsView #if getattr(self, "_in_init_"): #import ipdb;ipdb.set_trace() #self.constraints.update(param.constraints, start) diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 44173f58..6bdd8036 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -5,8 +5,8 @@ import numpy; np = numpy import itertools from re import compile, _pattern_type -from param import ParamConcatenation -from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing +from .param import ParamConcatenation +from .parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing import logging from GPy.core.parameterization.index_operations import ParameterIndexOperationsView diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index 84b6357e..61835e28 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -5,7 +5,7 @@ import numpy as np from scipy.special import gammaln, digamma from ...util.linalg import pdinv -from domains import _REAL, _POSITIVE +from .domains import _REAL, _POSITIVE import warnings import weakref diff --git a/GPy/core/parameterization/ties_and_remappings.py b/GPy/core/parameterization/ties_and_remappings.py index a81b8d61..f0bb2d61 100644 --- a/GPy/core/parameterization/ties_and_remappings.py +++ b/GPy/core/parameterization/ties_and_remappings.py @@ -2,8 +2,8 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from parameterized import Parameterized -from param import Param +from .parameterized import Parameterized +from .param import Param class Remapping(Parameterized): def mapping(self): diff --git a/GPy/core/parameterization/transformations.py b/GPy/core/parameterization/transformations.py index d929b1d9..181c16e0 100644 --- a/GPy/core/parameterization/transformations.py +++ b/GPy/core/parameterization/transformations.py @@ -3,7 +3,7 @@ import numpy as np -from domains import _POSITIVE,_NEGATIVE, _BOUNDED +from .domains import _POSITIVE,_NEGATIVE, _BOUNDED import weakref import sys diff --git a/GPy/core/parameterization/updateable.py b/GPy/core/parameterization/updateable.py index 278ba8cd..86446fa0 100644 --- a/GPy/core/parameterization/updateable.py +++ b/GPy/core/parameterization/updateable.py @@ -3,7 +3,7 @@ Created on 11 Nov 2014 @author: maxz ''' -from observable import Observable +from .observable import Observable class Updateable(Observable): diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index 7cc5c99a..25efdc92 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -5,9 +5,9 @@ Created on 6 Nov 2013 ''' import numpy as np -from parameterized import Parameterized -from param import Param -from transformations import Logexp, Logistic,__fixed__ +from .parameterized import Parameterized +from .param import Param +from .transformations import Logexp, Logistic,__fixed__ from GPy.util.misc import param_to_array from GPy.util.caching import Cache_this diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 9004c9c7..a9866f48 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -2,11 +2,11 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from gp import GP -from parameterization.param 
import Param +from .gp import GP +from .parameterization.param import Param from ..inference.latent_function_inference import var_dtc from .. import likelihoods -from parameterization.variational import VariationalPosterior, NormalPosterior +from .parameterization.variational import VariationalPosterior, NormalPosterior from ..util.linalg import mdot import logging diff --git a/GPy/core/sparse_gp_mpi.py b/GPy/core/sparse_gp_mpi.py index 15d3ad76..ac53d4ac 100644 --- a/GPy/core/sparse_gp_mpi.py +++ b/GPy/core/sparse_gp_mpi.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from sparse_gp import SparseGP +from .sparse_gp import SparseGP from numpy.linalg.linalg import LinAlgError from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch diff --git a/GPy/core/svgp.py b/GPy/core/svgp.py index 603a64a5..9d74889a 100644 --- a/GPy/core/svgp.py +++ b/GPy/core/svgp.py @@ -3,8 +3,8 @@ import numpy as np from ..util import choleskies -from sparse_gp import SparseGP -from parameterization.param import Param +from .sparse_gp import SparseGP +from .parameterization.param import Param from ..inference.latent_function_inference import SVGP as svgp_inf diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index ff2ab30a..2c246042 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -3,8 +3,8 @@ import numpy as np from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf -import link_functions -from likelihood import Likelihood +from . import link_functions +from .likelihood import Likelihood from scipy import stats class Bernoulli(Likelihood): diff --git a/GPy/likelihoods/exponential.py b/GPy/likelihoods/exponential.py index 8110c7d4..1df48412 100644 --- a/GPy/likelihoods/exponential.py +++ b/GPy/likelihoods/exponential.py @@ -5,8 +5,8 @@ import numpy as np from scipy import stats,special import scipy as sp -import link_functions -from likelihood import Likelihood +from . import link_functions +from .likelihood import Likelihood class Exponential(Likelihood): """ diff --git a/GPy/likelihoods/gamma.py b/GPy/likelihoods/gamma.py index c79e196c..c153bd1c 100644 --- a/GPy/likelihoods/gamma.py +++ b/GPy/likelihoods/gamma.py @@ -6,8 +6,8 @@ import numpy as np from scipy import stats,special import scipy as sp from ..core.parameterization import Param -import link_functions -from likelihood import Likelihood +from . import link_functions +from .likelihood import Likelihood class Gamma(Likelihood): """ diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index a6e5b7e0..8029eeba 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -13,8 +13,8 @@ James 11/12/13 import numpy as np from scipy import stats, special -import link_functions -from likelihood import Likelihood +from . import link_functions +from .likelihood import Likelihood from ..core.parameterization import Param from ..core.parameterization.transformations import Logexp from scipy import stats diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 790c6ba4..33698eb2 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -4,7 +4,7 @@ import numpy as np from scipy import stats,special import scipy as sp -import link_functions +from . 
import link_functions from ..util.misc import chain_1, chain_2, chain_3 from scipy.integrate import quad import warnings diff --git a/GPy/likelihoods/mixed_noise.py b/GPy/likelihoods/mixed_noise.py index 8c56f45b..84b3001d 100644 --- a/GPy/likelihoods/mixed_noise.py +++ b/GPy/likelihoods/mixed_noise.py @@ -3,9 +3,9 @@ import numpy as np from scipy import stats, special -import link_functions -from likelihood import Likelihood -from gaussian import Gaussian +from . import link_functions +from .likelihood import Likelihood +from .gaussian import Gaussian from ..core.parameterization import Param from ..core.parameterization.transformations import Logexp from ..core.parameterization import Parameterized diff --git a/GPy/likelihoods/poisson.py b/GPy/likelihoods/poisson.py index ea9b2d10..d6c4334b 100644 --- a/GPy/likelihoods/poisson.py +++ b/GPy/likelihoods/poisson.py @@ -5,8 +5,8 @@ from __future__ import division import numpy as np from scipy import stats,special import scipy as sp -import link_functions -from likelihood import Likelihood +from . import link_functions +from .likelihood import Likelihood class Poisson(Likelihood): """ diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py index 855f6b40..745ce9e8 100644 --- a/GPy/likelihoods/student_t.py +++ b/GPy/likelihoods/student_t.py @@ -4,10 +4,10 @@ import numpy as np from scipy import stats, special import scipy as sp -import link_functions +from . import link_functions from scipy import stats, integrate from scipy.special import gammaln, gamma -from likelihood import Likelihood +from .likelihood import Likelihood from ..core.parameterization import Param from ..core.parameterization.transformations import Logexp diff --git a/GPy/util/choleskies.py b/GPy/util/choleskies.py index 3f37fc3f..cc3a7f75 100644 --- a/GPy/util/choleskies.py +++ b/GPy/util/choleskies.py @@ -3,7 +3,7 @@ import numpy as np from scipy import weave -import linalg +from . import linalg def safe_root(N): diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 254639a6..10835463 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -11,7 +11,7 @@ import datetime import json import re -from config import * +from .config import * ipython_available=True try: diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index b148f2f4..216a1050 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -13,7 +13,7 @@ from ctypes import byref, c_char, c_int, c_double # TODO import scipy import warnings import os -from config import config +from .config import config import logging _scipyversion = np.float64((scipy.__version__).split('.')[:2]) diff --git a/GPy/util/ln_diff_erfs.py b/GPy/util/ln_diff_erfs.py index bb9cfe03..582a4585 100644 --- a/GPy/util/ln_diff_erfs.py +++ b/GPy/util/ln_diff_erfs.py @@ -6,7 +6,7 @@ try: from scipy.special import erfcx, erf except ImportError: from scipy.special import erf - from erfcx import erfcx + from .erfcx import erfcx import numpy as np diff --git a/GPy/util/misc.py b/GPy/util/misc.py index bf37159d..1f746e19 100644 --- a/GPy/util/misc.py +++ b/GPy/util/misc.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from config import * +from .config import * def chain_1(df_dg, dg_dx): """ From 1521b3e26020f1ff52d435bf5e17acaf99522528 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 08:11:11 +0000 Subject: [PATCH 017/166] Convert print to function for Python 3 compatibility. 
This breaks compatibility for versions of Python < 2.6 --- GPy/core/gp.py | 4 ++-- GPy/core/model.py | 18 +++++++++--------- GPy/core/parameterization/parameter_core.py | 2 +- GPy/core/parameterization/parameterized.py | 2 +- .../parameterization/ties_and_remappings.py | 8 ++++---- GPy/core/parameterization/transformations.py | 16 ++++++++-------- GPy/core/parameterization/updateable.py | 2 +- GPy/core/sparse_gp.py | 2 +- GPy/core/sparse_gp_mpi.py | 2 +- GPy/core/verbose_optimization.py | 18 +++++++++--------- 10 files changed, 37 insertions(+), 37 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 5110e9a5..0ef6e15e 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -82,7 +82,7 @@ class GP(Model): inference_method = exact_gaussian_inference.ExactGaussianInference() else: inference_method = expectation_propagation.EP() - print "defaulting to ", inference_method, "for latent function inference" + print("defaulting to ", inference_method, "for latent function inference") self.inference_method = inference_method logger.info("adding kernel and likelihood as parameters") @@ -441,7 +441,7 @@ class GP(Model): try: super(GP, self).optimize(optimizer, start, **kwargs) except KeyboardInterrupt: - print "KeyboardInterrupt caught, calling on_optimization_end() to round things up" + print("KeyboardInterrupt caught, calling on_optimization_end() to round things up") self.inference_method.on_optimization_end() raise diff --git a/GPy/core/model.py b/GPy/core/model.py index 8eb34f33..348cebf1 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -82,7 +82,7 @@ class Model(Parameterized): pool.close() # signal that no more data coming in pool.join() # wait for all the tasks to complete except KeyboardInterrupt: - print "Ctrl+c received, terminating and joining pool." 
+ print("Ctrl+c received, terminating and joining pool.") pool.terminate() pool.join() @@ -95,10 +95,10 @@ class Model(Parameterized): self.optimization_runs.append(jobs[i].get()) if verbose: - print("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt)) + print(("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt))) except Exception as e: if robust: - print("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts)) + print(("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts))) else: raise e @@ -237,10 +237,10 @@ class Model(Parameterized): """ if self.is_fixed or self.size == 0: - print 'nothing to optimize' + print('nothing to optimize') if not self.update_model(): - print "updates were off, setting updates on again" + print("updates were off, setting updates on again") self.update_model(True) if start == None: @@ -305,7 +305,7 @@ class Model(Parameterized): transformed_index = (indices - (~self._fixes_).cumsum())[transformed_index[which[0]]] if transformed_index.size == 0: - print "No free parameters to check" + print("No free parameters to check") return # just check the global ratio @@ -342,7 +342,7 @@ class Model(Parameterized): header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))] header_string = map(lambda x: '|'.join(x), [header_string]) separator = '-' * len(header_string[0]) - print '\n'.join([header_string[0], separator]) + print('\n'.join([header_string[0], separator])) if target_param is None: param_index = range(len(x)) transformed_index = param_index @@ -358,7 +358,7 @@ class Model(Parameterized): transformed_index = param_index if param_index.size == 0: - print "No free parameters to check" + print("No free parameters to check") return gradient = self._grads(x).copy() @@ -392,7 +392,7 @@ class Model(Parameterized): ng = '%.6f' % float(numerical_gradient) df = '%1.e' % float(df_ratio) grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}|{5:^{c5}}".format(formatted_name, r, d, g, ng, df, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4], c5=cols[5]) - print grad_string + print(grad_string) self.optimizer_array = x return ret diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 5baa81c8..06991ab0 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -558,7 +558,7 @@ class Indexable(Nameable, Updateable): """ if warning and reconstrained.size > 0: # TODO: figure out which parameters have changed and only print those - print "WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name) + print("WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name)) index = self._raveled_index() which.add(what, index) return index diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 6bdd8036..1a5ff123 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -325,7 +325,7 @@ class Parameterized(Parameterizable): self._notify_parent_change() self.parameters_changed() except Exception as e: - print "WARNING: caught exception {!s}, trying to continue".format(e) + print("WARNING: caught exception {!s}, trying to continue".format(e)) def copy(self, memo=None): if memo is None: diff --git a/GPy/core/parameterization/ties_and_remappings.py 
b/GPy/core/parameterization/ties_and_remappings.py index f0bb2d61..bafa8a98 100644 --- a/GPy/core/parameterization/ties_and_remappings.py +++ b/GPy/core/parameterization/ties_and_remappings.py @@ -98,7 +98,7 @@ class Tie(Parameterized): if np.all(self.label_buf[idx]==0): # None of p has been tied before. tie_idx = self._expandTieParam(1) - print tie_idx + print(tie_idx) tie_id = self.label_buf.max()+1 self.label_buf[tie_idx] = tie_id else: @@ -189,14 +189,14 @@ class Tie(Parameterized): b0 = self.label_buf==self.label_buf[self.buf_idx[i]] b = self._highest_parent_.param_array[b0]!=self.tied_param[i] if b.sum()==0: - print 'XXX' + print('XXX') continue elif b.sum()==1: - print '!!!' + print('!!!') val = self._highest_parent_.param_array[b0][b][0] self._highest_parent_.param_array[b0] = val else: - print '@@@' + print('@@@') self._highest_parent_.param_array[b0] = self.tied_param[i] changed = True return changed diff --git a/GPy/core/parameterization/transformations.py b/GPy/core/parameterization/transformations.py index 181c16e0..05051c92 100644 --- a/GPy/core/parameterization/transformations.py +++ b/GPy/core/parameterization/transformations.py @@ -72,7 +72,7 @@ class Logexp(Transformation): return np.einsum('i,i->i', df, np.where(f>_lim_val, 1., 1. - np.exp(-f))) def initialize(self, f): if np.any(f < 0.): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") return np.abs(f) def __str__(self): return '+ve' @@ -130,7 +130,7 @@ class NormalTheta(Transformation): def initialize(self, f): if np.any(f[self.var_indices] < 0.): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") f[self.var_indices] = np.abs(f[self.var_indices]) return f @@ -177,7 +177,7 @@ class NormalNaturalAntti(NormalTheta): def initialize(self, f): if np.any(f[self.var_indices] < 0.): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") f[self.var_indices] = np.abs(f[self.var_indices]) return f @@ -220,7 +220,7 @@ class NormalEta(Transformation): def initialize(self, f): if np.any(f[self.var_indices] < 0.): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") f[self.var_indices] = np.abs(f[self.var_indices]) return f @@ -360,7 +360,7 @@ class LogexpNeg(Transformation): return np.einsum('i,i->i', df, np.where(f>_lim_val, -1, -1 + np.exp(-f))) def initialize(self, f): if np.any(f < 0.): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") return np.abs(f) def __str__(self): return '+ve' @@ -412,7 +412,7 @@ class LogexpClipped(Logexp): return np.einsum('i,i->i', df, gf) # np.where(f < self.lower, 0, gf) def initialize(self, f): if np.any(f < 0.): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") return np.abs(f) def __str__(self): return '+ve_c' @@ -428,7 +428,7 @@ class Exponent(Transformation): return np.einsum('i,i->i', df, f) def initialize(self, f): if np.any(f < 0.): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") return np.abs(f) def __str__(self): return '+ve' @@ -486,7 +486,7 @@ class Logistic(Transformation): return np.einsum('i,i->i', df, (f - self.lower) * (self.upper - f) / self.difference) def initialize(self, 
f): if np.any(np.logical_or(f < self.lower, f > self.upper)): - print "Warning: changing parameters to satisfy constraints" + print("Warning: changing parameters to satisfy constraints") #return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f) #FIXME: Max, zeros_like right? return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(np.zeros_like(f)), f) diff --git a/GPy/core/parameterization/updateable.py b/GPy/core/parameterization/updateable.py index 86446fa0..6abf0280 100644 --- a/GPy/core/parameterization/updateable.py +++ b/GPy/core/parameterization/updateable.py @@ -36,7 +36,7 @@ class Updateable(Observable): self.trigger_update() def toggle_update(self): - print "deprecated: toggle_update was renamed to update_toggle for easier access" + print("deprecated: toggle_update was renamed to update_toggle for easier access") self.update_toggle() def update_toggle(self): self.update_model(not self.update_model()) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index a9866f48..96e3dbe7 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -49,7 +49,7 @@ class SparseGP(GP): else: #inference_method = ?? raise NotImplementedError, "what to do what to do?" - print "defaulting to ", inference_method, "for latent function inference" + print("defaulting to ", inference_method, "for latent function inference") self.Z = Param('inducing inputs', Z) self.num_inducing = Z.shape[0] diff --git a/GPy/core/sparse_gp_mpi.py b/GPy/core/sparse_gp_mpi.py index ac53d4ac..28de3124 100644 --- a/GPy/core/sparse_gp_mpi.py +++ b/GPy/core/sparse_gp_mpi.py @@ -56,7 +56,7 @@ class SparseGP_MPI(SparseGP): self.N_range = (N_start, N_end) self.N_list = np.array(N_list) self.Y_local = self.Y[N_start:N_end] - print 'MPI RANK '+str(self.mpi_comm.rank)+' with the data range '+str(self.N_range) + print('MPI RANK '+str(self.mpi_comm.rank)+' with the data range '+str(self.N_range)) mpi_comm.Bcast(self.param_array, root=0) self.update_model(True) diff --git a/GPy/core/verbose_optimization.py b/GPy/core/verbose_optimization.py index 78b6127e..affa7d43 100644 --- a/GPy/core/verbose_optimization.py +++ b/GPy/core/verbose_optimization.py @@ -1,7 +1,7 @@ # Copyright (c) 2012-2014, Max Zwiessele. 
# Licensed under the BSD 3-clause license (see LICENSE.txt) - +from __future__ import print_function import numpy as np import sys import time @@ -65,8 +65,8 @@ class VerboseOptimization(object): #self.progress.add_class('box-flex1') else: self.exps = exponents(self.fnow, self.current_gradient) - print 'Running {} Code:'.format(self.opt_name) - print ' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "secs", mi=self.len_maxiters) + print('Running {} Code:'.format(self.opt_name)) + print(' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "secs", mi=self.len_maxiters)) def __enter__(self): self.start = time.time() @@ -107,11 +107,11 @@ class VerboseOptimization(object): b = np.any(n_exps < self.exps) if a or b: self.p_iter = self.iteration - print '' + print('') if b: self.exps = n_exps - print '\r', - print '{3:> 7.2g} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), time.time()-self.start, mi=self.len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', + print('\r', end=' ') + print('{3:> 7.2g} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), time.time()-self.start, mi=self.len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', sys.stdout.flush() def print_status(self, me, which=None): @@ -140,6 +140,6 @@ class VerboseOptimization(object): self.print_out() if not self.ipython_notebook: - print - print 'Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start) - print + print() + print('Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start)) + print() From d284953b6933fda5eab5d4d2f6154ce82b94769d Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 08:17:41 +0000 Subject: [PATCH 018/166] Added details of Python 3 work --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 68b66b31..4dc5b807 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,12 @@ # GPy +# Moving to Python 3 +Work is underway to make GPy run on Python 3. We are not there yet! Changes performed so far have retained compatibility with Python 2.6 and abive. + +Work done so far: + +* Use 2to3 to fix relative imports +* use 2to3 to convert print from statement to function. Some advanced uses of print meant that this could not be done in a way that retained compatibility with old versions of Python. The oldest version of Python that is supported by this version is 2.6 due to the required future imports. A Gaussian processes framework in Python. From 906f69e20e04883f5d19c4f918b29e2362ca365a Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 08:31:44 +0000 Subject: [PATCH 019/166] Convert print to function for Python 3 compatibility. 
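Most sites here are single-argument calls, either print(m) on a fitted model
or a guarded import message, and these behave identically under Python 2
because the parentheses parse as grouping. A self-contained sketch of the
guarded-import idiom used throughout the examples:

    try:
        import pods
    except ImportError:
        print('pods unavailable, see https://github.com/sods/ods for example datasets')

The few multi-argument conversions (print("Real noise: ", real_std) and
similar) print a tuple under plain Python 2 unless the module also carries
the __future__ import.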
--- GPy/examples/classification.py | 24 ++++++------- GPy/examples/dimensionality_reduction.py | 12 +++---- GPy/examples/non_gaussian.py | 44 ++++++++++++------------ GPy/examples/regression.py | 26 +++++++------- README.md | 16 ++++----- 5 files changed, 61 insertions(+), 61 deletions(-) diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py index b3780073..d4518f24 100644 --- a/GPy/examples/classification.py +++ b/GPy/examples/classification.py @@ -15,7 +15,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True): """ try:import pods - except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets' + except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets') data = pods.datasets.oil() X = data['X'] Xtest = data['Xtest'] @@ -52,7 +52,7 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True): """ try:import pods - except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets' + except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets') data = pods.datasets.toy_linear_1d_classification(seed=seed) Y = data['Y'][:, 0:1] Y[Y.flatten() == -1] = 0 @@ -75,7 +75,7 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True): m.plot_f(ax=axes[0]) m.plot(ax=axes[1]) - print m + print(m) return m def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=True): @@ -88,7 +88,7 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot= """ try:import pods - except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets' + except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets') data = pods.datasets.toy_linear_1d_classification(seed=seed) Y = data['Y'][:, 0:1] Y[Y.flatten() == -1] = 0 @@ -114,7 +114,7 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot= m.plot_f(ax=axes[0]) m.plot(ax=axes[1]) - print m + print(m) return m def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, optimize=True, plot=True): @@ -127,7 +127,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti """ try:import pods - except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets' + except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets') data = pods.datasets.toy_linear_1d_classification(seed=seed) Y = data['Y'][:, 0:1] Y[Y.flatten() == -1] = 0 @@ -147,7 +147,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti m.plot_f(ax=axes[0]) m.plot(ax=axes[1]) - print m + print(m) return m def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True): @@ -160,7 +160,7 @@ def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True): """ try:import pods - except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets' + except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets') data = pods.datasets.toy_linear_1d_classification(seed=seed) Y = data['Y'][:, 0:1] Y[Y.flatten() == -1] = 0 @@ -177,7 +177,7 @@ def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True): # Parameters optimization: for _ in range(5): m.optimize(max_iters=int(max_iters/5)) - print m + print(m) # 
Plot if plot: @@ -186,7 +186,7 @@ def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True): m.plot_f(ax=axes[0]) m.plot(ax=axes[1]) - print m + print(m) return m def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None, optimize=True, plot=True): @@ -202,7 +202,7 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel= :type kernel: a GPy kernel """ try:import pods - except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets' + except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets') data = pods.datasets.crescent_data(seed=seed) Y = data['Y'] Y[Y.flatten()==-1] = 0 @@ -224,5 +224,5 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel= if plot: m.plot() - print m + print(m) return m diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index df9093a2..fe1fa1e5 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -333,7 +333,7 @@ def bgplvm_simulation(optimize=True, verbose=1, m.likelihood.variance = .1 if optimize: - print "Optimizing model:" + print("Optimizing model:") m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05) if plot: @@ -358,7 +358,7 @@ def ssgplvm_simulation(optimize=True, verbose=1, m.likelihood.variance = .1 if optimize: - print "Optimizing model:" + print("Optimizing model:") m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05) if plot: @@ -388,7 +388,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, m.Yreal = Y if optimize: - print "Optimizing model:" + print("Optimizing model:") m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05) if plot: @@ -411,7 +411,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): m['.*noise'] = [Y.var() / 40. for Y in Ylist] if optimize: - print "Optimizing Model:" + print("Optimizing Model:") m.optimize(messages=verbose, max_iters=8e3) if plot: m.X.plot("MRD Latent Space 1D") @@ -439,7 +439,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim initx="random", initz='permute', **kw) if optimize: - print "Optimizing Model:" + print("Optimizing Model:") m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1) if plot: m.X.plot("MRD Latent Space 1D") @@ -603,7 +603,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): try: if optimize: m.optimize('bfgs', messages=verbose, max_iters=5e3, bfgs_factor=10) except KeyboardInterrupt: - print "Keyboard interrupt, continuing to plot and return" + print("Keyboard interrupt, continuing to plot and return") if plot: fig, (latent_axes, sense_axes) = plt.subplots(1, 2) diff --git a/GPy/examples/non_gaussian.py b/GPy/examples/non_gaussian.py index ddac8813..3652b4d3 100644 --- a/GPy/examples/non_gaussian.py +++ b/GPy/examples/non_gaussian.py @@ -37,7 +37,7 @@ def student_t_approx(optimize=True, plot=True): #Add student t random noise to datapoints deg_free = 1 - print "Real noise: ", real_std + print("Real noise: ", real_std) initial_var_guess = 0.5 edited_real_sd = initial_var_guess @@ -73,7 +73,7 @@ def student_t_approx(optimize=True, plot=True): m4['.*t_scale2'].constrain_bounded(1e-6, 10.) 
m4['.*white'].constrain_fixed(1e-5) m4.randomize() - print m4 + print(m4) debug=True if debug: m4.optimize(messages=1) @@ -81,18 +81,18 @@ def student_t_approx(optimize=True, plot=True): pb.plot(m4.X, m4.inference_method.f_hat) pb.plot(m4.X, m4.Y, 'rx') m4.plot() - print m4 + print(m4) return m4 if optimize: optimizer='scg' - print "Clean Gaussian" + print("Clean Gaussian") m1.optimize(optimizer, messages=1) - print "Corrupt Gaussian" + print("Corrupt Gaussian") m2.optimize(optimizer, messages=1) - print "Clean student t" + print("Clean student t") m3.optimize(optimizer, messages=1) - print "Corrupt student t" + print("Corrupt student t") m4.optimize(optimizer, messages=1) if plot: @@ -151,7 +151,7 @@ def boston_example(optimize=True, plot=True): for n, (train, test) in enumerate(kf): X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test] - print "Fold {}".format(n) + print("Fold {}".format(n)) noise = 1e-1 #np.exp(-2) rbf_len = 0.5 @@ -163,21 +163,21 @@ def boston_example(optimize=True, plot=True): score_folds[0, n] = rmse(Y_test, np.mean(Y_train)) #Gaussian GP - print "Gauss GP" + print("Gauss GP") mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy()) mgp.constrain_fixed('.*white', 1e-5) mgp['.*len'] = rbf_len mgp['.*noise'] = noise - print mgp + print(mgp) if optimize: mgp.optimize(optimizer=optimizer, messages=messages) Y_test_pred = mgp.predict(X_test) score_folds[1, n] = rmse(Y_test, Y_test_pred[0]) pred_density[1, n] = np.mean(mgp.log_predictive_density(X_test, Y_test)) - print mgp - print pred_density + print(mgp) + print(pred_density) - print "Gaussian Laplace GP" + print("Gaussian Laplace GP") N, D = Y_train.shape g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D) g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution) @@ -186,18 +186,18 @@ def boston_example(optimize=True, plot=True): mg.constrain_fixed('.*white', 1e-5) mg['rbf_len'] = rbf_len mg['noise'] = noise - print mg + print(mg) if optimize: mg.optimize(optimizer=optimizer, messages=messages) Y_test_pred = mg.predict(X_test) score_folds[2, n] = rmse(Y_test, Y_test_pred[0]) pred_density[2, n] = np.mean(mg.log_predictive_density(X_test, Y_test)) - print pred_density - print mg + print(pred_density) + print(mg) for stu_num, df in enumerate(degrees_freedoms): #Student T - print "Student-T GP {}df".format(df) + print("Student-T GP {}df".format(df)) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise) stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution) mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood) @@ -205,14 +205,14 @@ def boston_example(optimize=True, plot=True): mstu_t.constrain_bounded('.*t_scale2', 0.0001, 1000) mstu_t['rbf_len'] = rbf_len mstu_t['.*t_scale2'] = noise - print mstu_t + print(mstu_t) if optimize: mstu_t.optimize(optimizer=optimizer, messages=messages) Y_test_pred = mstu_t.predict(X_test) score_folds[3+stu_num, n] = rmse(Y_test, Y_test_pred[0]) pred_density[3+stu_num, n] = np.mean(mstu_t.log_predictive_density(X_test, Y_test)) - print pred_density - print mstu_t + print(pred_density) + print(mstu_t) if plot: plt.figure() @@ -230,8 +230,8 @@ def boston_example(optimize=True, plot=True): plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x') plt.title('Stu t {}df'.format(df)) - print "Average scores: {}".format(np.mean(score_folds, 1)) - print "Average pred 
density: {}".format(np.mean(pred_density, 1)) + print("Average scores: {}".format(np.mean(score_folds, 1))) + print("Average pred density: {}".format(np.mean(pred_density, 1))) if plot: #Plotting diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 37a18f63..cf8205f9 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -15,7 +15,7 @@ def olympic_marathon_men(optimize=True, plot=True): """Run a standard Gaussian process regression on the Olympic marathon data.""" try:import pods except ImportError: - print 'pods unavailable, see https://github.com/sods/ods for example datasets' + print('pods unavailable, see https://github.com/sods/ods for example datasets') return data = pods.datasets.olympic_marathon_men() @@ -88,7 +88,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True): """ try:import pods except ImportError: - print 'pods unavailable, see https://github.com/sods/ods for example datasets' + print('pods unavailable, see https://github.com/sods/ods for example datasets') return data = pods.datasets.epomeo_gpx() num_data_list = [] @@ -135,7 +135,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000 try:import pods except ImportError: - print 'pods unavailable, see https://github.com/sods/ods for example datasets' + print('pods unavailable, see https://github.com/sods/ods for example datasets') return data = pods.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number) # data['Y'] = data['Y'][0::2, :] @@ -219,7 +219,7 @@ def olympic_100m_men(optimize=True, plot=True): """Run a standard Gaussian process regression on the Rogers and Girolami olympics data.""" try:import pods except ImportError: - print 'pods unavailable, see https://github.com/sods/ods for example datasets' + print('pods unavailable, see https://github.com/sods/ods for example datasets') return data = pods.datasets.olympic_100m_men() @@ -240,7 +240,7 @@ def toy_rbf_1d(optimize=True, plot=True): """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" try:import pods except ImportError: - print 'pods unavailable, see https://github.com/sods/ods for example datasets' + print('pods unavailable, see https://github.com/sods/ods for example datasets') return data = pods.datasets.toy_rbf_1d() @@ -258,7 +258,7 @@ def toy_rbf_1d_50(optimize=True, plot=True): """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" try:import pods except ImportError: - print 'pods unavailable, see https://github.com/sods/ods for example datasets' + print('pods unavailable, see https://github.com/sods/ods for example datasets') return data = pods.datasets.toy_rbf_1d_50() @@ -377,7 +377,7 @@ def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True): """Predict the location of a robot given wirelss signal strength readings.""" try:import pods except ImportError: - print 'pods unavailable, see https://github.com/sods/ods for example datasets' + print('pods unavailable, see https://github.com/sods/ods for example datasets') return data = pods.datasets.robot_wireless() @@ -398,14 +398,14 @@ def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True): sse = ((data['Xtest'] - Xpredict)**2).sum() - print('Sum of squares error on test data: ' + str(sse)) + print(('Sum of squares error on test data: ' + str(sse))) return m def silhouette(max_iters=100, optimize=True, plot=True): """Predict the pose of a 
figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper.""" try:import pods except ImportError: - print 'pods unavailable, see https://github.com/sods/ods for example datasets' + print('pods unavailable, see https://github.com/sods/ods for example datasets') return data = pods.datasets.silhouette() @@ -416,7 +416,7 @@ def silhouette(max_iters=100, optimize=True, plot=True): if optimize: m.optimize(messages=True, max_iters=max_iters) - print m + print(m) return m def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=False): @@ -468,7 +468,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt if plot: m.plot() - print m + print(m) return m def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): @@ -492,7 +492,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): if plot: m.plot(ax=axes[0]) axes[0].set_title('no input uncertainty') - print m + print(m) # the same Model with uncertainty m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S) @@ -503,5 +503,5 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): axes[1].set_title('with input uncertainty') fig.canvas.draw() - print m + print(m) return m diff --git a/README.md b/README.md index 4dc5b807..2e9dc58a 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,5 @@ # GPy -# Moving to Python 3 -Work is underway to make GPy run on Python 3. We are not there yet! Changes performed so far have retained compatibility with Python 2.6 and abive. - -Work done so far: - -* Use 2to3 to fix relative imports -* use 2to3 to convert print from statement to function. Some advanced uses of print meant that this could not be done in a way that retained compatibility with old versions of Python. The oldest version of Python that is supported by this version is 2.6 due to the required future imports. - A Gaussian processes framework in Python. * [GPy homepage](http://sheffieldml.github.io/GPy/) @@ -18,6 +10,14 @@ A Gaussian processes framework in Python. Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png) +### Moving to Python 3 +Work is underway to make GPy run on Python 3. We are not there yet! Changes performed so far have retained compatibility with Python 2.6 and abive. + +Work done so far: + +* Use 2to3 to fix relative imports +* use 2to3 to convert print from statement to function. Some advanced uses of print meant that this could not be done in a way that retained compatibility with old versions of Python. The oldest version of Python that is supported by this version is 2.6 due to the required future imports. + ### Citation @Misc{gpy2014, From 5601a580deed34877a988ba3adb850f21d944e3f Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 08:48:48 +0000 Subject: [PATCH 020/166] Convert print to function for Python 3 compatibility. 
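The optimizer modules are the delicate case: they emit in-place progress
lines, so the Python 2 trailing comma (print a space instead of a newline)
must map onto the end keyword, and scg.py accordingly also gains the future
import. A runnable sketch of the pattern behind print_out (simplified, with
made-up values):

    from __future__ import print_function
    import sys
    import time

    for it in range(5):
        print('\r', end=' ')                # return to the start of the line
        print('iter {0:>3d}  f = {1:> 12e}'.format(it, 1.0 / (it + 1)), end=' ')
        sys.stdout.flush()                  # force the partial line out immediately
        time.sleep(0.05)
    print()                                 # finish with a real newline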
This breaks compatibility for versions of Python < 2.6 --- .../expectation_propagation_dtc.py | 2 +- .../latent_function_inference/var_dtc.py | 2 +- GPy/inference/mcmc/samplers.py | 2 +- .../optimization/conjugate_gradient_descent.py | 4 ++-- GPy/inference/optimization/optimization.py | 12 ++++++------ GPy/inference/optimization/scg.py | 15 +++++++-------- 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py index 35b1b7dc..0f972a84 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py +++ b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py @@ -179,7 +179,7 @@ class EPDTC(LatentFunctionInference): if VVT_factor.shape[1] == Y.shape[1]: woodbury_vector = Cpsi1Vf # == Cpsi1V else: - print 'foobar' + print('foobar') psi1V = np.dot(mu_tilde[:,None].T*beta, psi1).T tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) tmp, _ = dpotrs(LB, tmp, lower=1) diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index d61e7f0f..db59df14 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -170,7 +170,7 @@ class VarDTC(LatentFunctionInference): if VVT_factor.shape[1] == Y.shape[1]: woodbury_vector = Cpsi1Vf # == Cpsi1V else: - print 'foobar' + print('foobar') import ipdb; ipdb.set_trace() psi1V = np.dot(Y.T*beta, psi1).T tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) diff --git a/GPy/inference/mcmc/samplers.py b/GPy/inference/mcmc/samplers.py index 444d99d7..ff396a96 100644 --- a/GPy/inference/mcmc/samplers.py +++ b/GPy/inference/mcmc/samplers.py @@ -40,7 +40,7 @@ class Metropolis_Hastings: fcurrent = self.model.log_likelihood() + self.model.log_prior() accepted = np.zeros(Ntotal,dtype=np.bool) for it in range(Ntotal): - print "sample %d of %d\r"%(it,Ntotal), + print("sample %d of %d\r"%(it,Ntotal), end=' ') sys.stdout.flush() prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale) self.model._set_params_transformed(prop) diff --git a/GPy/inference/optimization/conjugate_gradient_descent.py b/GPy/inference/optimization/conjugate_gradient_descent.py index dfc4a48d..274de784 100644 --- a/GPy/inference/optimization/conjugate_gradient_descent.py +++ b/GPy/inference/optimization/conjugate_gradient_descent.py @@ -74,7 +74,7 @@ class _Async_Optimization(Thread): if self.outq is not None: self.outq.put(self.SENTINEL) if self.messages: - print "" + print("") self.runsignal.clear() def run(self, *args, **kwargs): @@ -213,7 +213,7 @@ class Async_Optimize(object): # # print "^C" # self.runsignal.clear() # c.join() - print "WARNING: callback still running, optimisation done!" 
+ print("WARNING: callback still running, optimisation done!") return p.result class CGD(Async_Optimize): diff --git a/GPy/inference/optimization/optimization.py b/GPy/inference/optimization/optimization.py index aa9be793..0d6887e5 100644 --- a/GPy/inference/optimization/optimization.py +++ b/GPy/inference/optimization/optimization.py @@ -125,9 +125,9 @@ class opt_lbfgsb(Optimizer): opt_dict = {} if self.xtol is not None: - print "WARNING: l-bfgs-b doesn't have an xtol arg, so I'm going to ignore it" + print("WARNING: l-bfgs-b doesn't have an xtol arg, so I'm going to ignore it") if self.ftol is not None: - print "WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it" + print("WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it") if self.gtol is not None: opt_dict['pgtol'] = self.gtol if self.bfgs_factor is not None: @@ -158,7 +158,7 @@ class opt_simplex(Optimizer): if self.ftol is not None: opt_dict['ftol'] = self.ftol if self.gtol is not None: - print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it" + print("WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it") opt_result = optimize.fmin(f, self.x_init, (), disp=self.messages, maxfun=self.max_f_eval, full_output=True, **opt_dict) @@ -186,11 +186,11 @@ class opt_rasm(Optimizer): opt_dict = {} if self.xtol is not None: - print "WARNING: minimize doesn't have an xtol arg, so I'm going to ignore it" + print("WARNING: minimize doesn't have an xtol arg, so I'm going to ignore it") if self.ftol is not None: - print "WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it" + print("WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it") if self.gtol is not None: - print "WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it" + print("WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it") opt_result = rasm.minimize(self.x_init, f_fp, (), messages=self.messages, maxnumfuneval=self.max_f_eval) diff --git a/GPy/inference/optimization/scg.py b/GPy/inference/optimization/scg.py index 34dd181f..8960de1d 100644 --- a/GPy/inference/optimization/scg.py +++ b/GPy/inference/optimization/scg.py @@ -21,14 +21,13 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
- +from __future__ import print_function import numpy as np import sys - def print_out(len_maxiters, fnow, current_grad, beta, iteration): - print '\r', - print '{0:>0{mi}g} {1:> 12e} {2:< 12.6e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', + print('\r', end=' ') + print('{0:>0{mi}g} {1:> 12e} {2:< 12.6e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', sys.stdout.flush() def exponents(fnow, current_grad): @@ -80,7 +79,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, len_maxiters = len(str(maxiters)) if display: - print ' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters) + print(' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters)) exps = exponents(fnow, current_grad) p_iter = iteration @@ -140,7 +139,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, b = np.any(n_exps < exps) if a or b: p_iter = iteration - print '' + print('') if b: exps = n_exps @@ -189,6 +188,6 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, if display: print_out(len_maxiters, fnow, current_grad, beta, iteration) - print "" - print status + print("") + print(status) return x, flog, function_eval, status From 2a433244280a7e3f7636562103f475ad6320e55f Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 08:52:24 +0000 Subject: [PATCH 021/166] Convert print to function for Python 3 compatibility. --- GPy/kern/_src/coregionalize.py | 4 ++-- GPy/kern/_src/stationary.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py index 291402ec..b6a3aecf 100644 --- a/GPy/kern/_src/coregionalize.py +++ b/GPy/kern/_src/coregionalize.py @@ -61,7 +61,7 @@ class Coregionalize(Kern): try: return self._K_weave(X, X2) except: - print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n" + print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n") config.set('weave', 'working', 'False') return self._K_numpy(X, X2) else: @@ -123,7 +123,7 @@ class Coregionalize(Kern): try: dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2) except: - print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n" + print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n") config.set('weave', 'working', 'False') dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2) else: diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index 06671b23..426296f7 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -165,7 +165,7 @@ class Stationary(Kern): try: self.lengthscale.gradient = self.weave_lengthscale_grads(tmp, X, X2) except: - print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n" + print("\n Weave compilation failed. 
Falling back to (slower) numpy implementation\n") config.set('weave', 'working', 'False') self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)]) else: @@ -214,7 +214,7 @@ class Stationary(Kern): try: return self.gradients_X_weave(dL_dK, X, X2) except: - print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n" + print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n") config.set('weave', 'working', 'False') return self.gradients_X_(dL_dK, X, X2) else: From 4512964f0933a1ee36ab38b4b4f3753235bd9bb7 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 08:58:05 +0000 Subject: [PATCH 022/166] Convert print to function for Python 3 compatibility. This breaks compatibility for versions of Python < 2.6 --- .../one_vs_all_sparse_classification.py | 2 +- GPy/models/sparse_gp_minibatch.py | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/GPy/models/one_vs_all_sparse_classification.py b/GPy/models/one_vs_all_sparse_classification.py index 3bdd2647..7528ffd2 100644 --- a/GPy/models/one_vs_all_sparse_classification.py +++ b/GPy/models/one_vs_all_sparse_classification.py @@ -30,7 +30,7 @@ class OneVsAllSparseClassification(object): self.results = {} for yj in labels: - print 'Class %s vs all' %yj + print('Class %s vs all' %yj) Ynew = Y.copy() Ynew[Y.flatten()!=yj] = 0 Ynew[Y.flatten()==yj] = 1 diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index e827bb70..a6081e61 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -1,6 +1,7 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) +from __future__ import print_function import numpy as np from ..core.parameterization.param import Param from ..core.sparse_gp import SparseGP @@ -50,7 +51,7 @@ class SparseGPMiniBatch(SparseGP): else: #inference_method = ?? raise NotImplementedError, "what to do what to do?" - print "defaulting to ", inference_method, "for latent function inference" + print("defaulting to ", inference_method, "for latent function inference") self.kl_factr = 1. 
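# Note: this print pass leaves other Python-2-isms in place; the old-style
# "raise NotImplementedError, ..." above and the xrange(...) loops below will
# still need separate treatment before this module runs under Python 3.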
self.Z = Param('inducing inputs', Z) @@ -80,13 +81,13 @@ class SparseGPMiniBatch(SparseGP): overall = self.Y_normalized.shape[1] m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall) message = m_f(-1) - print message, + print(message, end=' ') for d in xrange(overall): self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None]) - print ' '*(len(message)+1) + '\r', + print(' '*(len(message)+1) + '\r', end=' ') message = m_f(d) - print message, - print '' + print(message, end=' ') + print('') self.posterior = None @@ -241,15 +242,15 @@ class SparseGPMiniBatch(SparseGP): if not self.stochastics: m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim) message = m_f(-1) - print message, + print(message, end=' ') for d in self.stochastics.d: ninan = self.ninan[:, d] if not self.stochastics: - print ' '*(len(message)) + '\r', + print(' '*(len(message)) + '\r', end=' ') message = m_f(d) - print message, + print(message, end=' ') posterior, log_marginal_likelihood, \ grad_dict, current_values, value_indices = self._inner_parameters_changed( @@ -268,7 +269,7 @@ class SparseGPMiniBatch(SparseGP): woodbury_vector[:, d:d+1] = posterior.woodbury_vector self._log_marginal_likelihood += log_marginal_likelihood if not self.stochastics: - print '' + print('') if self.posterior is None: self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, From 4b4e5d490191228ec3270e95abb5afd6e03a0fd4 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 09:01:27 +0000 Subject: [PATCH 023/166] Convert print to function for Python 3 compatibility --- GPy/plotting/matplot_dep/dim_reduction_plots.py | 6 +++--- GPy/plotting/matplot_dep/inference_plots.py | 2 +- GPy/plotting/matplot_dep/kernel_plots.py | 2 +- GPy/plotting/matplot_dep/maps.py | 4 ++-- GPy/plotting/matplot_dep/visualize.py | 6 +++--- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index 1398b40c..982f8fa9 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -62,7 +62,7 @@ def plot_latent(model, labels=None, which_indices=None, if X.shape[0] > 1000: - print "Warning: subsampling X, as it has more samples then 1000. X.shape={!s}".format(X.shape) + print("Warning: subsampling X, as it has more samples then 1000. X.shape={!s}".format(X.shape)) subsample = np.random.choice(X.shape[0], size=1000, replace=False) X = X[subsample] labels = labels[subsample] @@ -187,14 +187,14 @@ def plot_latent(model, labels=None, which_indices=None, fig.tight_layout() fig.canvas.draw() except Exception as e: - print "Could not invoke tight layout: {}".format(e) + print("Could not invoke tight layout: {}".format(e)) pass if updates: try: ax.figure.canvas.show() except Exception as e: - print "Could not invoke show: {}".format(e) + print("Could not invoke show: {}".format(e)) raw_input('Enter to continue') view.deactivate() return ax diff --git a/GPy/plotting/matplot_dep/inference_plots.py b/GPy/plotting/matplot_dep/inference_plots.py index c802932c..02007390 100644 --- a/GPy/plotting/matplot_dep/inference_plots.py +++ b/GPy/plotting/matplot_dep/inference_plots.py @@ -12,7 +12,7 @@ except: def plot_optimizer(optimizer): if optimizer.trace == None: - print "No trace present so I can't plot it. Please check that the optimizer actually supplies a trace." 
+ print("No trace present so I can't plot it. Please check that the optimizer actually supplies a trace.") else: pb.figure() pb.plot(optimizer.trace) diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py index 347e3d08..fc061ca7 100644 --- a/GPy/plotting/matplot_dep/kernel_plots.py +++ b/GPy/plotting/matplot_dep/kernel_plots.py @@ -81,7 +81,7 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False, filtering=Non last_bottom = ard_params[i,:] bottom += last_bottom else: - print "filtering out {}".format(kernel.parameters[i].name) + print("filtering out {}".format(kernel.parameters[i].name)) ax.set_xlim(-.5, kernel.input_dim - .5) add_bar_labels(fig, ax, [bars[-1]], bottom=bottom-last_bottom) diff --git a/GPy/plotting/matplot_dep/maps.py b/GPy/plotting/matplot_dep/maps.py index fcb03b38..65cecd30 100644 --- a/GPy/plotting/matplot_dep/maps.py +++ b/GPy/plotting/matplot_dep/maps.py @@ -159,10 +159,10 @@ def new_shape_string(sf,name,regex,field=2,type=None): newshp.line(parts=_parts) newshp.records.append(sr.record) - print len(sr.record) + print(len(sr.record)) newshp.save(name) - print index + print(index) def apply_bbox(sf,ax): """ diff --git a/GPy/plotting/matplot_dep/visualize.py b/GPy/plotting/matplot_dep/visualize.py index 9ff41730..50eb4b82 100644 --- a/GPy/plotting/matplot_dep/visualize.py +++ b/GPy/plotting/matplot_dep/visualize.py @@ -225,8 +225,8 @@ class lvm_dimselect(lvm): self.labels = labels lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index) self.show_sensitivities() - print self.latent_values - print "use left and right mouse buttons to select dimensions" + print(self.latent_values) + print("use left and right mouse buttons to select dimensions") def on_click(self, event): @@ -255,7 +255,7 @@ class lvm_dimselect(lvm): def on_leave(self,event): - print type(self.latent_values) + print(type(self.latent_values)) latent_values = self.latent_values.copy() y = self.model.predict(latent_values[None,:])[0] self.data_visualize.modify(y) From c5b91e543ab19c46691a426ebef34d6f4441e395 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 09:03:04 +0000 Subject: [PATCH 024/166] Convert print to function for Python 3 compatibility --- GPy/util/block_matrices.py | 2 +- GPy/util/classification.py | 10 +++--- GPy/util/datasets.py | 64 +++++++++++++++++------------------ GPy/util/debug.py | 8 ++--- GPy/util/gpu_init.py | 2 +- GPy/util/linalg.py | 8 ++--- GPy/util/warping_functions.py | 2 +- 7 files changed, 48 insertions(+), 48 deletions(-) diff --git a/GPy/util/block_matrices.py b/GPy/util/block_matrices.py index 95920868..2d68fd47 100644 --- a/GPy/util/block_matrices.py +++ b/GPy/util/block_matrices.py @@ -23,4 +23,4 @@ if __name__=='__main__': A = np.zeros((5,5)) B = get_blocks(A,[2,3]) B[0,0] += 7 - print B + print(B) diff --git a/GPy/util/classification.py b/GPy/util/classification.py index c0859793..69609091 100644 --- a/GPy/util/classification.py +++ b/GPy/util/classification.py @@ -25,9 +25,9 @@ def conf_matrix(p,labels,names=['1','0'],threshold=.5,show=True): true_0 = labels.size - true_1 - false_0 - false_1 error = (false_1 + false_0)/np.float(labels.size) if show: - print 100. - error * 100,'% instances correctly classified' - print '%-10s| %-10s| %-10s| ' % ('',names[0],names[1]) - print '----------|------------|------------|' - print '%-10s| %-10s| %-10s| ' % (names[0],true_1,false_0) - print '%-10s| %-10s| %-10s| ' % (names[1],false_1,true_0) + print(100. 
- error * 100,'% instances correctly classified') + print('%-10s| %-10s| %-10s| ' % ('',names[0],names[1])) + print('----------|------------|------------|') + print('%-10s| %-10s| %-10s| ' % (names[0],true_1,false_0)) + print('%-10s| %-10s| %-10s| ' % (names[1],false_1,true_0)) return error,true_1, false_1, true_0, false_0 diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 10835463..346a9c45 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -75,7 +75,7 @@ def prompt_user(prompt): elif choice in no: return False else: - print("Your response was a " + choice) + print(("Your response was a " + choice)) print("Please respond with 'yes', 'y' or 'no', 'n'") #return prompt_user() @@ -99,7 +99,7 @@ def download_url(url, store_directory, save_name=None, messages=True, suffix='') """Download a file from a url and save it to disk.""" i = url.rfind('/') file = url[i+1:] - print file + print(file) dir_name = os.path.join(data_path, store_directory) if save_name is None: save_name = os.path.join(dir_name, file) @@ -107,7 +107,7 @@ def download_url(url, store_directory, save_name=None, messages=True, suffix='') if suffix is None: suffix='' - print "Downloading ", url, "->", save_name + print("Downloading ", url, "->", save_name) if not os.path.exists(dir_name): os.makedirs(dir_name) try: @@ -150,7 +150,7 @@ def download_url(url, store_directory, save_name=None, messages=True, suffix='') sys.stdout.write(status) sys.stdout.flush() sys.stdout.write(" "*(len(status)) + "\r") - print status + print(status) # if we wanted to get more sophisticated maybe we should check the response code here again even for successes. #with open(save_name, 'wb') as f: # f.write(response.read()) @@ -159,32 +159,32 @@ def download_url(url, store_directory, save_name=None, messages=True, suffix='') def authorize_download(dataset_name=None): """Check with the user that the are happy with terms and conditions for the data set.""" - print('Acquiring resource: ' + dataset_name) + print(('Acquiring resource: ' + dataset_name)) # TODO, check resource is in dictionary! print('') dr = data_resources[dataset_name] print('Details of data: ') - print(dr['details']) + print((dr['details'])) print('') if dr['citation']: print('Please cite:') - print(dr['citation']) + print((dr['citation'])) print('') if dr['size']: - print('After downloading the data will take up ' + str(dr['size']) + ' bytes of space.') + print(('After downloading the data will take up ' + str(dr['size']) + ' bytes of space.')) print('') - print('Data will be stored in ' + os.path.join(data_path, dataset_name) + '.') + print(('Data will be stored in ' + os.path.join(data_path, dataset_name) + '.')) print('') if overide_manual_authorize: if dr['license']: print('You have agreed to the following license:') - print(dr['license']) + print((dr['license'])) print('') return True else: if dr['license']: print('You must also agree to the following license:') - print(dr['license']) + print((dr['license'])) print('') return prompt_user('Do you wish to proceed with the download? [yes/no]') @@ -495,18 +495,18 @@ def google_trends(query_terms=['big data', 'machine learning', 'data science'], file = 'data.csv' file_name = os.path.join(dir_path,file) if not os.path.exists(file_name) or refresh_data: - print "Accessing Google trends to acquire the data. Note that repeated accesses will result in a block due to a google terms of service violation. Failure at this point may be due to such blocks." + print("Accessing Google trends to acquire the data. 
Note that repeated accesses will result in a block due to a google terms of service violation. Failure at this point may be due to such blocks.") # quote the query terms. quoted_terms = [] for term in query_terms: quoted_terms.append(urllib2.quote(term)) - print "Query terms: ", ', '.join(query_terms) + print("Query terms: ", ', '.join(query_terms)) - print "Fetching query:" + print("Fetching query:") query = 'http://www.google.com/trends/fetchComponent?q=%s&cid=TIMESERIES_GRAPH_0&export=3' % ",".join(quoted_terms) data = urllib2.urlopen(query).read() - print "Done." + print("Done.") # In the notebook they did some data cleaning: remove Javascript header+footer, and translate new Date(....,..,..) into YYYY-MM-DD. header = """// Data table response\ngoogle.visualization.Query.setResponse(""" data = data[len(header):-2] @@ -520,8 +520,8 @@ def google_trends(query_terms=['big data', 'machine learning', 'data science'], df.to_csv(file_name) else: - print "Reading cached data for google trends. To refresh the cache set 'refresh_data=True' when calling this function." - print "Query terms: ", ', '.join(query_terms) + print("Reading cached data for google trends. To refresh the cache set 'refresh_data=True' when calling this function.") + print("Query terms: ", ', '.join(query_terms)) df = pandas.read_csv(file_name, parse_dates=[0]) @@ -679,11 +679,11 @@ def ripley_synth(data_set='ripley_prnn_data'): def global_average_temperature(data_set='global_temperature', num_train=1000, refresh_data=False): path = os.path.join(data_path, data_set) if data_available(data_set) and not refresh_data: - print 'Using cached version of the data set, to use latest version set refresh_data to True' + print('Using cached version of the data set, to use latest version set refresh_data to True') else: download_data(data_set) data = np.loadtxt(os.path.join(data_path, data_set, 'GLBTS.long.data')) - print 'Most recent data observation from month ', data[-1, 1], ' in year ', data[-1, 0] + print('Most recent data observation from month ', data[-1, 1], ' in year ', data[-1, 0]) allX = data[data[:, 3]!=-99.99, 2:3] allY = data[data[:, 3]!=-99.99, 3:4] X = allX[:num_train, 0:1] @@ -695,11 +695,11 @@ def global_average_temperature(data_set='global_temperature', num_train=1000, re def mauna_loa(data_set='mauna_loa', num_train=545, refresh_data=False): path = os.path.join(data_path, data_set) if data_available(data_set) and not refresh_data: - print 'Using cached version of the data set, to use latest version set refresh_data to True' + print('Using cached version of the data set, to use latest version set refresh_data to True') else: download_data(data_set) data = np.loadtxt(os.path.join(data_path, data_set, 'co2_mm_mlo.txt')) - print 'Most recent data observation from month ', data[-1, 1], ' in year ', data[-1, 0] + print('Most recent data observation from month ', data[-1, 1], ' in year ', data[-1, 0]) allX = data[data[:, 3]!=-99.99, 2:3] allY = data[data[:, 3]!=-99.99, 3:4] X = allX[:num_train, 0:1] @@ -802,10 +802,10 @@ def hapmap3(data_set='hapmap3'): if not reduce(lambda a,b: a and b, map(os.path.exists, preprocessed_data_paths)): if not overide_manual_authorize and not prompt_user("Preprocessing requires ~25GB " "of memory and can take a (very) long time, continue? [Y/n]"): - print "Preprocessing required for further usage." + print("Preprocessing required for further usage.") return status = "Preprocessing data, please be patient..." 
- print status + print(status) def write_status(message, progress, status): stdout.write(" "*len(status)); stdout.write("\r"); stdout.flush() status = r"[{perc: <{ll}}] {message: <13s}".format(message=message, ll=20, @@ -873,13 +873,13 @@ def hapmap3(data_set='hapmap3'): inandf = DataFrame(index=metadf.index, data=inan, columns=mapnp[:,1]) inandf.to_pickle(preprocessed_data_paths[2]) status=write_status('done :)', 100, status) - print '' + print('') else: - print "loading snps..." + print("loading snps...") snpsdf = read_pickle(preprocessed_data_paths[0]) - print "loading metainfo..." + print("loading metainfo...") metadf = read_pickle(preprocessed_data_paths[1]) - print "loading nan entries..." + print("loading nan entries...") inandf = read_pickle(preprocessed_data_paths[2]) snps = snpsdf.values populations = metadf.population.values.astype('S3') @@ -1001,7 +1001,7 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'): # Extract the tar file filename = os.path.join(dir_path, 'GSE45719_Raw.tar') with tarfile.open(filename, 'r') as files: - print "Extracting Archive {}...".format(files.name) + print("Extracting Archive {}...".format(files.name)) data = None gene_info = None message = '' @@ -1010,9 +1010,9 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'): for i, file_info in enumerate(members): f = files.extractfile(file_info) inner = read_csv(f, sep='\t', header=0, compression='gzip', index_col=0) - print ' '*(len(message)+1) + '\r', + print(' '*(len(message)+1) + '\r', end=' ') message = "{: >7.2%}: Extracting: {}".format(float(i+1)/overall, file_info.name[:20]+"...txt.gz") - print message, + print(message, end=' ') if data is None: data = inner.RPKM.to_frame() data.columns = [file_info.name[:-18]] @@ -1035,8 +1035,8 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'): sys.stdout.write(' '*len(message) + '\r') sys.stdout.flush() - print - print "Read Archive {}".format(files.name) + print() + print("Read Archive {}".format(files.name)) return data_details_return({'Y': data, 'series_info': info, diff --git a/GPy/util/debug.py b/GPy/util/debug.py index 00107f5e..d691ad82 100644 --- a/GPy/util/debug.py +++ b/GPy/util/debug.py @@ -13,7 +13,7 @@ def checkFinite(arr, name=None): if np.any(np.logical_not(np.isfinite(arr))): idx = np.where(np.logical_not(np.isfinite(arr)))[0] - print name+' at indices '+str(idx)+' have not finite values: '+str(arr[idx])+'!' + print(name+' at indices '+str(idx)+' have not finite values: '+str(arr[idx])+'!') return False return True @@ -23,13 +23,13 @@ def checkFullRank(m, tol=1e-10, name=None, force_check=False): assert len(m.shape)==2 and m.shape[0]==m.shape[1], 'The input of checkFullRank has to be a square matrix!' if not force_check and m.shape[0]>=10000: - print 'The size of '+name+'is too big to check (>=10000)!' + print('The size of '+name+'is too big to check (>=10000)!') return True s = np.real(np.linalg.eigvals(m)) if s.min()/s.max()=pycuda.driver.Device.count(): - print '['+MPI.Get_processor_name()+'] more processes than the GPU numbers!' 
+ print('['+MPI.Get_processor_name()+'] more processes than the GPU numbers!') #MPI.COMM_WORLD.Abort() raise gpu_device = pycuda.driver.Device(gpuid) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 216a1050..d7ad5d61 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -34,7 +34,7 @@ if config.getboolean('anaconda', 'installed') and config.getboolean('anaconda', dsyrk = mkl_rt.dsyrk dsyr = mkl_rt.dsyr _blas_available = True - print 'anaconda installed and mkl is loaded' + print('anaconda installed and mkl is loaded') except: _blas_available = False else: @@ -64,7 +64,7 @@ def force_F_ordered(A): """ if A.flags['F_CONTIGUOUS']: return A - print "why are your arrays not F order?" + print("why are your arrays not F order?") return np.asfortranarray(A) # def jitchol(A, maxtries=5): @@ -288,7 +288,7 @@ def pca(Y, input_dim): """ if not np.allclose(Y.mean(axis=0), 0.0): - print "Y is not zero mean, centering it locally (GPy.util.linalg.pca)" + print("Y is not zero mean, centering it locally (GPy.util.linalg.pca)") # Y -= Y.mean(axis=0) @@ -423,7 +423,7 @@ def symmetrify(A, upper=False): try: symmetrify_weave(A, upper) except: - print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n" + print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n") config.set('weave', 'working', 'False') symmetrify_numpy(A, upper) else: diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py index a0a385e0..70804c67 100644 --- a/GPy/util/warping_functions.py +++ b/GPy/util/warping_functions.py @@ -207,7 +207,7 @@ class TanhWarpingFunction_d(WarpingFunction): y -= update it += 1 if it == max_iterations: - print "WARNING!!! Maximum number of iterations reached in f_inv " + print("WARNING!!! Maximum number of iterations reached in f_inv ") return y From 8a7123f55a75092927039e17cf35acc84e37d652 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 09:10:27 +0000 Subject: [PATCH 025/166] Fixed Python 2 compatibility --- GPy/util/datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 346a9c45..3ba46f52 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -1,3 +1,4 @@ +from __future__ import print_function import csv import os import copy From 70c8f4a410a300546fbc70ac1bebf592dc248624 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 09:16:21 +0000 Subject: [PATCH 026/166] Typo --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2e9dc58a..9111a48c 100644 --- a/README.md +++ b/README.md @@ -11,12 +11,12 @@ A Gaussian processes framework in Python. Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png) ### Moving to Python 3 -Work is underway to make GPy run on Python 3. We are not there yet! Changes performed so far have retained compatibility with Python 2.6 and abive. +Work is underway to make GPy run on Python 3. We are not there yet! Changes performed so far have retained compatibility with Python 2.6 and above. Work done so far: -* Use 2to3 to fix relative imports -* use 2to3 to convert print from statement to function. Some advanced uses of print meant that this could not be done in a way that retained compatibility with old versions of Python. The oldest version of Python that is supported by this version is 2.6 due to the required future imports. 
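The README bullets above and below summarise selective 2to3 passes over the tree. For reference, the individual fixers can be applied one at a time and write their rewrites in place; roughly the invocations behind these bullets (the fixer names are lib2to3's, the target path is illustrative):

    2to3 -f print -w GPy             # print statement  ->  print() function
    2to3 -f except -f raise -w GPy   # except E, e / raise E, msg  ->  Python 3 forms
    2to3 -f ne -w GPy                # <>  ->  !=
    2to3 -f import -w GPy            # implicit relative imports  ->  explicit ones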
+* Used 2to3 to fix relative imports +* Used 2to3 to convert print from statement to function. Some advanced uses of print meant that this could not be done in a way that retained compatibility with old versions of Python. The oldest version of Python that is supported by this version is 2.6 due to the required future imports. ### Citation From c4fb58176dbaaec2e5b7fce216aac5656d59225d Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 13:33:39 +0000 Subject: [PATCH 027/166] Exception fixes for Python 3 compat --- GPy/core/mapping.py | 4 ++-- GPy/core/model.py | 4 ++-- GPy/core/parameterization/lists_and_dicts.py | 2 +- GPy/core/parameterization/parameter_core.py | 14 +++++++------- GPy/core/parameterization/parameterized.py | 18 +++++++++--------- GPy/core/parameterization/variational.py | 4 ++-- GPy/core/sparse_gp.py | 2 +- GPy/core/symbolic.py | 2 +- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py index 163db0c9..c84087cc 100644 --- a/GPy/core/mapping.py +++ b/GPy/core/mapping.py @@ -60,7 +60,7 @@ class Mapping(Parameterized): from ..plotting.matplot_dep import models_plots mapping_plots.plot_mapping(self,*args) else: - raise NameError, "matplotlib package has not been imported." + raise NameError("matplotlib package has not been imported.") class Bijective_mapping(Mapping): """ @@ -111,7 +111,7 @@ class Mapping_check_model(Model): return (self.dL_df*self.mapping.f(self.X)).sum() def _log_likelihood_gradients(self): - raise NotImplementedError, "This needs to be implemented to use the Mapping_check_model class." + raise NotImplementedError("This needs to be implemented to use the Mapping_check_model class.") class Mapping_check_df_dtheta(Mapping_check_model): """This class allows gradient checks for the gradient of a mapping with respect to parameters. """ diff --git a/GPy/core/model.py b/GPy/core/model.py index 348cebf1..65a85589 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -30,7 +30,7 @@ class Model(Parameterized): self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500) def log_likelihood(self): - raise NotImplementedError, "this needs to be implemented to use the model class" + raise NotImplementedError("this needs to be implemented to use the model class") def _log_likelihood_gradients(self): return self.gradient.copy() @@ -119,7 +119,7 @@ class Model(Parameterized): DEPRECATED. 
""" - raise DeprecationWarning, 'parameters now have default constraints' + raise DeprecationWarning('parameters now have default constraints') def objective_function(self): """ diff --git a/GPy/core/parameterization/lists_and_dicts.py b/GPy/core/parameterization/lists_and_dicts.py index 626603ec..2d774a76 100644 --- a/GPy/core/parameterization/lists_and_dicts.py +++ b/GPy/core/parameterization/lists_and_dicts.py @@ -32,7 +32,7 @@ class ArrayList(list): if el is item: return index index += 1 - raise ValueError, "{} is not in list".format(item) + raise ValueError("{} is not in list".format(item)) pass class ObserverList(object): diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 06991ab0..02cb0a12 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -36,7 +36,7 @@ def adjust_name_for_printing(name): name = name.replace("/", "_l_").replace("@", '_at_') name = name.replace("(", "_of_").replace(")", "") if re.match(r'^[a-zA-Z_][a-zA-Z0-9-_]*$', name) is None: - raise NameError, "name {} converted to {} cannot be further converted to valid python variable name!".format(name2, name) + raise NameError("name {} converted to {} cannot be further converted to valid python variable name!".format(name2, name)) return name return '' @@ -65,13 +65,13 @@ class Parentable(object): Gets called, when the parent changed, so we can adjust our inner attributes according to the new parent. """ - raise NotImplementedError, "shouldnt happen, Parentable objects need to be able to change their parent" + raise NotImplementedError("shouldnt happen, Parentable objects need to be able to change their parent") def _disconnect_parent(self, *args, **kw): """ Disconnect this object from its parent """ - raise NotImplementedError, "Abstract superclass" + raise NotImplementedError("Abstract superclass") @property def _highest_parent_(self): @@ -214,7 +214,7 @@ class Gradcheckable(Pickleable, Parentable): Perform the checkgrad on the model. TODO: this can be done more efficiently, when doing it inside here """ - raise HierarchyError, "This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!" + raise HierarchyError("This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!") class Nameable(Gradcheckable): """ @@ -652,10 +652,10 @@ class OptimizationHandlable(Indexable): self.trigger_update() def _get_params_transformed(self): - raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!" + raise DeprecationWarning("_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!") # def _set_params_transformed(self, p): - raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!" + raise DeprecationWarning("_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!") def _trigger_params_changed(self, trigger_parent=True): """ @@ -701,7 +701,7 @@ class OptimizationHandlable(Indexable): Return the number of parameters of this parameter_handle. Param objects will always return 0. 
""" - raise NotImplemented, "Abstract, please implement in respective classes" + raise NotImplemented("Abstract, please implement in respective classes") def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): """ diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 1a5ff123..62914636 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -131,7 +131,7 @@ class Parameterized(Parameterizable): if param.has_parent(): def visit(parent, self): if parent is self: - raise HierarchyError, "You cannot add a parameter twice into the hierarchy" + raise HierarchyError("You cannot add a parameter twice into the hierarchy") param.traverse_parents(visit, self) param._parent_.unlink_parameter(param) # make sure the size is set @@ -173,7 +173,7 @@ class Parameterized(Parameterizable): self._highest_parent_._connect_fixes() else: - raise HierarchyError, """Parameter exists already, try making a copy""" + raise HierarchyError("""Parameter exists already, try making a copy""") def link_parameters(self, *parameters): @@ -189,9 +189,9 @@ class Parameterized(Parameterizable): """ if not param in self.parameters: try: - raise RuntimeError, "{} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name) + raise RuntimeError("{} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name)) except AttributeError: - raise RuntimeError, "{} does not seem to be a parameter, remove parameters directly from their respective parents".format(str(param)) + raise RuntimeError("{} does not seem to be a parameter, remove parameters directly from their respective parents".format(str(param))) start = sum([p.size for p in self.parameters[:param._parent_index_]]) self._remove_parameter_name(param) @@ -215,9 +215,9 @@ class Parameterized(Parameterizable): self._highest_parent_._notify_parent_change() def add_parameter(self, *args, **kwargs): - raise DeprecationWarning, "add_parameter was renamed to link_parameter to avoid confusion of setting variables, use link_parameter instead" + raise DeprecationWarning("add_parameter was renamed to link_parameter to avoid confusion of setting variables, use link_parameter instead") def remove_parameter(self, *args, **kwargs): - raise DeprecationWarning, "remove_parameter was renamed to unlink_parameter to avoid confusion of setting variables, use unlink_parameter instead" + raise DeprecationWarning("remove_parameter was renamed to unlink_parameter to avoid confusion of setting variables, use unlink_parameter instead") def _connect_parameters(self, ignore_added_names=False): # connect parameterlist to this parameterized object @@ -237,7 +237,7 @@ class Parameterized(Parameterizable): self._param_slices_ = [] for i, p in enumerate(self.parameters): if not p.param_array.flags['C_CONTIGUOUS']: - raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS" + raise ValueError("This should not happen! Please write an email to the developers with the code, which reproduces this error. 
All parameter arrays must be C_CONTIGUOUS") p._parent_ = self p._parent_index_ = i @@ -279,7 +279,7 @@ class Parameterized(Parameterizable): else: if paramlist is None: paramlist = self.grep_param_names(name) - if len(paramlist) < 1: raise AttributeError, name + if len(paramlist) < 1: raise AttributeError(name) if len(paramlist) == 1: if isinstance(paramlist[-1], Parameterized): paramlist = paramlist[-1].flattened_parameters @@ -295,7 +295,7 @@ class Parameterized(Parameterizable): try: self.param_array[name] = value except: - raise ValueError, "Setting by slice or index only allowed with array-like" + raise ValueError("Setting by slice or index only allowed with array-like") self.trigger_update() else: try: param = self.__getitem__(name, paramlist) diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index 25efdc92..842183fb 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -16,13 +16,13 @@ class VariationalPrior(Parameterized): super(VariationalPrior, self).__init__(name=name, **kw) def KL_divergence(self, variational_posterior): - raise NotImplementedError, "override this for variational inference of latent space" + raise NotImplementedError("override this for variational inference of latent space") def update_gradients_KL(self, variational_posterior): """ updates the gradients for mean and variance **in place** """ - raise NotImplementedError, "override this for variational inference of latent space" + raise NotImplementedError("override this for variational inference of latent space") class NormalPrior(VariationalPrior): def KL_divergence(self, variational_posterior): diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 96e3dbe7..ebaf721f 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -48,7 +48,7 @@ class SparseGP(GP): inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1]) else: #inference_method = ?? - raise NotImplementedError, "what to do what to do?" + raise NotImplementedError("what to do what to do?") print("defaulting to ", inference_method, "for latent function inference") self.Z = Param('inducing inputs', Z) diff --git a/GPy/core/symbolic.py b/GPy/core/symbolic.py index ed3a9d59..4a9fcb76 100644 --- a/GPy/core/symbolic.py +++ b/GPy/core/symbolic.py @@ -223,7 +223,7 @@ class Symbolic_core(): def code_gradients_cacheable(self, function, variable): if variable not in self.cacheable: - raise RuntimeError, variable + ' must be a cacheable.' 
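Every conversion in this patch is an instance of one rule: the Python 2-only statement form raise SomeError, "message" becomes a call, which both interpreters parse. A minimal sketch of the resulting idiom, with an illustrative message:

    try:
        raise RuntimeError("must be a cacheable.")  # was: raise RuntimeError, "must be a cacheable."
    except RuntimeError as e:                       # the 'as' spelling is also the Python 3-safe one
        print(e)

Two of the mechanically converted sites deserve care, though: NotImplemented in parameter_core.py is a constant, not an exception class, so raise NotImplemented("...") still fails with a TypeError if it is ever reached, and calling an already-caught exception instance, as raise i("Need pandas ...") does in a later patch to datasets.py, fails the same way.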
+ raise RuntimeError(variable + ' must be a cacheable.') lcode = 'gradients_' + variable + ' = np.zeros_like(' + variable + ')\n' lcode += 'self.update_cache(' + ', '.join(self.cacheable) + ')\n' for i, theta in enumerate(self.variables[variable]): From 7c6ff2982fce37988b53b8aa81ebc0958bfffca7 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 13:36:45 +0000 Subject: [PATCH 028/166] Exception fixes for Python 3 compat --- GPy/inference/latent_function_inference/dtc.py | 4 ++-- .../expectation_propagation_dtc.py | 2 +- GPy/inference/latent_function_inference/fitc.py | 2 +- GPy/inference/latent_function_inference/posterior.py | 6 +++--- GPy/inference/latent_function_inference/var_dtc.py | 2 +- GPy/inference/optimization/optimization.py | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/GPy/inference/latent_function_inference/dtc.py b/GPy/inference/latent_function_inference/dtc.py index 5590a079..57a451b2 100644 --- a/GPy/inference/latent_function_inference/dtc.py +++ b/GPy/inference/latent_function_inference/dtc.py @@ -29,7 +29,7 @@ class DTC(LatentFunctionInference): #make sure the noise is not hetero beta = 1./likelihood.gaussian_variance(Y_metadata) if beta.size > 1: - raise NotImplementedError, "no hetero noise with this implementation of DTC" + raise NotImplementedError("no hetero noise with this implementation of DTC") Kmm = kern.K(Z) Knn = kern.Kdiag(X) @@ -97,7 +97,7 @@ class vDTC(object): #make sure the noise is not hetero beta = 1./likelihood.gaussian_variance(Y_metadata) if beta.size > 1: - raise NotImplementedError, "no hetero noise with this implementation of DTC" + raise NotImplementedError("no hetero noise with this implementation of DTC") Kmm = kern.K(Z) Knn = kern.Kdiag(X) diff --git a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py index 0f972a84..e25df388 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py +++ b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py @@ -314,7 +314,7 @@ def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, dL_dR = None elif het_noise: if uncertain_inputs: - raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented" + raise NotImplementedError("heteroscedatic derivates with uncertain inputs not implemented") else: #from ...util.linalg import chol_inv #LBi = chol_inv(LB) diff --git a/GPy/inference/latent_function_inference/fitc.py b/GPy/inference/latent_function_inference/fitc.py index a184c6c4..abe53f3d 100644 --- a/GPy/inference/latent_function_inference/fitc.py +++ b/GPy/inference/latent_function_inference/fitc.py @@ -26,7 +26,7 @@ class FITC(LatentFunctionInference): #make sure the noise is not hetero sigma_n = likelihood.gaussian_variance(Y_metadata) if sigma_n.size >1: - raise NotImplementedError, "no hetero noise with this implementation of FITC" + raise NotImplementedError("no hetero noise with this implementation of FITC") Kmm = kern.K(Z) Knn = kern.Kdiag(X) diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index 34f0b3bb..73d65df6 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -52,7 +52,7 @@ class Posterior(object): or ((mean is not None) and (cov is not None)): pass # we have sufficient to compute the posterior else: - raise ValueError, 
"insufficient information to compute the posterior" + raise ValueError("insufficient information to compute the posterior") self._K_chol = K_chol self._K = K @@ -134,13 +134,13 @@ class Posterior(object): #self._woodbury_chol = jitchol(W) #try computing woodbury chol from cov elif self._covariance is not None: - raise NotImplementedError, "TODO: check code here" + raise NotImplementedError("TODO: check code here") B = self._K - self._covariance tmp, _ = dpotrs(self.K_chol, B) self._woodbury_inv, _ = dpotrs(self.K_chol, tmp.T) _, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv) else: - raise ValueError, "insufficient information to compute posterior" + raise ValueError("insufficient information to compute posterior") return self._woodbury_chol @property diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index db59df14..1be2557b 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -213,7 +213,7 @@ def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, dL_dR = None elif het_noise: if uncertain_inputs: - raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented" + raise NotImplementedError("heteroscedatic derivates with uncertain inputs not implemented") else: #from ...util.linalg import chol_inv #LBi = chol_inv(LB) diff --git a/GPy/inference/optimization/optimization.py b/GPy/inference/optimization/optimization.py index 0d6887e5..600de35d 100644 --- a/GPy/inference/optimization/optimization.py +++ b/GPy/inference/optimization/optimization.py @@ -54,7 +54,7 @@ class Optimizer(): self.time = str(end - start) def opt(self, f_fp=None, f=None, fp=None): - raise NotImplementedError, "this needs to be implemented to use the optimizer class" + raise NotImplementedError("this needs to be implemented to use the optimizer class") def plot(self): """ From f961520c4220fc803e5f2416f6f96b92ba5e57cc Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 13:38:13 +0000 Subject: [PATCH 029/166] Exception fixes for Python 3 compat --- GPy/kern/_src/ODE_UY.py | 2 +- GPy/kern/_src/ODE_UYC.py | 2 +- GPy/kern/_src/ODE_st.py | 2 +- GPy/kern/_src/ODE_t.py | 2 +- GPy/kern/_src/add.py | 2 +- GPy/kern/_src/psi_comp/__init__.py | 8 ++++---- GPy/kern/_src/stationary.py | 4 ++-- GPy/kern/_src/symbolic.py | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/GPy/kern/_src/ODE_UY.py b/GPy/kern/_src/ODE_UY.py index b4a2b42d..eef8609b 100644 --- a/GPy/kern/_src/ODE_UY.py +++ b/GPy/kern/_src/ODE_UY.py @@ -114,7 +114,7 @@ class ODE_UY(Kern): elif i==1: Kdiag[s1]+= Vu*Vy*(k1+k2+k3) else: - raise ValueError, "invalid input/output index" + raise ValueError("invalid input/output index") #Kdiag[slices[0][0]]+= self.variance_U #matern32 diag #Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag return Kdiag diff --git a/GPy/kern/_src/ODE_UYC.py b/GPy/kern/_src/ODE_UYC.py index 1722d2e1..4c39a9c9 100644 --- a/GPy/kern/_src/ODE_UYC.py +++ b/GPy/kern/_src/ODE_UYC.py @@ -115,7 +115,7 @@ class ODE_UYC(Kern): elif i==1: Kdiag[s1]+= Vu*Vy*(k1+k2+k3) else: - raise ValueError, "invalid input/output index" + raise ValueError("invalid input/output index") #Kdiag[slices[0][0]]+= self.variance_U #matern32 diag #Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag return Kdiag diff --git a/GPy/kern/_src/ODE_st.py b/GPy/kern/_src/ODE_st.py index 665be230..1c3b661b 100644 --- 
a/GPy/kern/_src/ODE_st.py +++ b/GPy/kern/_src/ODE_st.py @@ -135,7 +135,7 @@ class ODE_st(Kern): Kdiag[s1]+= b**2*k1 - 2*a*c*k2 + a**2*k3 + c**2*vyt*vyx #Kdiag[s1]+= Vu*Vy*(k1+k2+k3) else: - raise ValueError, "invalid input/output index" + raise ValueError("invalid input/output index") return Kdiag diff --git a/GPy/kern/_src/ODE_t.py b/GPy/kern/_src/ODE_t.py index a470cbec..268917ae 100644 --- a/GPy/kern/_src/ODE_t.py +++ b/GPy/kern/_src/ODE_t.py @@ -85,7 +85,7 @@ class ODE_t(Kern): Kdiag[s1]+= k1 + vyt+self.ubias #Kdiag[s1]+= Vu*Vy*(k1+k2+k3) else: - raise ValueError, "invalid input/output index" + raise ValueError("invalid input/output index") return Kdiag diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 4c72a254..0f612f5b 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -111,7 +111,7 @@ class Add(CombinationKernel): psi2 += np.einsum('nm,no->mo',tmp1,tmp2)+np.einsum('nm,no->mo',tmp2,tmp1) #(tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :]) else: - raise NotImplementedError, "psi2 cannot be computed for this kernel" + raise NotImplementedError("psi2 cannot be computed for this kernel") return psi2 def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): diff --git a/GPy/kern/_src/psi_comp/__init__.py b/GPy/kern/_src/psi_comp/__init__.py index a277ff02..74aacd75 100644 --- a/GPy/kern/_src/psi_comp/__init__.py +++ b/GPy/kern/_src/psi_comp/__init__.py @@ -17,7 +17,7 @@ class PSICOMP_RBF(Pickleable): elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior) else: - raise ValueError, "unknown distriubtion received for psi-statistics" + raise ValueError("unknown distriubtion received for psi-statistics") @Cache_this(limit=2, ignore_args=(0,1,2,3)) def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): @@ -26,7 +26,7 @@ class PSICOMP_RBF(Pickleable): elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior) else: - raise ValueError, "unknown distriubtion received for psi-statistics" + raise ValueError("unknown distriubtion received for psi-statistics") def _setup_observers(self): pass @@ -40,7 +40,7 @@ class PSICOMP_Linear(Pickleable): elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return sslinear_psi_comp.psicomputations(variance, Z, variational_posterior) else: - raise ValueError, "unknown distriubtion received for psi-statistics" + raise ValueError("unknown distriubtion received for psi-statistics") @Cache_this(limit=2, ignore_args=(0,1,2,3)) def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior): @@ -49,7 +49,7 @@ class PSICOMP_Linear(Pickleable): elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior) else: - raise ValueError, "unknown distriubtion received for psi-statistics" + raise ValueError("unknown distriubtion received for psi-statistics") def _setup_observers(self): pass \ No newline at end of file diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index 426296f7..0cd85b38 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -65,10 
+65,10 @@ class Stationary(Kern): self.link_parameters(self.variance, self.lengthscale) def K_of_r(self, r): - raise NotImplementedError, "implement the covariance function as a fn of r to use this class" + raise NotImplementedError("implement the covariance function as a fn of r to use this class") def dK_dr(self, r): - raise NotImplementedError, "implement derivative of the covariance function wrt r to use this class" + raise NotImplementedError("implement derivative of the covariance function wrt r to use this class") @Cache_this(limit=5, ignore_args=()) def K(self, X, X2=None): diff --git a/GPy/kern/_src/symbolic.py b/GPy/kern/_src/symbolic.py index 006af9dc..9ca20ea5 100644 --- a/GPy/kern/_src/symbolic.py +++ b/GPy/kern/_src/symbolic.py @@ -11,7 +11,7 @@ class Symbolic(Kern, Symbolic_core): def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', parameters=None, active_dims=None, operators=None, func_modules=[]): if k is None: - raise ValueError, "You must provide an argument for the covariance function." + raise ValueError("You must provide an argument for the covariance function.") Kern.__init__(self, input_dim, active_dims, name=name) kdiag = k From 2b8ef1041bf95d7f67ca4fc4d4a3d73cb7e909ff Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 13:43:55 +0000 Subject: [PATCH 030/166] Exception fixes for Python 3 compat --- GPy/likelihoods/likelihood.py | 2 +- GPy/likelihoods/link_functions.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 33698eb2..813f912f 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -203,7 +203,7 @@ class Likelihood(Parameterized): def _conditional_mean(self, f): """Quadrature calculation of the conditional mean: E(Y_star|f)""" - raise NotImplementedError, "implement this function to make predictions" + raise NotImplementedError("implement this function to make predictions") def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): """ diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py index a4ddc760..60e260e7 100644 --- a/GPy/likelihoods/link_functions.py +++ b/GPy/likelihoods/link_functions.py @@ -182,7 +182,7 @@ class Heaviside(GPTransformation): return np.where(f>0, 1, 0) def dtransf_df(self,f): - raise NotImplementedError, "This function is not differentiable!" + raise NotImplementedError("This function is not differentiable!") def d2transf_df2(self,f): - raise NotImplementedError, "This function is not differentiable!" 
+ raise NotImplementedError("This function is not differentiable!") From 74f8caba2bbca9bdc4fa5f63d7ddb26f372f3add Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 14:23:12 +0000 Subject: [PATCH 031/166] Exception fixes for Python 3 compat --- GPy/models/sparse_gp_minibatch.py | 2 +- GPy/plotting/matplot_dep/base_plots.py | 4 ++-- GPy/plotting/matplot_dep/dim_reduction_plots.py | 4 ++-- GPy/plotting/matplot_dep/kernel_plots.py | 6 +++--- GPy/plotting/matplot_dep/mapping_plots.py | 2 +- GPy/plotting/matplot_dep/models_plots.py | 2 +- GPy/plotting/matplot_dep/priors_plots.py | 2 +- GPy/plotting/matplot_dep/visualize.py | 8 ++++---- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index a6081e61..d3bbe5fe 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -50,7 +50,7 @@ class SparseGPMiniBatch(SparseGP): inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1]) else: #inference_method = ?? - raise NotImplementedError, "what to do what to do?" + raise NotImplementedError("what to do what to do?") print("defaulting to ", inference_method, "for latent function inference") self.kl_factr = 1. diff --git a/GPy/plotting/matplot_dep/base_plots.py b/GPy/plotting/matplot_dep/base_plots.py index b4142342..f25aee49 100644 --- a/GPy/plotting/matplot_dep/base_plots.py +++ b/GPy/plotting/matplot_dep/base_plots.py @@ -133,7 +133,7 @@ def x_frame1D(X,plot_limits=None,resolution=None): elif len(plot_limits)==2: xmin, xmax = plot_limits else: - raise ValueError, "Bad limits for plotting" + raise ValueError("Bad limits for plotting") Xnew = np.linspace(xmin,xmax,resolution or 200)[:,None] return Xnew, xmin, xmax @@ -149,7 +149,7 @@ def x_frame2D(X,plot_limits=None,resolution=None): elif len(plot_limits)==2: xmin, xmax = plot_limits else: - raise ValueError, "Bad limits for plotting" + raise ValueError("Bad limits for plotting") resolution = resolution or 50 xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution] diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index 982f8fa9..2c243e13 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -27,7 +27,7 @@ def most_significant_input_dimensions(model, which_indices): try: input_1, input_2 = np.argsort(model.input_sensitivity())[::-1][:2] except: - raise ValueError, "cannot automatically determine which dimensions to plot, please pass 'which_indices'" + raise ValueError("cannot automatically determine which dimensions to plot, please pass 'which_indices'") else: input_1, input_2 = which_indices return input_1, input_2 @@ -133,7 +133,7 @@ def plot_latent(model, labels=None, which_indices=None, try: xmin, xmax, ymin, ymax = plot_limits except (TypeError, ValueError) as e: - raise e.__class__, "Wrong plot limits: {} given -> need (xmin, xmax, ymin, ymax)".format(plot_limits) + raise e.__class__("Wrong plot limits: {} given -> need (xmin, xmax, ymin, ymax)".format(plot_limits)) view = ImshowController(ax, plot_function, (xmin, ymin, xmax, ymax), resolution, aspect=aspect, interpolation='bilinear', diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py index fc061ca7..aa015009 100644 --- a/GPy/plotting/matplot_dep/kernel_plots.py +++ b/GPy/plotting/matplot_dep/kernel_plots.py @@ -132,7 +132,7 @@ def plot(kernel,x=None, 
fignum=None, ax=None, title=None, plot_limits=None, reso elif len(plot_limits) == 2: xmin, xmax = plot_limits else: - raise ValueError, "Bad limits for plotting" + raise ValueError("Bad limits for plotting") Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None] Kx = kernel.K(Xnew, x) @@ -154,7 +154,7 @@ def plot(kernel,x=None, fignum=None, ax=None, title=None, plot_limits=None, reso elif len(plot_limits) == 2: xmin, xmax = plot_limits else: - raise ValueError, "Bad limits for plotting" + raise ValueError("Bad limits for plotting") resolution = resolution or 51 xx, yy = np.mgrid[xmin[0]:xmax[0]:1j * resolution, xmin[1]:xmax[1]:1j * resolution] @@ -168,4 +168,4 @@ def plot(kernel,x=None, fignum=None, ax=None, title=None, plot_limits=None, reso ax.set_ylabel("x2") ax.set_title("k(x1,x2 ; %0.1f,%0.1f)" % (x[0, 0], x[0, 1])) else: - raise NotImplementedError, "Cannot plot a kernel with more than two input dimensions" + raise NotImplementedError("Cannot plot a kernel with more than two input dimensions") diff --git a/GPy/plotting/matplot_dep/mapping_plots.py b/GPy/plotting/matplot_dep/mapping_plots.py index 6156687d..53bc1de2 100644 --- a/GPy/plotting/matplot_dep/mapping_plots.py +++ b/GPy/plotting/matplot_dep/mapping_plots.py @@ -81,4 +81,4 @@ def plot_mapping(self, plot_limits=None, which_data='all', which_parts='all', re ax.set_ylim(xmin[1], xmax[1]) else: - raise NotImplementedError, "Cannot define a frame with more than two input dimensions" + raise NotImplementedError("Cannot define a frame with more than two input dimensions") diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index d2d5a8e2..5cdf69fc 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -175,7 +175,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', plots['inducing_inputs'] = ax.plot(Zu[:,0], Zu[:,1], 'wo') else: - raise NotImplementedError, "Cannot define a frame with more than two input dimensions" + raise NotImplementedError("Cannot define a frame with more than two input dimensions") return plots def plot_fit_f(model, *args, **kwargs): diff --git a/GPy/plotting/matplot_dep/priors_plots.py b/GPy/plotting/matplot_dep/priors_plots.py index 8f02a03b..39dad631 100644 --- a/GPy/plotting/matplot_dep/priors_plots.py +++ b/GPy/plotting/matplot_dep/priors_plots.py @@ -29,4 +29,4 @@ def plot(prior): pb.contour(xx, yy, zz, linewidths=2) else: - raise NotImplementedError, "Cannot define a frame with more than two input dimensions" + raise NotImplementedError("Cannot define a frame with more than two input dimensions") diff --git a/GPy/plotting/matplot_dep/visualize.py b/GPy/plotting/matplot_dep/visualize.py index 50eb4b82..97f2c88b 100644 --- a/GPy/plotting/matplot_dep/visualize.py +++ b/GPy/plotting/matplot_dep/visualize.py @@ -25,10 +25,10 @@ class data_show: # If no axes are defined, create some. 
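The data_show methods converted below enforce an interface by raising at call time, and these patches keep that style, only modernising the raise syntax. Purely for comparison, and not something this series does, the same contract can be stated declaratively with the standard abc module:

    import abc

    class DataShowBase(object):
        __metaclass__ = abc.ABCMeta  # Python 2 spelling; Python 3 uses class Base(metaclass=abc.ABCMeta)

        @abc.abstractmethod
        def modify(self, vals):
            """Subclasses must update their view with the new values."""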
def modify(self, vals): - raise NotImplementedError, "this needs to be implemented to use the data_show class" + raise NotImplementedError("this needs to be implemented to use the data_show class") def close(self): - raise NotImplementedError, "this needs to be implemented to use the data_show class" + raise NotImplementedError("this needs to be implemented to use the data_show class") class vpython_show(data_show): """ @@ -403,7 +403,7 @@ class mocap_data_show_vpython(vpython_show): self.modify_vertices() def process_values(self): - raise NotImplementedError, "this needs to be implemented to use the data_show class" + raise NotImplementedError("this needs to be implemented to use the data_show class") class mocap_data_show(matplotlib_show): """Base class for visualizing motion capture data.""" @@ -455,7 +455,7 @@ class mocap_data_show(matplotlib_show): self.axes.figure.canvas.draw() def process_values(self): - raise NotImplementedError, "this needs to be implemented to use the data_show class" + raise NotImplementedError("this needs to be implemented to use the data_show class") def initialize_axes(self, boundary=0.05): """Set up the axes with the right limits and scaling.""" From c6b43d91da24a3339b7dbb197bc8eb3d4a15cd9f Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 14:29:40 +0000 Subject: [PATCH 032/166] Exception fixes for Python 3 compat --- GPy/testing/kernel_tests.py | 2 +- GPy/util/caching.py | 4 ++-- GPy/util/choleskies.py | 2 +- GPy/util/config.py | 2 +- GPy/util/datasets.py | 2 +- GPy/util/linalg.py | 4 ++-- GPy/util/ln_diff_erfs.py | 2 +- GPy/util/mocap.py | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index c1bb9265..3b09d6e7 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -282,7 +282,7 @@ class KernelGradientTestsContinuous(unittest.TestCase): try: k.K(self.X) except AssertionError: - raise AssertionError, "k.K(X) should run on self.D-1 dimension" + raise AssertionError("k.K(X) should run on self.D-1 dimension") def test_Matern52(self): k = GPy.kern.Matern52(self.D) diff --git a/GPy/util/caching.py b/GPy/util/caching.py index 16adc320..b1419aec 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -148,10 +148,10 @@ class Cacher(object): return Cacher(self.operation, self.limit, self.ignore_args, self.force_kwargs) def __getstate__(self, memo=None): - raise NotImplementedError, "Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation)) + raise NotImplementedError("Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation))) def __setstate__(self, memo=None): - raise NotImplementedError, "Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation)) + raise NotImplementedError("Trying to pickle Cacher object with function {}, pickling functions not possible.".format(str(self.operation))) @property def __name__(self): diff --git a/GPy/util/choleskies.py b/GPy/util/choleskies.py index cc3a7f75..606229f7 100644 --- a/GPy/util/choleskies.py +++ b/GPy/util/choleskies.py @@ -10,7 +10,7 @@ def safe_root(N): i = np.sqrt(N) j = int(i) if i != j: - raise ValueError, "N is not square!" 
+ raise ValueError("N is not square!") return j def flat_to_triang(flat): diff --git a/GPy/util/config.py b/GPy/util/config.py index 6dad46c8..8496fe36 100644 --- a/GPy/util/config.py +++ b/GPy/util/config.py @@ -20,4 +20,4 @@ user_file = os.path.join(home,'.gpy_user.cfg') config.readfp(open(default_file)) config.read([local_file, user_file]) if not config: - raise ValueError, "No configuration file found at either " + user_file + " or " + local_file + " or " + default_file + "." + raise ValueError("No configuration file found at either " + user_file + " or " + local_file + " or " + default_file + ".") diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 3ba46f52..2648dd8d 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -785,7 +785,7 @@ def hapmap3(data_set='hapmap3'): from sys import stdout import bz2 except ImportError as i: - raise i, "Need pandas for hapmap dataset, make sure to install pandas (http://pandas.pydata.org/) before loading the hapmap dataset" + raise i("Need pandas for hapmap dataset, make sure to install pandas (http://pandas.pydata.org/) before loading the hapmap dataset") dir_path = os.path.join(data_path,'hapmap3') hapmap_file_name = 'hapmap3_r2_b36_fwd.consensus.qc.poly' diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index d7ad5d61..04b341f3 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -91,7 +91,7 @@ def jitchol(A, maxtries=5): else: diagA = np.diag(A) if np.any(diagA <= 0.): - raise linalg.LinAlgError, "not pd: non-positive diagonal elements" + raise linalg.LinAlgError("not pd: non-positive diagonal elements") jitter = diagA.mean() * 1e-6 num_tries = 1 while num_tries <= maxtries and np.isfinite(jitter): @@ -105,7 +105,7 @@ def jitchol(A, maxtries=5): import traceback logging.warning('\n'.join(['Added {} rounds of jitter, jitter of {:.10e}'.format(num_tries-1, jitter), ' in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]])) - raise linalg.LinAlgError, "not positive definite, even with jitter." + raise linalg.LinAlgError("not positive definite, even with jitter.") # def dtrtri(L, lower=1): # """ diff --git a/GPy/util/ln_diff_erfs.py b/GPy/util/ln_diff_erfs.py index 582a4585..c1137283 100644 --- a/GPy/util/ln_diff_erfs.py +++ b/GPy/util/ln_diff_erfs.py @@ -35,7 +35,7 @@ def ln_diff_erfs(x1, x2, return_sign=False): elif x2.size==1: v = np.zeros(x1.shape) else: - raise ValueError, "This function does not broadcast unless provided with a scalar." + raise ValueError("This function does not broadcast unless provided with a scalar.") if x1.size == 1: x1 = np.tile(x1, x2.shape) diff --git a/GPy/util/mocap.py b/GPy/util/mocap.py index 58662cf9..bcc3c029 100644 --- a/GPy/util/mocap.py +++ b/GPy/util/mocap.py @@ -174,7 +174,7 @@ class skeleton(tree): return connection def to_xyz(self, channels): - raise NotImplementedError, "this needs to be implemented to use the skeleton class" + raise NotImplementedError("this needs to be implemented to use the skeleton class") def finalize(self): From e5080eb0ad02cbd369a0e16ce3a0acb6200c75d4 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 26 Feb 2015 14:40:58 +0000 Subject: [PATCH 033/166] Added Python 3 progress to README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9111a48c..dca746bf 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Work done so far: * Used 2to3 to fix relative imports * Used 2to3 to convert print from statement to function. 
Some advanced uses of print meant that this could not be done in a way that retained compatibility with old versions of Python. The oldest version of Python that is supported by this version is 2.6 due to the required future imports.
+* Used 2to3 to convert exceptions to Python 3 friendly versions. There are a few outstanding string exceptions to take care of that 2to3 doesn't handle. Will need to do these manually

 ### Citation

From 046bd3d9556bab0a9457b89b0e58ee605f344e40 Mon Sep 17 00:00:00 2001
From: Mike Croucher
Date: Thu, 26 Feb 2015 16:55:17 +0000
Subject: [PATCH 034/166] Commented out cholupdate since it uses weave and appears not to be used

---
 GPy/util/linalg.py | 57 +++++++++++++++++++++++-----------------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 04b341f3..0c4cdc50 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -489,34 +489,35 @@ def symmetrify_numpy(A, upper=False):
     else:
         A[triu] = A.T[triu]

-def cholupdate(L, x):
-    """
-    update the LOWER cholesky factor of a pd matrix IN PLACE
-
-    if L is the lower chol. of K, then this function computes L\_
-    where L\_ is the lower chol of K + x*x^T
-
-    """
-    support_code = """
-    #include
-    """
-    code = """
-    double r,c,s;
-    int j,i;
-    for(j=0; j
+# """
+# code = """
+# double r,c,s;
+# int j,i;
+# for(j=0; j
Date: Fri, 27 Feb 2015 07:59:54 +0000
Subject: [PATCH 035/166] Fixed ConfigParser for Python 3 compat

---
 GPy/util/config.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/GPy/util/config.py b/GPy/util/config.py
index 8496fe36..312d6991 100644
--- a/GPy/util/config.py
+++ b/GPy/util/config.py
@@ -1,9 +1,18 @@
 #
 # This loads the configuration
 #
-import ConfigParser
 import os
-config = ConfigParser.ConfigParser()
+try:
+    #Attempt Python 2 ConfigParser setup
+    import ConfigParser
+    config = ConfigParser.ConfigParser()
+except ImportError:
+    #Attempt Python 3 ConfigParser setup
+    import configparser
+    config = configparser.ConfigParser()
+
+
+
 # This is the default configuration file that always needs to be present.
 default_file = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'defaults.cfg'))

From 82ea1979720e1f241bb82b3bb862f0fe42bee5fd Mon Sep 17 00:00:00 2001
From: Mike Croucher
Date: Fri, 27 Feb 2015 15:39:10 +0000
Subject: [PATCH 036/166] Put weave import in a try block so it fails gracefully in Py3

---
 GPy/util/linalg.py |  8 +++++++-
 README.md          | 23 ++++++++++++++---------
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 0c4cdc50..9fd44a8d 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -6,7 +6,7 @@
 # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot.py

 import numpy as np
-from scipy import linalg, weave
+from scipy import linalg
 import types
 import ctypes
 from ctypes import byref, c_char, c_int, c_double # TODO
@@ -16,6 +16,12 @@ import os
 from .config import config
 import logging

+try:
+    from scipy import weave
+except ImportError:
+    config.set('weave', 'working', 'False')
+
+
 _scipyversion = np.float64((scipy.__version__).split('.')[:2])
 _fix_dpotri_scipy_bug = True
 if np.all(_scipyversion >= np.array([0, 14])):
diff --git a/README.md b/README.md
index dca746bf..17dfc241 100644
--- a/README.md
+++ b/README.md
@@ -10,15 +10,6 @@ A Gaussian processes framework in Python.
Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png)

-### Moving to Python 3
-Work is underway to make GPy run on Python 3. We are not there yet! Changes performed so far have retained compatibility with Python 2.6 and above.
-
-Work done so far:
-
-* Used 2to3 to fix relative imports
-* Used 2to3 to convert print from statement to function. Some advanced uses of print meant that this could not be done in a way that retained compatibility with old versions of Python. The oldest version of Python that is supported by this version is 2.6 due to the required future imports.
-* Used 2to3 to convert exceptions to Python 3 friendly versions. There are a few outstanding string exceptions to take care of that 2to3 doesn't handle. Will need to do these manually
-
 ### Citation
@@ -119,6 +110,20 @@ or from within IPython

     import GPy; GPy.tests()

+### Moving to Python 3
+Work is underway to make GPy run on Python 3. We are not there yet! Changes performed so far have retained compatibility with Python 2.6 and above.
+
+Work done so far:
+
+* Used 2to3 to fix relative imports
+* Used 2to3 to convert print from statement to function. Some advanced uses of print meant that this could not be done in a way that retained compatibility with old versions of Python. The oldest version of Python that is supported by this version is 2.6 due to the required future imports.
+* Used 2to3 to convert exceptions to Python 3 friendly versions. There are a few outstanding string exceptions to take care of that 2to3 doesn't handle. Will need to do these manually
+* Handled the different imports required for ConfigParser/configparser in Py2/Py3
+* In utils/linalg.py:
+  * Commented out the function cholupdate(L, x) since it doesn't appear to be used. It's definitely not in the tests.
+  * Put the import for scipy.weave in a try/except block so that it will gracefully fail in Py3
+
+
 ## Funding Acknowledgements

From 34511494273f974275a539c83fb1dede65fb7076 Mon Sep 17 00:00:00 2001
From: Mike Croucher
Date: Fri, 27 Feb 2015 15:54:31 +0000
Subject: [PATCH 037/166] Exception raising fix for Python 3

---
 GPy/util/datasets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 2648dd8d..8aced11a 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -113,7 +113,7 @@ def download_url(url, store_directory, save_name=None, messages=True, suffix='')
             os.makedirs(dir_name)
     try:
         response = urllib2.urlopen(url+suffix)
-    except urllib2.URLError, e:
+    except urllib2.URLError as e:
         if not hasattr(e, "code"):
             raise
         response = e

From 7bdb6ee556dcac89cef87d116f61bc46dd192849 Mon Sep 17 00:00:00 2001
From: Mike Croucher
Date: Fri, 27 Feb 2015 15:57:57 +0000
Subject: [PATCH 038/166] Fixed cPickle import for Python 3

---
 GPy/util/datasets.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 8aced11a..de157364 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -5,7 +5,6 @@ import copy
 import numpy as np
 import GPy
 import scipy.io
-import cPickle as pickle
 import zipfile
 import tarfile
 import datetime
@@ -20,6 +19,12 @@ try:
 except ImportError:
     ipython_available=False

+try:
+    #In Python 2, cPickle is faster.
It does not exist in Python 3 but the underlying code is always used + #if available + import cPickle as pickle +except ImportError: + import pickle import sys, urllib2 From 9e94830528af69734470dcb943e9ab1a801bc786 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 27 Feb 2015 16:39:31 +0000 Subject: [PATCH 039/166] urllib2 fixes for Py3 compatibility --- GPy/util/datasets.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index de157364..57755ea9 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -10,7 +10,7 @@ import tarfile import datetime import json import re - +import sys from .config import * ipython_available=True @@ -26,7 +26,13 @@ try: except ImportError: import pickle -import sys, urllib2 +#A Python2/3 import handler - urllib2 changed its name in Py3 and was also reorganised +try: + from urllib2 import urlopen + from urllib2 import URLError +except ImportError: + from urllib.request import urlopen + from urllib.error import URLError def reporthook(a,b,c): # ',' at the end of the line is important! @@ -117,8 +123,8 @@ def download_url(url, store_directory, save_name=None, messages=True, suffix='') if not os.path.exists(dir_name): os.makedirs(dir_name) try: - response = urllib2.urlopen(url+suffix) - except urllib2.URLError as e: + response = urlopen(url+suffix) + except URLError as e: if not hasattr(e, "code"): raise response = e @@ -511,7 +517,7 @@ def google_trends(query_terms=['big data', 'machine learning', 'data science'], print("Fetching query:") query = 'http://www.google.com/trends/fetchComponent?q=%s&cid=TIMESERIES_GRAPH_0&export=3' % ",".join(quoted_terms) - data = urllib2.urlopen(query).read() + data = urlopen(query).read() print("Done.") # In the notebook they did some data cleaning: remove Javascript header+footer, and translate new Date(....,..,..) into YYYY-MM-DD. header = """// Data table response\ngoogle.visualization.Query.setResponse(""" From a9559acbd04fe6be253509614ece200a73f2063e Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 27 Feb 2015 16:47:26 +0000 Subject: [PATCH 040/166] Removed import urllib2 since it wasn't being used --- GPy/util/mocap.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GPy/util/mocap.py b/GPy/util/mocap.py index bcc3c029..4f6336c5 100644 --- a/GPy/util/mocap.py +++ b/GPy/util/mocap.py @@ -2,7 +2,6 @@ import os import numpy as np import math from GPy.util import datasets as dat -import urllib2 class vertex: def __init__(self, name, id, parents=[], children=[], meta = {}): From 381d28e6c8c1fdbaf82835106151edc170ae4642 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 27 Feb 2015 16:50:34 +0000 Subject: [PATCH 041/166] Updated Py3 work --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 17dfc241..98613ce5 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,7 @@ Work done so far: * In utils/linalg.py: * Commented out the function cholupdate(L, x) since it doesn't appear to be used. 
It's definitely not in the tests.
   * Put the import for scipy.weave in a try/except block so that it will gracefully fail in Py3
+* Fixed a couple of urllib2 issues - had to be done manually since 2to3 didn't help

From 6554c32d23457c2757aaf6a60284ca52efc54dd6 Mon Sep 17 00:00:00 2001
From: Mike Croucher
Date: Fri, 27 Feb 2015 16:53:54 +0000
Subject: [PATCH 042/166] Changed <> to != for Py3 compatibility

---
 GPy/util/multioutput.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/util/multioutput.py b/GPy/util/multioutput.py
index cc9af29e..2233dbb6 100644
--- a/GPy/util/multioutput.py
+++ b/GPy/util/multioutput.py
@@ -51,7 +51,7 @@ def ICM(input_dim, num_outputs, kernel, W_rank=1,W=None,kappa=None,name='ICM'):
     :param W_rank: number tuples of the corregionalization parameters 'W'
     :type W_rank: integer
     """
-    if kernel.input_dim <> input_dim:
+    if kernel.input_dim != input_dim:
         kernel.input_dim = input_dim
         warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")

From 786feded414e7ad00a562890eec930655c65633f Mon Sep 17 00:00:00 2001
From: Mike Croucher
Date: Fri, 27 Feb 2015 17:35:26 +0000
Subject: [PATCH 043/166] Import fix for Py3

---
 .../latent_function_inference/__init__.py | 16 ++--
 GPy/util/univariate_Gaussian.py           | 73 ++++++++++---------
 2 files changed, 45 insertions(+), 44 deletions(-)

diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py
index 67f57638..2d52369f 100644
--- a/GPy/inference/latent_function_inference/__init__.py
+++ b/GPy/inference/latent_function_inference/__init__.py
@@ -61,15 +61,15 @@ class InferenceMethodList(LatentFunctionInference, list):
         for inf in state:
             self.append(inf)

-from exact_gaussian_inference import ExactGaussianInference
-from laplace import Laplace
+from .exact_gaussian_inference import ExactGaussianInference
+from .laplace import Laplace
 from GPy.inference.latent_function_inference.var_dtc import VarDTC
-from expectation_propagation import EP
-from expectation_propagation_dtc import EPDTC
-from dtc import DTC
-from fitc import FITC
-from var_dtc_parallel import VarDTC_minibatch
-from svgp import SVGP
+from .expectation_propagation import EP
+from .expectation_propagation_dtc import EPDTC
+from .dtc import DTC
+from .fitc import FITC
+from .var_dtc_parallel import VarDTC_minibatch
+from .svgp import SVGP

 # class FullLatentFunctionData(object):
 #
diff --git a/GPy/util/univariate_Gaussian.py b/GPy/util/univariate_Gaussian.py
index 09b2e99c..977eb461 100644
--- a/GPy/util/univariate_Gaussian.py
+++ b/GPy/util/univariate_Gaussian.py
@@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
-from scipy import weave
+#from scipy import weave

 def std_norm_pdf(x):
     """Standard Gaussian density function"""
@@ -37,41 +37,42 @@ def std_norm_cdf(x):
         cdf_x = cdf_x.reshape(x_shape)
     return cdf_x

-def std_norm_cdf_weave(x):
-    """
-    Cumulative standard Gaussian distribution
-    Based on Abramowitz, M. and Stegun, I. (1970)
-
-    A weave implementation of std_norm_cdf, which is faster. this is unused,
-    because of the difficulties of a weave dependency.
(see github issue #94) - - """ - #Generalize for many x - x = np.asarray(x).copy() - cdf_x = np.zeros_like(x) - N = x.size - support_code = "#include " - code = """ - - double sign, t, erf; - for (int i=0; i Date: Fri, 27 Feb 2015 17:39:15 +0000 Subject: [PATCH 044/166] Import fixes for Py3 --- GPy/inference/latent_function_inference/dtc.py | 2 +- .../latent_function_inference/exact_gaussian_inference.py | 2 +- .../latent_function_inference/expectation_propagation.py | 2 +- .../latent_function_inference/expectation_propagation_dtc.py | 2 +- GPy/inference/latent_function_inference/fitc.py | 2 +- GPy/inference/latent_function_inference/laplace.py | 2 +- GPy/inference/latent_function_inference/svgp.py | 2 +- GPy/inference/latent_function_inference/var_dtc.py | 2 +- GPy/inference/latent_function_inference/var_dtc_parallel.py | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/GPy/inference/latent_function_inference/dtc.py b/GPy/inference/latent_function_inference/dtc.py index 57a451b2..95600a91 100644 --- a/GPy/inference/latent_function_inference/dtc.py +++ b/GPy/inference/latent_function_inference/dtc.py @@ -1,7 +1,7 @@ # Copyright (c) 2012-2014, James Hensman # Licensed under the BSD 3-clause license (see LICENSE.txt) -from posterior import Posterior +from .posterior import Posterior from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv import numpy as np from . import LatentFunctionInference diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index 1312d36a..ebf59eaa 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -1,7 +1,7 @@ # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from posterior import Posterior +from .posterior import Posterior from ...util.linalg import pdinv, dpotrs, tdot from ...util import diag import numpy as np diff --git a/GPy/inference/latent_function_inference/expectation_propagation.py b/GPy/inference/latent_function_inference/expectation_propagation.py index 26144974..4c553145 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation.py +++ b/GPy/inference/latent_function_inference/expectation_propagation.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs -from posterior import Posterior +from .posterior import Posterior from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) diff --git a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py index e25df388..86dcb691 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py +++ b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py @@ -6,7 +6,7 @@ from ...util import diag from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR from ...core.parameterization.variational import VariationalPosterior from . 
import LatentFunctionInference -from posterior import Posterior +from .posterior import Posterior log_2_pi = np.log(2*np.pi) class EPDTC(LatentFunctionInference): diff --git a/GPy/inference/latent_function_inference/fitc.py b/GPy/inference/latent_function_inference/fitc.py index abe53f3d..7011aef8 100644 --- a/GPy/inference/latent_function_inference/fitc.py +++ b/GPy/inference/latent_function_inference/fitc.py @@ -1,7 +1,7 @@ # Copyright (c) 2012, James Hensman # Licensed under the BSD 3-clause license (see LICENSE.txt) -from posterior import Posterior +from .posterior import Posterior from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv from ...util import diag import numpy as np diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 05711b0b..5bbce35c 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -12,7 +12,7 @@ import numpy as np from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify, pdinv -from posterior import Posterior +from .posterior import Posterior import warnings def warning_on_one_line(message, category, filename, lineno, file=None, line=None): return ' %s:%s: %s:%s\n' % (filename, lineno, category.__name__, message) diff --git a/GPy/inference/latent_function_inference/svgp.py b/GPy/inference/latent_function_inference/svgp.py index 52db242c..9726335f 100644 --- a/GPy/inference/latent_function_inference/svgp.py +++ b/GPy/inference/latent_function_inference/svgp.py @@ -2,7 +2,7 @@ from . import LatentFunctionInference from ...util import linalg from ...util import choleskies import numpy as np -from posterior import Posterior +from .posterior import Posterior class SVGP(LatentFunctionInference): diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index a878ed18..97d8dfe3 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -1,7 +1,7 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from posterior import Posterior +from .posterior import Posterior from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify from ...util import diag from ...core.parameterization.variational import VariationalPosterior diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index cac69872..cb117af1 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -1,7 +1,7 @@ # Copyright (c) 2014, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-from posterior import Posterior
+from .posterior import Posterior
 from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri,pdinv
 from ...util import diag
 from ...core.parameterization.variational import VariationalPosterior

From 4b7036bdb2e614afe5f0b65dd5e38e064ef38b7a Mon Sep 17 00:00:00 2001
From: Mike Croucher
Date: Fri, 27 Feb 2015 17:45:20 +0000
Subject: [PATCH 045/166] More import fixes

---
 GPy/inference/optimization/__init__.py                   | 4 ++--
 GPy/inference/optimization/conjugate_gradient_descent.py | 2 +-
 GPy/inference/optimization/optimization.py               | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/GPy/inference/optimization/__init__.py b/GPy/inference/optimization/__init__.py
index 1a8f043b..909f897b 100644
--- a/GPy/inference/optimization/__init__.py
+++ b/GPy/inference/optimization/__init__.py
@@ -1,2 +1,2 @@
-from scg import SCG
-from optimization import *
+from .scg import SCG
+from .optimization import *
diff --git a/GPy/inference/optimization/conjugate_gradient_descent.py b/GPy/inference/optimization/conjugate_gradient_descent.py
index 274de784..fc2d8b61 100644
--- a/GPy/inference/optimization/conjugate_gradient_descent.py
+++ b/GPy/inference/optimization/conjugate_gradient_descent.py
@@ -1,7 +1,7 @@
 # Copyright (c) 2012-2014, Max Zwiessele
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-from gradient_descent_update_rules import FletcherReeves, \
+from .gradient_descent_update_rules import FletcherReeves, \
     PolakRibiere
 from Queue import Empty
 from multiprocessing import Value
diff --git a/GPy/inference/optimization/optimization.py b/GPy/inference/optimization/optimization.py
index 600de35d..2179bf5e 100644
--- a/GPy/inference/optimization/optimization.py
+++ b/GPy/inference/optimization/optimization.py
@@ -10,7 +10,7 @@ try:
     rasm_available = True
 except ImportError:
     rasm_available = False
-from scg import SCG
+from .scg import SCG
 
 class Optimizer():
     """

From 7a9203be4d57c58dd3c5d1a9b3f8c02423368763 Mon Sep 17 00:00:00 2001
From: Mike Croucher
Date: Fri, 27 Feb 2015 17:50:48 +0000
Subject: [PATCH 046/166] More import fixes

---
 GPy/inference/mcmc/__init__.py | 2 +-
 GPy/inference/mcmc/samplers.py | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/GPy/inference/mcmc/__init__.py b/GPy/inference/mcmc/__init__.py
index 956448d4..8f185457 100644
--- a/GPy/inference/mcmc/__init__.py
+++ b/GPy/inference/mcmc/__init__.py
@@ -1 +1 @@
-from hmc import HMC
+from .hmc import HMC
diff --git a/GPy/inference/mcmc/samplers.py b/GPy/inference/mcmc/samplers.py
index ff396a96..6459e8af 100644
--- a/GPy/inference/mcmc/samplers.py
+++ b/GPy/inference/mcmc/samplers.py
@@ -9,7 +9,13 @@ import sys
 import re
 import numdifftools as ndt
 import pdb
-import cPickle
+
+try:
+    #In Python 2, cPickle is faster.
It does not exist in Python 3 but the underlying code is always used + #if available + import cPickle as pickle +except ImportError: + import pickle class Metropolis_Hastings: From 17f14537a36feea92eb0562bcaf253e97dee94cd Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 27 Feb 2015 17:52:33 +0000 Subject: [PATCH 047/166] Fixed inconsistent tab error --- GPy/core/verbose_optimization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/core/verbose_optimization.py b/GPy/core/verbose_optimization.py index af64d3a8..60d8cba8 100644 --- a/GPy/core/verbose_optimization.py +++ b/GPy/core/verbose_optimization.py @@ -146,5 +146,5 @@ class VerboseOptimization(object): if not self.ipython_notebook: print() print('Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start)) - print('Optimization status: {0:.5g}'.format(self.status)) + print('Optimization status: {0:.5g}'.format(self.status)) print() From e07d554cb5e473143ddb468dcf60f343c08bdd5e Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 27 Feb 2015 17:53:29 +0000 Subject: [PATCH 048/166] Fixed inconsistent tab error --- GPy/core/verbose_optimization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/core/verbose_optimization.py b/GPy/core/verbose_optimization.py index 60d8cba8..4b1d0220 100644 --- a/GPy/core/verbose_optimization.py +++ b/GPy/core/verbose_optimization.py @@ -147,4 +147,4 @@ class VerboseOptimization(object): print() print('Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start)) print('Optimization status: {0:.5g}'.format(self.status)) - print() + print() From 19e9c9e7358f8deb14c2947955bc5fc0e3c3a1cf Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 27 Feb 2015 17:55:58 +0000 Subject: [PATCH 049/166] Import fixes for Py3 --- GPy/kern/_src/ODE_UY.py | 4 ++-- GPy/kern/_src/ODE_UYC.py | 4 ++-- GPy/kern/_src/ODE_st.py | 4 ++-- GPy/kern/_src/ODE_t.py | 4 ++-- GPy/kern/_src/add.py | 14 +++++++------- GPy/kern/_src/brownian.py | 2 +- GPy/kern/_src/coregionalize.py | 2 +- GPy/kern/_src/eq_ode2.py | 2 +- GPy/kern/_src/independent_outputs.py | 2 +- GPy/kern/_src/kern.py | 6 +++--- GPy/kern/_src/linear.py | 2 +- GPy/kern/_src/mlp.py | 2 +- GPy/kern/_src/periodic.py | 2 +- GPy/kern/_src/poly.py | 2 +- GPy/kern/_src/prod.py | 2 +- GPy/kern/_src/rbf.py | 6 +++--- GPy/kern/_src/splitKern.py | 2 +- GPy/kern/_src/static.py | 2 +- GPy/kern/_src/stationary.py | 2 +- GPy/kern/_src/symbolic.py | 2 +- GPy/kern/_src/trunclinear.py | 2 +- 21 files changed, 35 insertions(+), 35 deletions(-) diff --git a/GPy/kern/_src/ODE_UY.py b/GPy/kern/_src/ODE_UY.py index eef8609b..9c9b47be 100644 --- a/GPy/kern/_src/ODE_UY.py +++ b/GPy/kern/_src/ODE_UY.py @@ -1,11 +1,11 @@ # Copyright (c) 2013, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kern import Kern +from .kern import Kern from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp import numpy as np -from independent_outputs import index_to_slices +from .independent_outputs import index_to_slices class ODE_UY(Kern): def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., active_dims=None, name='ode_uy'): diff --git a/GPy/kern/_src/ODE_UYC.py b/GPy/kern/_src/ODE_UYC.py index 4c39a9c9..ff75a328 100644 --- a/GPy/kern/_src/ODE_UYC.py +++ b/GPy/kern/_src/ODE_UYC.py @@ -1,11 +1,11 @@ # Copyright (c) 2013, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) -from kern import Kern +from .kern import Kern from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp import numpy as np -from independent_outputs import index_to_slices +from .independent_outputs import index_to_slices class ODE_UYC(Kern): def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., ubias =1. ,active_dims=None, name='ode_uyc'): diff --git a/GPy/kern/_src/ODE_st.py b/GPy/kern/_src/ODE_st.py index 1c3b661b..afa46d09 100644 --- a/GPy/kern/_src/ODE_st.py +++ b/GPy/kern/_src/ODE_st.py @@ -1,10 +1,10 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kern import Kern +from .kern import Kern from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp import numpy as np -from independent_outputs import index_to_slices +from .independent_outputs import index_to_slices class ODE_st(Kern): diff --git a/GPy/kern/_src/ODE_t.py b/GPy/kern/_src/ODE_t.py index 268917ae..80625f51 100644 --- a/GPy/kern/_src/ODE_t.py +++ b/GPy/kern/_src/ODE_t.py @@ -1,8 +1,8 @@ -from kern import Kern +from .kern import Kern from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp import numpy as np -from independent_outputs import index_to_slices +from .independent_outputs import index_to_slices class ODE_t(Kern): diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 0f612f5b..17c0027a 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -4,7 +4,7 @@ import numpy as np import itertools from ...util.caching import Cache_this -from kern import CombinationKernel +from .kern import CombinationKernel class Add(CombinationKernel): """ @@ -84,10 +84,10 @@ class Add(CombinationKernel): psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts)) #return psi2 # compute the "cross" terms - from static import White, Bias - from rbf import RBF + from .static import White, Bias + from .rbf import RBF #from rbf_inv import RBFInv - from linear import Linear + from .linear import Linear #ffrom fixed import Fixed for p1, p2 in itertools.combinations(self.parts, 2): @@ -115,7 +115,7 @@ class Add(CombinationKernel): return psi2 def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - from static import White, Bias + from .static import White, Bias for p1 in self.parts: #compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2! eff_dL_dpsi1 = dL_dpsi1.copy() @@ -131,7 +131,7 @@ class Add(CombinationKernel): p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - from static import White, Bias + from .static import White, Bias target = np.zeros(Z.shape) for p1 in self.parts: #compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2! @@ -149,7 +149,7 @@ class Add(CombinationKernel): return target def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - from static import White, Bias + from .static import White, Bias target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters] for p1 in self.parameters: #compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2! 
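
Every change in the kernel modules above and below follows one mechanical rule: Python 3 removed implicit relative imports (PEP 328), so a sibling module inside a package can no longer be imported by its bare name. A minimal sketch of the rule outside GPy - the package layout and names here are invented for illustration:

# pkg/util.py
def helper():
    return 42

# pkg/core.py
# Python 2 only -- 'util' was found next to this file implicitly:
#   from util import helper
# Python 2 and 3 -- the leading dot names the sibling explicitly:
from .util import helper

def run():
    return helper()

On Python 3 the bare form is treated as an absolute import and raises ImportError unless a top-level module called util happens to exist, which is why these patches touch only the import lines and nothing else.
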
diff --git a/GPy/kern/_src/brownian.py b/GPy/kern/_src/brownian.py index fd79973c..d403fce7 100644 --- a/GPy/kern/_src/brownian.py +++ b/GPy/kern/_src/brownian.py @@ -1,7 +1,7 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kern import Kern +from .kern import Kern from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp import numpy as np diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py index b6a3aecf..d76dde1f 100644 --- a/GPy/kern/_src/coregionalize.py +++ b/GPy/kern/_src/coregionalize.py @@ -1,7 +1,7 @@ # Copyright (c) 2012, James Hensman and Ricardo Andrade # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kern import Kern +from .kern import Kern import numpy as np from scipy import weave from ...core.parameterization import Param diff --git a/GPy/kern/_src/eq_ode2.py b/GPy/kern/_src/eq_ode2.py index 59f67b8b..2d42a3e6 100644 --- a/GPy/kern/_src/eq_ode2.py +++ b/GPy/kern/_src/eq_ode2.py @@ -3,7 +3,7 @@ import numpy as np from scipy.special import wofz -from kern import Kern +from .kern import Kern from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp from ...util.caching import Cache_this diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py index 21958267..10681d57 100644 --- a/GPy/kern/_src/independent_outputs.py +++ b/GPy/kern/_src/independent_outputs.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kern import Kern, CombinationKernel +from .kern import Kern, CombinationKernel import numpy as np import itertools diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 57b2bff5..c4fadd57 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -4,7 +4,7 @@ import sys import numpy as np from ...core.parameterization.parameterized import Parameterized -from kernel_slice_operations import KernCallsViaSlicerMeta +from .kernel_slice_operations import KernCallsViaSlicerMeta from ...util.caching import Cache_this from GPy.core.parameterization.observable_array import ObsAr @@ -178,7 +178,7 @@ class Kern(Parameterized): """ assert isinstance(other, Kern), "only kernels can be added to kernels..." - from add import Add + from .add import Add return Add([self, other], name=name) def __mul__(self, other): @@ -210,7 +210,7 @@ class Kern(Parameterized): """ assert isinstance(other, Kern), "only kernels can be multiplied to kernels..." - from prod import Prod + from .prod import Prod #kernels = [] #if isinstance(self, Prod): kernels.extend(self.parameters) #else: kernels.append(self) diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 9d1a956b..e3a45c67 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -3,7 +3,7 @@ import numpy as np -from kern import Kern +from .kern import Kern from ...util.linalg import tdot from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp diff --git a/GPy/kern/_src/mlp.py b/GPy/kern/_src/mlp.py index 16e84363..4488ea82 100644 --- a/GPy/kern/_src/mlp.py +++ b/GPy/kern/_src/mlp.py @@ -1,7 +1,7 @@ # Copyright (c) 2013, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) -from kern import Kern +from .kern import Kern from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp import numpy as np diff --git a/GPy/kern/_src/periodic.py b/GPy/kern/_src/periodic.py index e8e16506..36fcb596 100644 --- a/GPy/kern/_src/periodic.py +++ b/GPy/kern/_src/periodic.py @@ -3,7 +3,7 @@ import numpy as np -from kern import Kern +from .kern import Kern from ...util.linalg import mdot from ...util.decorators import silence_errors from ...core.parameterization.param import Param diff --git a/GPy/kern/_src/poly.py b/GPy/kern/_src/poly.py index b90e8f8f..a5306c2a 100644 --- a/GPy/kern/_src/poly.py +++ b/GPy/kern/_src/poly.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from kern import Kern +from .kern import Kern from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp class Poly(Kern): diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index bff6d841..84bd1e1d 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from kern import CombinationKernel +from .kern import CombinationKernel from ...util.caching import Cache_this import itertools diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 0c6a4aef..c6998370 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -3,9 +3,9 @@ import numpy as np -from stationary import Stationary -from psi_comp import PSICOMP_RBF -from psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU +from .stationary import Stationary +from .psi_comp import PSICOMP_RBF +from .psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU from ...util.config import * class RBF(Stationary): diff --git a/GPy/kern/_src/splitKern.py b/GPy/kern/_src/splitKern.py index 27e4f76b..18771cb0 100644 --- a/GPy/kern/_src/splitKern.py +++ b/GPy/kern/_src/splitKern.py @@ -3,7 +3,7 @@ A new kernel """ import numpy as np -from kern import Kern,CombinationKernel +from .kern import Kern,CombinationKernel from .independent_outputs import index_to_slices import itertools diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py index f4223bf4..77e395fd 100644 --- a/GPy/kern/_src/static.py +++ b/GPy/kern/_src/static.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kern import Kern +from .kern import Kern import numpy as np from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index 0cd85b38..5052b7b0 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kern import Kern +from .kern import Kern from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp from ...util.linalg import tdot diff --git a/GPy/kern/_src/symbolic.py b/GPy/kern/_src/symbolic.py index 9ca20ea5..c339893a 100644 --- a/GPy/kern/_src/symbolic.py +++ b/GPy/kern/_src/symbolic.py @@ -1,7 +1,7 @@ # Check Matthew Rocklin's blog post. 
import sympy as sym import numpy as np -from kern import Kern +from .kern import Kern from ...core.symbolic import Symbolic_core diff --git a/GPy/kern/_src/trunclinear.py b/GPy/kern/_src/trunclinear.py index 4ebd51b6..8c48f134 100644 --- a/GPy/kern/_src/trunclinear.py +++ b/GPy/kern/_src/trunclinear.py @@ -3,7 +3,7 @@ import numpy as np -from kern import Kern +from .kern import Kern from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp from ...util.caching import Cache_this From 7353fd557524fd05ccaca179491cb9ef48597afc Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 27 Feb 2015 18:49:28 +0000 Subject: [PATCH 050/166] More import fixes for Py3 --- GPy/kern/_src/psi_comp/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/GPy/kern/_src/psi_comp/__init__.py b/GPy/kern/_src/psi_comp/__init__.py index 74aacd75..5041da50 100644 --- a/GPy/kern/_src/psi_comp/__init__.py +++ b/GPy/kern/_src/psi_comp/__init__.py @@ -4,10 +4,10 @@ from ....core.parameterization.parameter_core import Pickleable from GPy.util.caching import Cache_this from ....core.parameterization import variational -import rbf_psi_comp -import ssrbf_psi_comp -import sslinear_psi_comp -import linear_psi_comp +from . import rbf_psi_comp +from . import ssrbf_psi_comp +from . import sslinear_psi_comp +from . import linear_psi_comp class PSICOMP_RBF(Pickleable): @Cache_this(limit=2, ignore_args=(0,)) From dce76d3226f71ba8a608594c7b6e57217f310d8a Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 27 Feb 2015 18:53:10 +0000 Subject: [PATCH 051/166] Fix weave import for Py3 --- GPy/kern/_src/coregionalize.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py index d76dde1f..1b16fd73 100644 --- a/GPy/kern/_src/coregionalize.py +++ b/GPy/kern/_src/coregionalize.py @@ -3,11 +3,15 @@ from .kern import Kern import numpy as np -from scipy import weave from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp from ...util.config import config # for assesing whether to use weave +try: + from scipy import weave +except ImportError: + config.set('weave', 'working', 'False') + class Coregionalize(Kern): """ Covariance function for intrinsic/linear coregionalization models From 4c3d68b761cfebc682e3692753a544131e8d6161 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 27 Feb 2015 19:00:55 +0000 Subject: [PATCH 052/166] Fixed tab/space indentation issue --- GPy/core/parameterization/priors.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index edc83c38..298ca2d2 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -549,7 +549,7 @@ class DGPLVM(Prior): M_i = np.zeros((self.classnum, self.dim)) for i in cls: # Mean of each class - class_i = cls[i] + class_i = cls[i] M_i[i] = np.mean(class_i, axis=0) return M_i @@ -663,7 +663,7 @@ class DGPLVM(Prior): # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] - Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) # This function calculates derivative of the log of prior function @@ 
-684,7 +684,7 @@ class DGPLVM(Prior): # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] - Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] + Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] Sb_inv_N_trans = np.transpose(Sb_inv_N) Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans Sw_trans = np.transpose(Sw) @@ -742,7 +742,7 @@ class DGPLVM_T(Prior): self.datanum = lbl.shape[0] self.x_shape = x_shape self.dim = x_shape[1] - self.vec = vec + self.vec = vec def get_class_label(self, y): @@ -768,7 +768,7 @@ class DGPLVM_T(Prior): M_i = np.zeros((self.classnum, self.dim)) for i in cls: # Mean of each class - class_i = np.multiply(cls[i],vec) + class_i = np.multiply(cls[i],vec) M_i[i] = np.mean(class_i, axis=0) return M_i @@ -883,7 +883,7 @@ class DGPLVM_T(Prior): #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) #print 'SB_inv: ', Sb_inv_N #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] - Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0] + Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0] return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) # This function calculates derivative of the log of prior function @@ -905,7 +905,7 @@ class DGPLVM_T(Prior): #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) #print 'SB_inv: ',Sb_inv_N #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] - Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0] + Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0] Sb_inv_N_trans = np.transpose(Sb_inv_N) Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans Sw_trans = np.transpose(Sw) From 09c93e62d05e8482e3be863731c1e24679f34742 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 27 Feb 2015 19:03:45 +0000 Subject: [PATCH 053/166] Print fixes for Python 3 --- GPy/testing/examples_tests.py | 26 ++++---- GPy/testing/index_operations_tests.py | 4 +- GPy/testing/kernel_tests.py | 26 ++++---- GPy/testing/likelihood_tests.py | 96 +++++++++++++-------------- GPy/testing/model_tests.py | 16 ++--- GPy/testing/mpi_tests.py | 2 +- GPy/testing/parameterized_tests.py | 8 +-- GPy/testing/prior_tests.py | 2 +- 8 files changed, 90 insertions(+), 90 deletions(-) diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py index be26fff6..48a18119 100644 --- a/GPy/testing/examples_tests.py +++ b/GPy/testing/examples_tests.py @@ -46,20 +46,20 @@ def test_models(): for loader, module_name, is_pkg in pkgutil.iter_modules([examples_path]): # Load examples module_examples = loader.find_module(module_name).load_module(module_name) - print "MODULE", module_examples - print "Before" - print inspect.getmembers(module_examples, predicate=inspect.isfunction) + print("MODULE", module_examples) + print("Before") + print(inspect.getmembers(module_examples, predicate=inspect.isfunction)) functions = [ func for func in inspect.getmembers(module_examples, predicate=inspect.isfunction) if func[0].startswith('_') is False ][::-1] - print "After" - print functions + print("After") + print(functions) for example in functions: if example[0] in ['epomeo_gpx']: #These are the edge cases that we might want to handle specially if example[0] == 'epomeo_gpx' and not GPy.util.datasets.gpxpy_available: - print "Skipping as gpxpy is not available to parse GPS" + print("Skipping as gpxpy is not available to parse GPS") continue - print "Testing example: ", example[0] + print("Testing example: ", example[0]) # Generate model 
try: @@ -69,7 +69,7 @@ def test_models(): except Exception as e: failing_models[example[0]] = "Cannot make model: \n{e}".format(e=e) else: - print models + print(models) model_checkgrads.description = 'test_checkgrads_%s' % example[0] try: for model in models: @@ -89,17 +89,17 @@ def test_models(): #yield model_checkgrads, model #yield model_instance, model - print "Finished checking module {m}".format(m=module_name) + print("Finished checking module {m}".format(m=module_name)) if len(failing_models.keys()) > 0: - print "Failing models: " - print failing_models + print("Failing models: ") + print(failing_models) if len(failing_models.keys()) > 0: - print failing_models + print(failing_models) raise Exception(failing_models) if __name__ == "__main__": - print "Running unit tests, please be (very) patient..." + print("Running unit tests, please be (very) patient...") # unittest.main() test_models() diff --git a/GPy/testing/index_operations_tests.py b/GPy/testing/index_operations_tests.py index e5c2011a..e2895cd2 100644 --- a/GPy/testing/index_operations_tests.py +++ b/GPy/testing/index_operations_tests.py @@ -127,8 +127,8 @@ class Test(unittest.TestCase): self.assertEqual(self.view.size, 5) def test_print(self): - print self.param_index - print self.view + print(self.param_index) + print(self.view) if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.test_index_view'] diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 3b09d6e7..771028f0 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -37,7 +37,7 @@ class Kern_check_model(GPy.core.Model): def is_positive_semi_definite(self): v = np.linalg.eig(self.kernel.K(self.X))[0] if any(v.real<=-1e-10): - print v.real.min() + print(v.real.min()) return False else: return True @@ -126,7 +126,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if result and verbose: print("Check passed.") if not result: - print("Positive definite check failed for " + kern.name + " covariance function.") + print(("Positive definite check failed for " + kern.name + " covariance function.")) pass_checks = False assert(result) return False @@ -137,7 +137,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if result and verbose: print("Check passed.") if not result: - print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") + print(("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")) Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True) pass_checks = False assert(result) @@ -149,7 +149,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if result and verbose: print("Check passed.") if not result: - print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") + print(("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. 
Gradient values as follows:")) Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True) pass_checks = False assert(result) @@ -162,11 +162,11 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb except NotImplementedError: result=True if verbose: - print("update_gradients_diag not implemented for " + kern.name) + print(("update_gradients_diag not implemented for " + kern.name)) if result and verbose: print("Check passed.") if not result: - print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") + print(("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")) Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True) pass_checks = False assert(result) @@ -182,11 +182,11 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb except NotImplementedError: result=True if verbose: - print("gradients_X not implemented for " + kern.name) + print(("gradients_X not implemented for " + kern.name)) if result and verbose: print("Check passed.") if not result: - print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") + print(("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")) testmodel.checkgrad(verbose=True) import ipdb;ipdb.set_trace() assert(result) @@ -203,11 +203,11 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb except NotImplementedError: result=True if verbose: - print("gradients_X not implemented for " + kern.name) + print(("gradients_X not implemented for " + kern.name)) if result and verbose: print("Check passed.") if not result: - print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") + print(("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")) testmodel.checkgrad(verbose=True) assert(result) pass_checks = False @@ -223,11 +223,11 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb except NotImplementedError: result=True if verbose: - print("gradients_X not implemented for " + kern.name) + print(("gradients_X not implemented for " + kern.name)) if result and verbose: print("Check passed.") if not result: - print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") + print(("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")) Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True) pass_checks = False assert(result) @@ -404,7 +404,7 @@ class Coregionalize_weave_test(unittest.TestCase): if __name__ == "__main__": - print "Running unit tests, please be (very) patient..." 
+ print("Running unit tests, please be (very) patient...") unittest.main() # np.random.seed(0) # N0 = 3 diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py index 95929098..5feeffa4 100644 --- a/GPy/testing/likelihood_tests.py +++ b/GPy/testing/likelihood_tests.py @@ -44,8 +44,8 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None, The number of parameters and N is the number of data Need to take a slice out from f and a slice out of df """ - print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__, - func.__name__, dfunc.__name__) + print("\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__, + func.__name__, dfunc.__name__)) partial_f = dparam_partial(func, *args) partial_df = dparam_partial(dfunc, *args) gradchecking = True @@ -57,7 +57,7 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None, for fixed_val in range(dfnum): #dlik and dlik_dvar gives back 1 value for each f_ind = min(fnum, fixed_val+1) - 1 - print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val) + print("fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val)) #Make grad checker with this param moving, note that set_params is NOT being called #The parameter is being set directly with __setattr__ #Check only the parameter and function value we wish to check at a time @@ -70,12 +70,12 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None, if grad.grep_param_names(constrain_param): constraint(constrain_param, grad) else: - print "parameter didn't exist" - print constrain_param, " ", constraint + print("parameter didn't exist") + print(constrain_param, " ", constraint) if randomize: grad.randomize() if verbose: - print grad + print(grad) grad.checkgrad(verbose=1) if not grad.checkgrad(verbose=True): gradchecking = False @@ -350,8 +350,8 @@ class TestNoiseModels(object): ############# @with_setup(setUp, tearDown) def t_logpdf(self, model, Y, f): - print "\n{}".format(inspect.stack()[0][3]) - print model + print("\n{}".format(inspect.stack()[0][3])) + print(model) #print model._get_params() np.testing.assert_almost_equal( model.pdf(f.copy(), Y.copy()).prod(), @@ -360,33 +360,33 @@ class TestNoiseModels(object): @with_setup(setUp, tearDown) def t_dlogpdf_df(self, model, Y, f): - print "\n{}".format(inspect.stack()[0][3]) + print("\n{}".format(inspect.stack()[0][3])) self.description = "\n{}".format(inspect.stack()[0][3]) logpdf = functools.partial(model.logpdf, y=Y) dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g') grad.randomize() - print model + print(model) assert grad.checkgrad(verbose=1) @with_setup(setUp, tearDown) def t_d2logpdf_df2(self, model, Y, f): - print "\n{}".format(inspect.stack()[0][3]) + print("\n{}".format(inspect.stack()[0][3])) dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y) grad = GradientChecker(dlogpdf_df, d2logpdf_df2, f.copy(), 'g') grad.randomize() - print model + print(model) assert grad.checkgrad(verbose=1) @with_setup(setUp, tearDown) def t_d3logpdf_df3(self, model, Y, f): - print "\n{}".format(inspect.stack()[0][3]) + print("\n{}".format(inspect.stack()[0][3])) d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y) d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y) grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, f.copy(), 'g') grad.randomize() - print model + 
print(model) assert grad.checkgrad(verbose=1) ############## @@ -394,8 +394,8 @@ class TestNoiseModels(object): ############## @with_setup(setUp, tearDown) def t_dlogpdf_dparams(self, model, Y, f, params, params_names, param_constraints): - print "\n{}".format(inspect.stack()[0][3]) - print model + print("\n{}".format(inspect.stack()[0][3])) + print(model) assert ( dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta, params, params_names, args=(f, Y), constraints=param_constraints, @@ -404,8 +404,8 @@ class TestNoiseModels(object): @with_setup(setUp, tearDown) def t_dlogpdf_df_dparams(self, model, Y, f, params, params_names, param_constraints): - print "\n{}".format(inspect.stack()[0][3]) - print model + print("\n{}".format(inspect.stack()[0][3])) + print(model) assert ( dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta, params, params_names, args=(f, Y), constraints=param_constraints, @@ -414,8 +414,8 @@ class TestNoiseModels(object): @with_setup(setUp, tearDown) def t_d2logpdf2_df2_dparams(self, model, Y, f, params, params_names, param_constraints): - print "\n{}".format(inspect.stack()[0][3]) - print model + print("\n{}".format(inspect.stack()[0][3])) + print(model) assert ( dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta, params, params_names, args=(f, Y), constraints=param_constraints, @@ -427,7 +427,7 @@ class TestNoiseModels(object): ################ @with_setup(setUp, tearDown) def t_dlogpdf_dlink(self, model, Y, f, link_f_constraints): - print "\n{}".format(inspect.stack()[0][3]) + print("\n{}".format(inspect.stack()[0][3])) logpdf = functools.partial(model.logpdf_link, y=Y) dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y) grad = GradientChecker(logpdf, dlogpdf_dlink, f.copy(), 'g') @@ -437,13 +437,13 @@ class TestNoiseModels(object): constraint('g', grad) grad.randomize() - print grad - print model + print(grad) + print(model) assert grad.checkgrad(verbose=1) @with_setup(setUp, tearDown) def t_d2logpdf_dlink2(self, model, Y, f, link_f_constraints): - print "\n{}".format(inspect.stack()[0][3]) + print("\n{}".format(inspect.stack()[0][3])) dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y) d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y) grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, f.copy(), 'g') @@ -453,13 +453,13 @@ class TestNoiseModels(object): constraint('g', grad) grad.randomize() - print grad - print model + print(grad) + print(model) assert grad.checkgrad(verbose=1) @with_setup(setUp, tearDown) def t_d3logpdf_dlink3(self, model, Y, f, link_f_constraints): - print "\n{}".format(inspect.stack()[0][3]) + print("\n{}".format(inspect.stack()[0][3])) d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y) d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y) grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, f.copy(), 'g') @@ -469,8 +469,8 @@ class TestNoiseModels(object): constraint('g', grad) grad.randomize() - print grad - print model + print(grad) + print(model) assert grad.checkgrad(verbose=1) ################# @@ -478,8 +478,8 @@ class TestNoiseModels(object): ################# @with_setup(setUp, tearDown) def t_dlogpdf_link_dparams(self, model, Y, f, params, param_names, param_constraints): - print "\n{}".format(inspect.stack()[0][3]) - print model + print("\n{}".format(inspect.stack()[0][3])) + print(model) assert ( dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta, params, param_names, args=(f, Y), constraints=param_constraints, @@ -488,8 +488,8 @@ class 
TestNoiseModels(object): @with_setup(setUp, tearDown) def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_names, param_constraints): - print "\n{}".format(inspect.stack()[0][3]) - print model + print("\n{}".format(inspect.stack()[0][3])) + print(model) assert ( dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta, params, param_names, args=(f, Y), constraints=param_constraints, @@ -498,8 +498,8 @@ class TestNoiseModels(object): @with_setup(setUp, tearDown) def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_names, param_constraints): - print "\n{}".format(inspect.stack()[0][3]) - print model + print("\n{}".format(inspect.stack()[0][3])) + print(model) assert ( dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta, params, param_names, args=(f, Y), constraints=param_constraints, @@ -511,7 +511,7 @@ class TestNoiseModels(object): ################ @with_setup(setUp, tearDown) def t_laplace_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints): - print "\n{}".format(inspect.stack()[0][3]) + print("\n{}".format(inspect.stack()[0][3])) #Normalize Y = Y/Y.max() white_var = 1e-6 @@ -524,7 +524,7 @@ class TestNoiseModels(object): for constrain_param, constraint in constraints: constraint(constrain_param, m) - print m + print(m) m.randomize() #Set params @@ -533,7 +533,7 @@ class TestNoiseModels(object): m[name] = param_vals[param_num] #m.optimize(max_iters=8) - print m + print(m) #if not m.checkgrad(step=step): #m.checkgrad(verbose=1, step=step) #NOTE this test appears to be stochastic for some likelihoods (student t?) @@ -546,7 +546,7 @@ class TestNoiseModels(object): ########### @with_setup(setUp, tearDown) def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints): - print "\n{}".format(inspect.stack()[0][3]) + print("\n{}".format(inspect.stack()[0][3])) #Normalize Y = Y/Y.max() white_var = 1e-6 @@ -561,7 +561,7 @@ class TestNoiseModels(object): constraints[param_num](name, m) m.randomize() - print m + print(m) assert m.checkgrad(verbose=1, step=step) @@ -598,7 +598,7 @@ class LaplaceTests(unittest.TestCase): self.X = None def test_gaussian_d2logpdf_df2_2(self): - print "\n{}".format(inspect.stack()[0][3]) + print("\n{}".format(inspect.stack()[0][3])) self.Y = None self.N = 2 @@ -648,16 +648,16 @@ class LaplaceTests(unittest.TestCase): m2.randomize() if debug: - print m1 - print m2 + print(m1) + print(m2) optimizer = 'scg' - print "Gaussian" + print("Gaussian") m1.optimize(optimizer, messages=debug) - print "Laplace Gaussian" + print("Laplace Gaussian") m2.optimize(optimizer, messages=debug) if debug: - print m1 - print m2 + print(m1) + print(m2) m2[:] = m1[:] @@ -706,5 +706,5 @@ class LaplaceTests(unittest.TestCase): self.assertTrue(m2.checkgrad(verbose=True)) if __name__ == "__main__": - print "Running unit tests" + print("Running unit tests") unittest.main() diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 559014f7..f9ff6402 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -153,19 +153,19 @@ class MiscTests(unittest.TestCase): def test_big_model(self): m = GPy.examples.dimensionality_reduction.mrd_simulation(optimize=0, plot=0, plot_sim=0) m.X.fix() - print m + print(m) m.unfix() m.checkgrad() - print m + print(m) m.fix() - print m + print(m) m.inducing_inputs.unfix() - print m + print(m) m.checkgrad() m.unfix() m.checkgrad() m.checkgrad() - print m + print(m) def test_model_set_params(self): m = GPy.models.GPRegression(self.X, self.Y) @@ 
-176,7 +176,7 @@ class MiscTests(unittest.TestCase): m['.*var'] -= .1 np.testing.assert_equal(m.kern.lengthscale, lengthscale) m.optimize() - print m + print(m) def test_model_updates(self): Y1 = np.random.normal(0, 1, (40, 13)) @@ -201,7 +201,7 @@ class MiscTests(unittest.TestCase): Y = np.sin(X) + np.random.randn(20, 1) * 0.05 m = GPy.models.GPRegression(X, Y) m.optimize() - print m + print(m) class GradientTests(np.testing.TestCase): def setUp(self): @@ -523,5 +523,5 @@ class GradientTests(np.testing.TestCase): if __name__ == "__main__": - print "Running unit tests, please be (very) patient..." + print("Running unit tests, please be (very) patient...") unittest.main() diff --git a/GPy/testing/mpi_tests.py b/GPy/testing/mpi_tests.py index 5c489032..28a23288 100644 --- a/GPy/testing/mpi_tests.py +++ b/GPy/testing/mpi_tests.py @@ -84,7 +84,7 @@ except: if __name__ == "__main__": - print "Running unit tests, please be (very) patient..." + print("Running unit tests, please be (very) patient...") try: import mpi4py unittest.main() diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index 7c4f4ce2..431d535b 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -240,7 +240,7 @@ class ParameterizedTest(unittest.TestCase): self.p2.constrain_positive() m = TestLikelihood() - print m + print(m) val = m.p1.values.copy() self.assert_(m.p1.is_fixed) self.assert_(m.constraints[GPy.constraints.Logexp()].tolist(), [1]) @@ -248,9 +248,9 @@ class ParameterizedTest(unittest.TestCase): self.assertEqual(m.p1, val) def test_printing(self): - print self.test1 - print self.param - print self.test1[''] + print(self.test1) + print(self.param) + print(self.test1['']) if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.test_add_parameter'] diff --git a/GPy/testing/prior_tests.py b/GPy/testing/prior_tests.py index 6a61fbb5..ca03ad93 100644 --- a/GPy/testing/prior_tests.py +++ b/GPy/testing/prior_tests.py @@ -110,5 +110,5 @@ class PriorTests(unittest.TestCase): if __name__ == "__main__": - print "Running unit tests, please be (very) patient..." + print("Running unit tests, please be (very) patient...") unittest.main() From 300bdb960bd9099e94580cbc2a739011b703d626 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 27 Feb 2015 19:04:25 +0000 Subject: [PATCH 054/166] Print fixes for Python 3 --- GPy/plotting/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/plotting/__init__.py b/GPy/plotting/__init__.py index 652bc628..9dd84441 100644 --- a/GPy/plotting/__init__.py +++ b/GPy/plotting/__init__.py @@ -4,4 +4,4 @@ try: from . import matplot_dep except (ImportError, NameError): - print 'Fail to load GPy.plotting.matplot_dep.' 
\ No newline at end of file
+    print('Fail to load GPy.plotting.matplot_dep.')
\ No newline at end of file

From b4ad1b2d733f40a1584abd28c9dc67e8743016e4 Mon Sep 17 00:00:00 2001
From: Mike Croucher
Date: Fri, 27 Feb 2015 19:32:38 +0000
Subject: [PATCH 055/166] Python3 compatibility fixes

---
 GPy/core/parameterization/index_operations.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py
index e5273e55..01a13c8b 100644
--- a/GPy/core/parameterization/index_operations.py
+++ b/GPy/core/parameterization/index_operations.py
@@ -66,7 +66,11 @@ class ParameterIndexOperations(object):
             self.add(t, i)
 
     def iteritems(self):
-        return self._properties.iteritems()
+        try:
+            return self._properties.iteritems()
+        except AttributeError:
+            #Changed this from iteritems to items for Py3 compatibility. It didn't break the test suite.
+            return self._properties.items()
 
     def items(self):
         return self._properties.items()
@@ -101,7 +105,11 @@ class ParameterIndexOperations(object):
         return reduce(lambda a,b: a+b.size, self.iterindices(), 0)
 
     def iterindices(self):
-        return self._properties.itervalues()
+        try:
+            return self._properties.itervalues()
+        except AttributeError:
+            #Changed this from itervalues to values for Py3 compatibility. It didn't break the test suite.
+            return self._properties.values()
 
     def indices(self):
         return self._properties.values()

From 58225c018828cc6afa01f7f968eb085701e6ea1e Mon Sep 17 00:00:00 2001
From: Mike Croucher
Date: Fri, 27 Feb 2015 19:36:56 +0000
Subject: [PATCH 056/166] import reduce from functools for py3 compatibility

---
 GPy/core/parameterization/parameter_core.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py
index 02cb0a12..b38d9678 100644
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@@ -18,6 +18,7 @@ import numpy as np
 import re
 import logging
 from .updateable import Updateable
+from functools import reduce
 
 class HierarchyError(Exception):
     """

From 1c6cfe2d81437d264ee72871ebf732dfafe7ceb3 Mon Sep 17 00:00:00 2001
From: Mike Croucher
Date: Sat, 28 Feb 2015 11:54:11 +0000
Subject: [PATCH 057/166] Changed metaclass syntax to be Py3 compatible. This breaks Py2 compatibility

---
 GPy/core/parameterization/parameterized.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py
index 62914636..db945016 100644
--- a/GPy/core/parameterization/parameterized.py
+++ b/GPy/core/parameterization/parameterized.py
@@ -27,7 +27,7 @@ class ParametersChangedMeta(type):
         self.parameters_changed()
         return self
 
-class Parameterized(Parameterizable):
+class Parameterized(Parameterizable,metaclass=ParametersChangedMeta):
     """
     Parameterized class
 
@@ -73,6 +73,7 @@ class Parameterized(Parameterizable):
     # Metaclass for parameters changed after init.
# This makes sure, that parameters changed will always be called after __init__ # **Never** call parameters_changed() yourself + #This is ignored in Python 3 -- you need to put the meta class in the __metaclass__ = ParametersChangedMeta #=========================================================================== def __init__(self, name=None, parameters=[], *a, **kw): From 6b1e20027a8f17e4be97394e9c608686628c677e Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sat, 28 Feb 2015 12:20:41 +0000 Subject: [PATCH 058/166] reduce fix for Python 3 --- GPy/util/caching.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPy/util/caching.py b/GPy/util/caching.py index b1419aec..196ce343 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -2,6 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) from ..core.parameterization.observable import Observable import collections, weakref +from functools import reduce class Cacher(object): def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()): From 8c552c2509a89bdc198e70a90c8db1d52dc5d78b Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sat, 28 Feb 2015 12:39:39 +0000 Subject: [PATCH 059/166] Fixed string encoding for Python 3 --- GPy/util/linalg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 9fd44a8d..88ecf011 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -353,11 +353,11 @@ def tdot_blas(mat, out=None): # of C order. However, I tried that and had errors with large matrices: # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py mat = np.asfortranarray(mat) - TRANS = c_char('n') + TRANS = c_char('n'.encode('ascii')) N = c_int(mat.shape[0]) K = c_int(mat.shape[1]) LDA = c_int(mat.shape[0]) - UPLO = c_char('l') + UPLO = c_char('l'.encode('ascii')) ALPHA = c_double(1.0) A = mat.ctypes.data_as(ctypes.c_void_p) BETA = c_double(0.0) From 79f4b26f4d6bec363c2cbe3857ed6844193c7501 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sat, 28 Feb 2015 12:41:23 +0000 Subject: [PATCH 060/166] Fixed integer division for Python 3 compat --- GPy/util/linalg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 88ecf011..7f1a28f3 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -362,7 +362,7 @@ def tdot_blas(mat, out=None): A = mat.ctypes.data_as(ctypes.c_void_p) BETA = c_double(0.0) C = out.ctypes.data_as(ctypes.c_void_p) - LDC = c_int(np.max(out.strides) / 8) + LDC = c_int(np.max(out.strides) // 8) dsyrk(byref(UPLO), byref(TRANS), byref(N), byref(K), byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC)) From b4a3253e26926125896e9208d05e9cc04f316884 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sat, 28 Feb 2015 18:22:44 +0000 Subject: [PATCH 061/166] Ensure that object.__new__ never gets called with arguments --- GPy/core/parameterization/priors.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index 298ca2d2..432e2473 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -15,8 +15,12 @@ class Prior(object): _instance = None def __new__(cls, *args, **kwargs): if not cls._instance or cls._instance.__class__ is not cls: - cls._instance = super(Prior, cls).__new__(cls, *args, **kwargs) - return cls._instance + newfunc = super(Prior, cls).__new__ + if newfunc is object.__new__: + cls._instance = newfunc(cls) + 
else: + cls._instance = newfunc(cls, *args, **kwargs) + return cls._instance def pdf(self, x): return np.exp(self.lnpdf(x)) @@ -52,7 +56,11 @@ class Gaussian(Prior): for instance in cls._instances: if instance().mu == mu and instance().sigma == sigma: return instance() - o = super(Prior, cls).__new__(cls, mu, sigma) + newfunc = super(Prior, cls).__new__ + if newfunc is object.__new__: + o = newfunc(cls) + else: + o = newfunc(cls, mu, sigma) cls._instances.append(weakref.ref(o)) return cls._instances[-1]() From a6e28205e11df95348d148b22af9550f5381eee2 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sat, 28 Feb 2015 18:36:52 +0000 Subject: [PATCH 062/166] Ensure that object.__new__ never gets called with arguments --- GPy/core/parameterization/priors.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index 432e2473..6c7f655f 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -148,7 +148,11 @@ class LogGaussian(Gaussian): for instance in cls._instances: if instance().mu == mu and instance().sigma == sigma: return instance() - o = super(Prior, cls).__new__(cls, mu, sigma) + newfunc = super(Prior, cls).__new__ + if newfunc is object.__new__: + o = newfunc(cls) + else: + o = newfunc(cls, mu, sigma) cls._instances.append(weakref.ref(o)) return cls._instances[-1]() From 1c6796e73d6c0bf2a56c9d679e5f01cd38aec7f7 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sat, 28 Feb 2015 18:40:25 +0000 Subject: [PATCH 063/166] import reduce from functools for Py3 compat --- GPy/core/parameterization/index_operations.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index 01a13c8b..1e97f488 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -4,6 +4,7 @@ import numpy from numpy.lib.function_base import vectorize from .lists_and_dicts import IntArrayDict +from functools import reduce def extract_properties_to_index(index, props): prop_index = dict() From 358488cf5d05e64ddc07c38c7e125aab01548220 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sat, 28 Feb 2015 18:50:58 +0000 Subject: [PATCH 064/166] Ensure that object.__new__ never gets called with arguments --- GPy/core/parameterization/priors.py | 6 +++++- GPy/core/parameterization/transformations.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index 6c7f655f..a4bbecb3 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -270,7 +270,11 @@ class Gamma(Prior): for instance in cls._instances: if instance().a == a and instance().b == b: return instance() - o = super(Prior, cls).__new__(cls, a, b) + newfunc = super(Prior, cls).__new__ + if newfunc is object.__new__: + o = newfunc(cls) + else: + o = newfunc(cls, a, b) cls._instances.append(weakref.ref(o)) return cls._instances[-1]() diff --git a/GPy/core/parameterization/transformations.py b/GPy/core/parameterization/transformations.py index 05051c92..7e15cee9 100644 --- a/GPy/core/parameterization/transformations.py +++ b/GPy/core/parameterization/transformations.py @@ -468,7 +468,11 @@ class Logistic(Transformation): for instance in cls._instances: if instance().lower == lower and instance().upper == upper: return instance() - o = super(Transformation, cls).__new__(cls, 
lower, upper, *args, **kwargs) + newfunc = super(Transformation, cls).__new__ + if newfunc is object.__new__: + o = newfunc(cls) + else: + o = newfunc(cls, lower, upper, *args, **kwargs) cls._instances.append(weakref.ref(o)) return cls._instances[-1]() def __init__(self, lower, upper): From e0d0f2e633c18dd1e6ddc324957624105c1128d3 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sat, 28 Feb 2015 18:59:14 +0000 Subject: [PATCH 065/166] from functools import reduce for Py3 compat --- GPy/kern/_src/add.py | 1 + GPy/kern/_src/kern.py | 1 + GPy/testing/parameterized_tests.py | 1 + GPy/testing/pickle_tests.py | 1 + 4 files changed, 4 insertions(+) diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 17c0027a..82c84c52 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -5,6 +5,7 @@ import numpy as np import itertools from ...util.caching import Cache_this from .kern import CombinationKernel +from functools import reduce class Add(CombinationKernel): """ diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index c4fadd57..6ccd315b 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -7,6 +7,7 @@ from ...core.parameterization.parameterized import Parameterized from .kernel_slice_operations import KernCallsViaSlicerMeta from ...util.caching import Cache_this from GPy.core.parameterization.observable_array import ObsAr +from functools import reduce diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index 431d535b..1ab0fd32 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -12,6 +12,7 @@ from GPy.core.parameterization.transformations import NegativeLogexp, Logistic from GPy.core.parameterization.parameterized import Parameterized from GPy.core.parameterization.param import Param from GPy.core.parameterization.index_operations import ParameterIndexOperations +from functools import reduce class ArrayCoreTest(unittest.TestCase): def setUp(self): diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py index c79e9914..777d0d6b 100644 --- a/GPy/testing/pickle_tests.py +++ b/GPy/testing/pickle_tests.py @@ -19,6 +19,7 @@ from GPy.kern._src.static import Bias, White from GPy.examples.dimensionality_reduction import mrd_simulation from GPy.core.parameterization.variational import NormalPosterior from GPy.models.gp_regression import GPRegression +from functools import reduce def toy_model(): X = np.linspace(0,1,50)[:, None] From 40f5f4b865d961e2a9449d976371ebc0d556f94f Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sat, 28 Feb 2015 19:08:01 +0000 Subject: [PATCH 066/166] has_key has been removed from Python 3 --- GPy/core/parameterization/parameter_core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index b38d9678..7af40860 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -139,9 +139,9 @@ class Pickleable(object): which = self which.traverse_parents(parents.append) # collect parents for p in parents: - if not memo.has_key(id(p)):memo[id(p)] = None # set all parents to be None, so they will not be copied - if not memo.has_key(id(self.gradient)):memo[id(self.gradient)] = None # reset the gradient - if not memo.has_key(id(self._fixes_)):memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent + if not id(p) in memo :memo[id(p)] = None # set all parents to be None, so they 
From 8d66b7b4f03217368258e67581b881b0bc0d8a78 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sat, 28 Feb 2015 19:15:46 +0000 Subject: [PATCH 067/166] Need to explicitly turn a range object into a list for these tests --- GPy/testing/parameterized_tests.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index 1ab0fd32..f3e0863f 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -121,15 +121,15 @@ class ParameterizedTest(unittest.TestCase):     def test_default_constraints(self):         self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)         self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) -        self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2)) +        self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), list(range(2)))         from GPy.core.parameterization.transformations import Logexp         kern = self.test1.kern         self.test1.unlink_parameter(kern) -        self.assertListEqual(kern.constraints[Logexp()].tolist(), range(3)) +        self.assertListEqual(kern.constraints[Logexp()].tolist(), list(range(3)))     def test_constraints(self):         self.rbf.constrain(GPy.transformations.Square(), False) -        self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(self.param.size, self.param.size+self.rbf.size)) +        self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), list(range(self.param.size, self.param.size+self.rbf.size)))         self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [self.param.size+self.rbf.size])         self.test1.kern.unlink_parameter(self.rbf) @@ -182,8 +182,8 @@ class ParameterizedTest(unittest.TestCase):     def test_add_parameter_in_hierarchy(self):         self.test1.kern.rbf.link_parameter(Param("NEW", np.random.rand(2), NegativeLogexp()), 1) -        self.assertListEqual(self.test1.constraints[NegativeLogexp()].tolist(), range(self.param.size+1, self.param.size+1 + 2)) -        self.assertListEqual(self.test1.constraints[GPy.transformations.Logistic(0,1)].tolist(), range(self.param.size)) +        self.assertListEqual(self.test1.constraints[NegativeLogexp()].tolist(), list(range(self.param.size+1, self.param.size+1 + 2))) +        self.assertListEqual(self.test1.constraints[GPy.transformations.Logistic(0,1)].tolist(), list(range(self.param.size)))         self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp(0,1)].tolist(), np.r_[50, 53:55].tolist())     def test_regular_expression_misc(self): From 560950466d63eaa7b78d8a8215bdb8f5e228b818 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sun, 1 Mar 2015 09:24:22 +0000 Subject: [PATCH 068/166] itertools fixes from 2to3 --- GPy/core/model.py | 2 +- GPy/core/parameterization/param.py | 14 +++++++------- GPy/core/parameterization/parameterized.py | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 08a4ea25..9521733c 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -364,7 +364,7 @@ class Model(Parameterized):         gradient = self._grads(x).copy()         np.where(gradient == 0, 1e-312,
gradient) ret = True - for nind, xind in itertools.izip(param_index, transformed_index): + for nind, xind in zip(param_index, transformed_index): xx = x.copy() xx[xind] += step f1 = self._objective(xx) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index fbbb59ed..c7e76c98 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -279,7 +279,7 @@ class Param(Parameterizable, ObsAr): .tg th{font-family:"Courier New", Courier, monospace !important;font-weight:normal;color:#fff;background-color:#26ADE4;border-style:solid;border-width:1px;overflow:hidden;word-break:normal;border-color:#DCDCDC;} .tg .tg-left{font-family:"Courier New", Courier, monospace !important;font-weight:normal;text-align:left;} .tg .tg-right{font-family:"Courier New", Courier, monospace !important;font-weight:normal;text-align:right;} -"""] + [''] + [header] + ["".format(x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)] + ["
{i}{x}{c}{p}{t}
"]) +"""] + [''] + [header] + ["".format(x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in zip(indices, vals, constr_matrix, ties, prirs)] + ["
{i}{x}{c}{p}{t}
"]) def __str__(self, constr_matrix=None, indices=None, prirs=None, ties=None, lc=None, lx=None, li=None, lp=None, lt=None, only_name=False): filter_ = self._current_slice_ @@ -300,7 +300,7 @@ class Param(Parameterizable, ObsAr): if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing if not ties: ties = itertools.cycle(['']) - return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices + return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in zip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices # except: return super(Param, self).__str__() class ParamConcatenation(object): @@ -429,14 +429,14 @@ class ParamConcatenation(object): params = self.params constr_matrices, ties_matrices, prior_matrices = zip(*map(f, params)) indices = [p._indices() for p in params] - lc = max([p._max_len_names(cm, __constraints_name__) for p, cm in itertools.izip(params, constr_matrices)]) + lc = max([p._max_len_names(cm, __constraints_name__) for p, cm in zip(params, constr_matrices)]) lx = max([p._max_len_values() for p in params]) - li = max([p._max_len_index(i) for p, i in itertools.izip(params, indices)]) - lt = max([p._max_len_names(tm, __tie_name__) for p, tm in itertools.izip(params, ties_matrices)]) - lp = max([p._max_len_names(pm, __constraints_name__) for p, pm in itertools.izip(params, prior_matrices)]) + li = max([p._max_len_index(i) for p, i in zip(params, indices)]) + lt = max([p._max_len_names(tm, __tie_name__) for p, tm in zip(params, ties_matrices)]) + lp = max([p._max_len_names(pm, __constraints_name__) for p, pm in zip(params, prior_matrices)]) strings = [] start = True - for p, cm, i, tm, pm in itertools.izip(params,constr_matrices,indices,ties_matrices,prior_matrices): + for p, cm, i, tm, pm in zip(params,constr_matrices,indices,ties_matrices,prior_matrices): strings.append(p.__str__(constr_matrix=cm, indices=i, prirs=pm, ties=tm, lc=lc, lx=lx, li=li, lp=lp, lt=lt, only_name=(1-start))) start = False return "\n".join(strings) diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index db945016..27ecbc1c 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -269,7 +269,7 @@ class Parameterized(Parameterizable,metaclass=ParametersChangedMeta): """ if not isinstance(regexp, _pattern_type): regexp = compile(regexp) found_params = [] - for n, p in itertools.izip(self.parameter_names(False, False, True), self.flattened_parameters): + for n, p in zip(self.parameter_names(False, False, True), self.flattened_parameters): if regexp.match(n) is not None: found_params.append(p) return found_params @@ -380,7 +380,7 @@ class Parameterized(Parameterizable,metaclass=ParametersChangedMeta): pl = max([len(str(x)) if x else 0 
for x in prirs + ["Prior"]]) format_spec = "{{name:<{0}s}}{{desc:>{1}s}}{{const:^{2}s}}{{pri:^{3}s}}{{t:^{4}s}}".format(nl, sl, cl, pl, tl) to_print = [] - for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs): + for n, d, c, t, p in zip(names, desc, constrs, ts, prirs): to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p)) sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3) if header: @@ -415,7 +415,7 @@ class Parameterized(Parameterizable,metaclass=ParametersChangedMeta): pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]]) format_spec = " \033[1m{{name:<{0}s}}\033[0;0m | {{desc:>{1}s}} | {{const:^{2}s}} | {{pri:^{3}s}} | {{t:^{4}s}}".format(nl, sl, cl, pl, tl) to_print = [] - for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs): + for n, d, c, t, p in zip(names, desc, constrs, ts, prirs): to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p)) sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3) if header: From a0dc90596c348bb75459a8175baf4f6916362208 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sun, 1 Mar 2015 10:17:21 +0000 Subject: [PATCH 069/166] Commented out weave functions for Py3 support --- GPy/util/choleskies.py | 108 ++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 56 deletions(-) diff --git a/GPy/util/choleskies.py b/GPy/util/choleskies.py index 606229f7..c8c8227f 100644 --- a/GPy/util/choleskies.py +++ b/GPy/util/choleskies.py @@ -2,10 +2,9 @@ # Licensed under the GNU GPL version 3.0 import numpy as np -from scipy import weave +#from scipy import weave from . import linalg - def safe_root(N): i = np.sqrt(N) j = int(i) @@ -13,58 +12,58 @@ def safe_root(N): raise ValueError("N is not square!") return j -def flat_to_triang(flat): - """take a matrix N x D and return a M X M x D array where +#def flat_to_triang(flat): +# """take a matrix N x D and return a M X M x D array where +# +# N = M(M+1)/2 +# +# the lower triangluar portion of the d'th slice of the result is filled by the d'th column of flat. +# """ +# N, D = flat.shape +# M = (-1 + safe_root(8*N+1))/2 +# ret = np.zeros((M, M, D)) +# flat = np.ascontiguousarray(flat) +# +# code = """ +# int count = 0; +# for(int m=0; m Date: Sun, 1 Mar 2015 10:18:27 +0000 Subject: [PATCH 070/166] Commented out weave functions for Py3 support --- GPy/kern/_src/stationary.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index 5052b7b0..0465a556 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -8,10 +8,15 @@ from ...core.parameterization.transformations import Logexp from ...util.linalg import tdot from ... import util import numpy as np -from scipy import integrate, weave +from scipy import integrate from ...util.config import config # for assesing whether to use weave from ...util.caching import Cache_this +try: + from scipy import weave +except ImportError: + config.set('weave', 'working', 'False') + class Stationary(Kern): """ Stationary kernels (covariance functions). @@ -167,9 +172,9 @@ class Stationary(Kern): except: print("\n Weave compilation failed. 
Falling back to (slower) numpy implementation\n")                 config.set('weave', 'working', 'False') -                self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)]) +                self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in range(self.input_dim)])             else: -                self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)]) +                self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in range(self.input_dim)])         else:             r = self._scaled_dist(X, X2)             self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale @@ -234,7 +239,7 @@ class Stationary(Kern):         #the lower memory way with a loop         ret = np.empty(X.shape, dtype=np.float64) -        for q in xrange(self.input_dim): +        for q in range(self.input_dim):             np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=ret[:,q])         ret /= self.lengthscale**2 From 153a110a1dbf1f4a9b03b66085c4eb3f50d5d88b Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sun, 1 Mar 2015 10:25:52 +0000 Subject: [PATCH 071/166] Updated README.md to reflect recent Py3 work --- README.md | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 98613ce5..a28bc827 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,16 @@ A Gaussian processes framework in Python. Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png) +### Moving to Python 3 +Work is underway to make GPy run on Python 3. + +* Python 2.x compatibility is currently broken in this fork +* Running the 'dict' fixer from 2to3 caused more problems than it fixed! Trying to figure out why. +* The test suite runs but with fewer tests for some reason -- not sure why yet. +* Many tests in the suite fail! Don't even think about using this fork for production use +* All weave functions not covered by the test suite are simply commented out. Can add equivalents later as test functions become available +* Examples that required optimised versions of functions for speed reasons would be valued + ### Citation     @Misc{gpy2014,   @@ -109,24 +119,7 @@ Run nosetests from the root directory of the repository: or from within IPython     import GPy; GPy.tests() - -### Moving to Python 3 -Work is underway to make GPy run on Python 3. We are not there yet! Changes performed so far have retained compatibility with Python 2.6 and above. - -Work done so far: - -* Used 2to3 to fix relative imports -* Used 2to3 to convert print from statement to function. Some advanced uses of print meant that this could not be done in a way that retained compatibility with old versions of Python. The oldest version of Python that is supported by this version is 2.6 due to the required future imports. -* Used 2to3 to convert exceptions to Python 3 friendly versions. There are a few oustanding string exceptions to take care of that 2to3 doesn't handle. Will need to do these manually -* Handled the different imports required for ConfigParser/configparser in Py2/Py3 -* In utils/linalg.py: -  * Commented out the function cholupdate(L, x) since it doesn't appear to be used. 
Its definitely not in the tests.s -  * Put the import for scipy.weave in a try/except block so that it will gracefully fail in Py3 -* Fixed a couple of urllib2 issues - had to be done mannual since 2to3 didn't help - - - - + ## Funding Acknowledgements From 6aca7c2765ef4e81d93e929510d12778a5ed5331 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Mon, 2 Mar 2015 19:39:50 +0000 Subject: [PATCH 072/166] Changed references to iteritems() to items() for Py3 compat --- GPy/core/parameterization/index_operations.py | 56 ++++++++++++------- GPy/core/parameterization/param.py | 12 +++- GPy/core/parameterization/parameter_core.py | 34 ++++++++--- 3 files changed, 70 insertions(+), 32 deletions(-) diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index 1e97f488..4050dc55 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -63,16 +63,15 @@ class ParameterIndexOperations(object):     def __init__(self, constraints=None):         self._properties = IntArrayDict()         if constraints is not None: -            for t, i in constraints.iteritems(): +            #python 3 fix +            #for t, i in constraints.iteritems(): +            for t, i in constraints.items():                 self.add(t, i) -    def iteritems(self): -        try: -            return self._properties.iteritems() -        except AttributeError: -            #Changed this from iteritems to items for Py3 compatibility. It didn't break the test suite. -            return self._properties.items() - +    #iteritems has gone in python 3 +    #def iteritems(self): +    #    return self._properties.iteritems() +     def items(self):         return self._properties.items() @@ -159,14 +158,18 @@ class ParameterIndexOperations(object):         return numpy.array([]).astype(int)     def update(self, parameter_index_view, offset=0): -        for i, v in parameter_index_view.iteritems(): +        #py3 fix +        #for i, v in parameter_index_view.iteritems(): +        for i, v in parameter_index_view.items():             self.add(i, v+offset)     def copy(self):         return self.__deepcopy__(None)     def __deepcopy__(self, memo): -        return ParameterIndexOperations(dict(self.iteritems())) +        #py3 fix +        #return ParameterIndexOperations(dict(self.iteritems())) +        return ParameterIndexOperations(dict(self.items()))     def __getitem__(self, prop):         return self._properties[prop] @@ -204,22 +207,25 @@ class ParameterIndexOperationsView(object):     def _filter_index(self, ind):         return ind[(ind >= self._offset) * (ind < (self._offset + self._size))] - self._offset -     def iteritems(self): -        for i, ind in self._param_index_ops.iteritems(): +    #iteritems has gone in python 3. 
It has been renamed items() + def items(self): + for i, ind in self._param_index_ops.items(): ind2 = self._filter_index(ind) if ind2.size > 0: yield i, ind2 - - def items(self): - return [[i,v] for i,v in self.iteritems()] + + #Python 3 items() is now implemented as per py2 iteritems + #def items(self): + # return [[i,v] for i,v in self.iteritems()] def properties(self): return [i for i in self.iterproperties()] def iterproperties(self): - for i, _ in self.iteritems(): + #py3 fix + #for i, _ in self.iteritems(): + for i, _ in self.items(): yield i @@ -239,7 +245,9 @@ class ParameterIndexOperationsView(object): def iterindices(self): - for _, ind in self.iteritems(): + #py3 fix + #for _, ind in self.iteritems(): + for _, ind in self.items(): yield ind @@ -295,10 +303,14 @@ class ParameterIndexOperationsView(object): def __str__(self, *args, **kwargs): import pprint - return pprint.pformat(dict(self.iteritems())) + #py3 fixes + #return pprint.pformat(dict(self.iteritems())) + return pprint.pformat(dict(self.items())) def update(self, parameter_index_view, offset=0): - for i, v in parameter_index_view.iteritems(): + #py3 fixes + #for i, v in parameter_index_view.iteritems(): + for i, v in parameter_index_view.items(): self.add(i, v+offset) @@ -306,6 +318,8 @@ class ParameterIndexOperationsView(object): return self.__deepcopy__(None) def __deepcopy__(self, memo): - return ParameterIndexOperations(dict(self.iteritems())) + #py3 fix + #return ParameterIndexOperations(dict(self.iteritems())) + return ParameterIndexOperations(dict(self.items())) pass diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index c7e76c98..2d8c4d78 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -207,10 +207,14 @@ class Param(Parameterizable, ObsAr): return 0 @property def _constraints_str(self): - return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))] + #py3 fix + #return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))] + return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.items()))] @property def _priors_str(self): - return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.priors.iteritems()))] + #py3 fix + #return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.priors.iteritems()))] + return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.priors.items()))] @property def _ties_str(self): return [''] @@ -336,7 +340,9 @@ class ParamConcatenation(object): level += 1 parent = parent._parent_ import operator - self.parents = map(lambda x: x[0], sorted(parents.iteritems(), key=operator.itemgetter(1))) + #py3 fix + #self.parents = map(lambda x: x[0], sorted(parents.iteritems(), key=operator.itemgetter(1))) + self.parents = map(lambda x: x[0], sorted(parents.tems(), key=operator.itemgetter(1))) #=========================================================================== # Get/set items, enable broadcasting #=========================================================================== diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 7af40860..c72661e4 100644 --- a/GPy/core/parameterization/parameter_core.py +++ 
b/GPy/core/parameterization/parameter_core.py @@ -164,7 +164,9 @@ class Pickleable(object): '_Cacher_wrap__cachers', # never pickle cachers ] dc = dict() - for k,v in self.__dict__.iteritems(): + #py3 fix + #for k,v in self.__dict__.iteritems(): + for k,v in self.__dict__.items(): if k not in ignore_list: dc[k] = v return dc @@ -427,7 +429,9 @@ class Indexable(Nameable, Updateable): """evaluate the prior""" if self.priors.size > 0: x = self.param_array - return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0) + #py3 fix + #return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0) + return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0) return 0. def _log_prior_gradients(self): @@ -435,7 +439,9 @@ class Indexable(Nameable, Updateable): if self.priors.size > 0: x = self.param_array ret = np.zeros(x.size) - [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()] + #py3 fix + #[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()] + [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()] return ret return 0. @@ -613,7 +619,9 @@ class OptimizationHandlable(Indexable): if not self._optimizer_copy_transformed: self._optimizer_copy_.flat = self.param_array.flat - [np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] + #py3 fix + #[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] + [np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.items() if c != __fixed__] if self.has_parent() and (self.constraints[__fixed__].size != 0 or self._has_ties()): fixes = np.ones(self.size).astype(bool) fixes[self.constraints[__fixed__]] = FIXED @@ -642,11 +650,15 @@ class OptimizationHandlable(Indexable): if f is None: self.param_array.flat = p [np.put(self.param_array, ind, c.f(self.param_array.flat[ind])) - for c, ind in self.constraints.iteritems() if c != __fixed__] + #py3 fix + #for c, ind in self.constraints.iteritems() if c != __fixed__] + for c, ind in self.constraints.items() if c != __fixed__] else: self.param_array.flat[f] = p [np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]])) - for c, ind in self.constraints.iteritems() if c != __fixed__] + #py3 fix + #for c, ind in self.constraints.iteritems() if c != __fixed__] + for c, ind in self.constraints.items() if c != __fixed__] #self._highest_parent_.tie.propagate_val() self._optimizer_copy_transformed = False @@ -681,7 +693,9 @@ class OptimizationHandlable(Indexable): constraint to it. """ self._highest_parent_.tie.collate_gradient() - [np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__] + #py3 fix + #[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__] + [np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.items() if c != __fixed__] if self._has_fixes(): return g[self._fixes_] return g @@ -691,6 +705,8 @@ class OptimizationHandlable(Indexable): constraint to it. 
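"""         self._highest_parent_.tie.collate_gradient() +        #py3 fix +        #[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]         [np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]         if self._has_fixes(): return g[self._fixes_]         return g @@ -751,7 +767,9 @@ class OptimizationHandlable(Indexable):         self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)         # now draw from prior where possible         x = self.param_array.copy() -        [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None] +        #Py3 fix +        #[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None] +        [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.items() if not p is None]         unfixlist = np.ones((self.size,),dtype=np.bool)         unfixlist[self.constraints[__fixed__]] = False         self.param_array.flat[unfixlist] = x.view(np.ndarray).ravel()[unfixlist]

Patch 072's rule of thumb, sketched outside the diff: Python 3 removed iteritems(), iterkeys() and itervalues(); items() and keys() now return lazy views, so they are usually a drop-in replacement. The one trap is that a view cannot outlive mutation of its dict, which is why patch 077 below has to materialize with list() before deleting. A toy illustration:

    d = {'a': 1, 'b': 2}
    for k, v in d.items():        # works on Python 2 and 3
        print(k, v)
    for k, v in list(d.items()):  # copy first when the loop mutates d
        if v == 1:
            del d[k]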
""" self._highest_parent_.tie.collate_gradient() + #py3 fix + #[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__] [np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__] if self._has_fixes(): return g[self._fixes_] return g @@ -751,7 +767,9 @@ class OptimizationHandlable(Indexable): self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...) # now draw from prior where possible x = self.param_array.copy() - [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None] + #Py3 fix + #[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None] + [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.items() if not p is None] unfixlist = np.ones((self.size,),dtype=np.bool) unfixlist[self.constraints[__fixed__]] = False self.param_array.flat[unfixlist] = x.view(np.ndarray).ravel()[unfixlist] From 46fc08a448a28c8690f41fa2ef8b5bfd8f7ebd05 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Mon, 2 Mar 2015 19:43:26 +0000 Subject: [PATCH 073/166] cPickle fix for Py3 --- GPy/core/parameterization/parameter_core.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index c72661e4..1e101a9d 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -110,7 +110,10 @@ class Pickleable(object): it properly. :param protocol: pickling protocol to use, python-pickle for details. """ - import cPickle as pickle + try: #Py2 + import cPickle as pickle + except ImportError: #Py3 + import pickle if isinstance(f, str): with open(f, 'wb') as f: pickle.dump(self, f, protocol) From 82722305c35c89c1dbec75348bad3b79ea07e951 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Mon, 2 Mar 2015 19:50:41 +0000 Subject: [PATCH 074/166] Changed refereences to iteritems() to items() for Py3 compat --- GPy/core/parameterization/param.py | 2 +- GPy/core/parameterization/parameter_core.py | 2 +- GPy/testing/pickle_tests.py | 17 +++++++++++++---- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 2d8c4d78..09369efa 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -342,7 +342,7 @@ class ParamConcatenation(object): import operator #py3 fix #self.parents = map(lambda x: x[0], sorted(parents.iteritems(), key=operator.itemgetter(1))) - self.parents = map(lambda x: x[0], sorted(parents.tems(), key=operator.itemgetter(1))) + self.parents = map(lambda x: x[0], sorted(parents.items(), key=operator.itemgetter(1))) #=========================================================================== # Get/set items, enable broadcasting #=========================================================================== diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 1e101a9d..bfe325a3 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -710,7 +710,7 @@ class OptimizationHandlable(Indexable): self._highest_parent_.tie.collate_gradient() #py3 fix #[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__] - [np.put(g, i, 
c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__] + [np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.items() if c != __fixed__] if self._has_fixes(): return g[self._fixes_] return g diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py index 777d0d6b..251b7cce 100644 --- a/GPy/testing/pickle_tests.py +++ b/GPy/testing/pickle_tests.py @@ -29,7 +29,9 @@ def toy_model(): class ListDictTestCase(unittest.TestCase): def assertListDictEquals(self, d1, d2, msg=None): - for k,v in d1.iteritems(): + #py3 fix + #for k,v in d1.iteritems(): + for k,v in d1.items(): self.assertListEqual(list(v), list(d2[k]), msg) def assertArrayListEquals(self, l1, l2): for a1, a2 in itertools.izip(l1,l2): @@ -39,8 +41,13 @@ class Test(ListDictTestCase): def test_parameter_index_operations(self): pio = ParameterIndexOperations(dict(test1=np.array([4,3,1,6,4]), test2=np.r_[2:130])) piov = ParameterIndexOperationsView(pio, 20, 250) - self.assertListDictEquals(dict(piov.items()), dict(piov.copy().iteritems())) - self.assertListDictEquals(dict(pio.iteritems()), dict(pio.copy().items())) + #py3 fix + #self.assertListDictEquals(dict(piov.items()), dict(piov.copy().iteritems())) + self.assertListDictEquals(dict(piov.items()), dict(piov.copy().items())) + + #py3 fix + #self.assertListDictEquals(dict(pio.iteritems()), dict(pio.copy().items())) + self.assertListDictEquals(dict(pio.items()), dict(pio.copy().items())) self.assertArrayListEquals(pio.copy().indices(), pio.indices()) self.assertArrayListEquals(piov.copy().indices(), piov.indices()) @@ -55,7 +62,9 @@ class Test(ListDictTestCase): pickle.dump(piov, f) f.seek(0) pio2 = pickle.load(f) - self.assertListDictEquals(dict(piov.items()), dict(pio2.iteritems())) + #py3 fix + #self.assertListDictEquals(dict(piov.items()), dict(pio2.iteritems())) + self.assertListDictEquals(dict(piov.items()), dict(pio2.items())) def test_param(self): param = Param('test', np.arange(4*2).reshape(4,2)) From 5607bd9a193ce45de825d0c88d787aae6abc59b7 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Mon, 2 Mar 2015 20:13:21 +0000 Subject: [PATCH 075/166] Various Py3 related import fixes --- GPy/core/model.py | 1 + GPy/core/parameterization/param.py | 1 + GPy/models/mrd.py | 4 ++-- GPy/testing/pickle_tests.py | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 9521733c..097f7b5a 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -13,6 +13,7 @@ import itertools import sys from .verbose_optimization import VerboseOptimization # import numdifftools as ndt +from functools import reduce class Model(Parameterized): _fail_count = 0 # Count of failed optimization steps (see objective) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 09369efa..1838f2bf 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -6,6 +6,7 @@ import numpy np = numpy from .parameter_core import Parameterizable, adjust_name_for_printing, Pickleable from .observable_array import ObsAr +from functools import reduce ###### printing __constraints_name__ = "Constraint" diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index f3e643c9..f56873fa 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -82,7 +82,7 @@ class MRD(BayesianGPLVMMiniBatch): assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict" if inference_method is None: - 
self.inference_method = InferenceMethodList([VarDTC() for _ in xrange(len(self.Ylist))]) + self.inference_method = InferenceMethodList([VarDTC() for _ in range(len(self.Ylist))]) else: assert isinstance(inference_method, InferenceMethodList), "please provide one inference method per Y in the list and provide it as InferenceMethodList, inference_method given: {}".format(inference_method) self.inference_method = inference_method @@ -338,4 +338,4 @@ class MRD(BayesianGPLVMMiniBatch): super(MRD, self).__setstate__(state) self.kern = self.bgplvms[0].kern self.likelihood = self.bgplvms[0].likelihood - self.parameters_changed() \ No newline at end of file + self.parameters_changed() diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py index 251b7cce..fd1bf93c 100644 --- a/GPy/testing/pickle_tests.py +++ b/GPy/testing/pickle_tests.py @@ -34,7 +34,7 @@ class ListDictTestCase(unittest.TestCase): for k,v in d1.items(): self.assertListEqual(list(v), list(d2[k]), msg) def assertArrayListEquals(self, l1, l2): - for a1, a2 in itertools.izip(l1,l2): + for a1, a2 in zip(l1,l2): np.testing.assert_array_equal(a1, a2) class Test(ListDictTestCase): From 3faf345969b8aff9e859d707de3557004421327f Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Mon, 2 Mar 2015 20:22:04 +0000 Subject: [PATCH 076/166] Import fixes for Py3 --- GPy/kern/_src/periodic.py | 1 + GPy/kern/_src/prod.py | 1 + GPy/util/pca.py | 5 +++-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/GPy/kern/_src/periodic.py b/GPy/kern/_src/periodic.py index 36fcb596..23818007 100644 --- a/GPy/kern/_src/periodic.py +++ b/GPy/kern/_src/periodic.py @@ -8,6 +8,7 @@ from ...util.linalg import mdot from ...util.decorators import silence_errors from ...core.parameterization.param import Param from ...core.parameterization.transformations import Logexp +from functools import reduce class Periodic(Kern): def __init__(self, input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name): diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index 84bd1e1d..27a15aab 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -5,6 +5,7 @@ import numpy as np from .kern import CombinationKernel from ...util.caching import Cache_this import itertools +from functools import reduce class Prod(CombinationKernel): """ diff --git a/GPy/util/pca.py b/GPy/util/pca.py index f87b9807..7168a28f 100644 --- a/GPy/util/pca.py +++ b/GPy/util/pca.py @@ -13,6 +13,7 @@ except: from numpy.linalg.linalg import LinAlgError from operator import setitem import itertools +from functools import reduce class PCA(object): """ @@ -47,7 +48,7 @@ class PCA(object): X_ = numpy.ma.masked_array(X, inan) self.mu = X_.mean(0).base self.sigma = X_.std(0).base - reduce(lambda y,x: setitem(x[0], x[1], x[2]), itertools.izip(X.T, inan.T, self.mu), None) + reduce(lambda y,x: setitem(x[0], x[1], x[2]), zip(X.T, inan.T, self.mu), None) X = X - self.mu X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma) return X @@ -138,4 +139,4 @@ class PCA(object): pylab.tight_layout() except: pass - return plots \ No newline at end of file + return plots From 7eff1d984f2019ba56a799234c961b2354ed85b0 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Mon, 2 Mar 2015 20:35:19 +0000 Subject: [PATCH 077/166] Fixed 'dict changed size' errors --- GPy/core/parameterization/index_operations.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index 
4050dc55..3d6ce64d 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -87,7 +87,7 @@ class ParameterIndexOperations(object):             ind[toshift] += size     def shift_left(self, start, size): -        for v, ind in self.items(): +        for v, ind in list(self.items()):             todelete = (ind>=start) * (ind 0:                 yield i, ind2 From 57dd29a0f9826d3ecc360fc6119efbeb05bf8d39 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Mon, 2 Mar 2015 20:36:10 +0000 Subject: [PATCH 078/166] Updated README now that dict issues are fixed --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index a28bc827..e488fd50 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,6 @@ Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GP Work is underway to make GPy run on Python 3. * Python 2.x compatibility is currently broken in this fork -* Running the 'dict' fixer from 2to3 caused more problems than it fixed! Trying to figure out why. * The test suite runs but with fewer tests for some reason -- not sure why yet. * Many tests in the suite fail! Don't even think about using this fork for production use * All weave functions not covered by the test suite are simply commented out. Can add equivalents later as test functions become available From a4c8bb58074f3de548f0e20f4492586aeda2af1b Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Tue, 3 Mar 2015 09:31:52 +0000 Subject: [PATCH 079/166] Changed references to iteritems() to items() for Py3 compat --- GPy/testing/index_operations_tests.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/GPy/testing/index_operations_tests.py b/GPy/testing/index_operations_tests.py index e2895cd2..a97f1beb 100644 --- a/GPy/testing/index_operations_tests.py +++ b/GPy/testing/index_operations_tests.py @@ -121,7 +121,9 @@ class Test(unittest.TestCase):         self.assertListEqual(removed.tolist(), [0, 2])     def test_misc(self): -        for k,v in self.param_index.copy()._properties.iteritems(): +        #py3 fix +        #for k,v in self.param_index.copy()._properties.iteritems(): +        for k,v in self.param_index.copy()._properties.items():             self.assertListEqual(self.param_index[k].tolist(), v.tolist())         self.assertEqual(self.param_index.size, 8)         self.assertEqual(self.view.size, 5) From fc43f6d3137f55e1290f6b93f5d44afd0c6f9035 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Tue, 3 Mar 2015 09:53:27 +0000 Subject: [PATCH 080/166] Map fix for Python 3 --- GPy/core/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 097f7b5a..6f6f0ee8 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -341,7 +341,7 @@ class Model(Parameterized):         cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])         cols = np.array(cols) + 5         header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))] -        header_string = map(lambda x: '|'.join(x), [header_string]) +        header_string = list(map(lambda x: '|'.join(x), [header_string]))         separator = '-' * len(header_string[0])         print('\n'.join([header_string[0], separator]))         if target_param is None: From 317706dfd07a8b4017ce851e602444c26860742d Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Tue, 3 Mar 2015 10:39:19 +0000 Subject: [PATCH 081/166] Removed debugger setup command --- GPy/testing/kernel_tests.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py --- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py @@ -188,7 +188,6 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if not result: print(("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")) testmodel.checkgrad(verbose=True) - import ipdb;ipdb.set_trace() assert(result) pass_checks = False return False From 189647032a0d300468123f88687e4a397027c068 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Tue, 3 Mar 2015 11:05:59 +0000 Subject: [PATCH 082/166] iterkeys fix for Python 3 --- GPy/core/parameterization/index_operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index 3d6ce64d..e4803f37 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -79,7 +79,7 @@ class ParameterIndexOperations(object): return self._properties.keys() def iterproperties(self): - return self._properties.iterkeys() + return iter(self._properties) def shift_right(self, start, size): for ind in self.iterindices(): From 965f1aa2cb5ac2fc319ca29eb09692e59c84f6a8 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 3 Mar 2015 17:04:15 +0000 Subject: [PATCH 083/166] removed climin dependency unless actually needed --- GPy/core/svgp.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/GPy/core/svgp.py b/GPy/core/svgp.py index 603a64a5..7a449210 100644 --- a/GPy/core/svgp.py +++ b/GPy/core/svgp.py @@ -25,20 +25,21 @@ class SVGP(SparseGP): Hensman, Matthews and Ghahramani, Scalable Variational GP Classification, ArXiv 1411.2005 """ - if batchsize is None: - batchsize = X.shape[0] - - self.X_all, self.Y_all = X, Y - # how to rescale the batch likelihood in case of minibatches self.batchsize = batchsize - batch_scale = float(self.X_all.shape[0])/float(self.batchsize) - #KL_scale = 1./np.float64(self.mpi_comm.size) - KL_scale = 1.0 + if batchsize is None: + X_batch, Y_batch = X, Y + KL_scale, batch_scale = 1., 1. 
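The shape of patch 083's fix, sketched apart from the hunk it sits in: the climin import moves inside the minibatch branch, so a full-batch SVGP never touches the optional dependency. (A sketch under the assumption that climin.util.draw_mini_slices yields index slices, which is how the code below uses it.)

    if batchsize is None:
        X_batch, Y_batch = X, Y      # full batch: climin is never imported
    else:
        import climin.util           # optional dependency, loaded lazily
        slicer = climin.util.draw_mini_slices(X.shape[0], batchsize)
        i = next(slicer)
        X_batch, Y_batch = X[i], Y[i]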
- import climin.util - #Make a climin slicer to make drawing minibatches much quicker - self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize) - X_batch, Y_batch = self.new_batch() + else: + self.X_all, self.Y_all = X, Y + # how to rescale the batch likelihood in case of minibatches + batch_scale = float(self.X_all.shape[0])/float(self.batchsize) + KL_scale = 1.0 + + import climin.util + #Make a climin slicer to make drawing minibatches much quicker + self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize) + X_batch, Y_batch = self.new_batch() #create the SVI inference method inf_method = svgp_inf(KL_scale=KL_scale, batch_scale=batch_scale) From fff110ca1983989a2b4523d8928f9bc57d838469 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Tue, 3 Mar 2015 17:51:54 +0000 Subject: [PATCH 084/166] im_self->__self__ fix for python 3 --- GPy/kern/_src/independent_outputs.py | 4 +++- GPy/testing/likelihood_tests.py | 6 +++--- GPy/testing/parameterized_tests.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py index 10681d57..2a0c2a33 100644 --- a/GPy/kern/_src/independent_outputs.py +++ b/GPy/kern/_src/independent_outputs.py @@ -94,7 +94,9 @@ class IndependentOutputs(CombinationKernel): else: slices2 = index_to_slices(X2[:,self.index_dim]) [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))] - if self.single_kern: kern.gradient = target + + if self.single_kern: + kern.gradient = target else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))] def gradients_X(self,dL_dK, X, X2=None): diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py index 5feeffa4..3fe01c46 100644 --- a/GPy/testing/likelihood_tests.py +++ b/GPy/testing/likelihood_tests.py @@ -29,7 +29,7 @@ def dparam_partial(inst_func, *args): def param_func(param_val, param_name, inst_func, args): #inst_func.im_self._set_params(param) #inst_func.im_self.add_parameter(Param(param_name, param_val)) - inst_func.im_self[param_name] = param_val + inst_func.__self__[param_name] = param_val return inst_func(*args) return functools.partial(param_func, inst_func=inst_func, args=args) @@ -44,7 +44,7 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None, The number of parameters and N is the number of data Need to take a slice out from f and a slice out of df """ - print("\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__, + print("\n{} likelihood: {} vs {}".format(func.__self__.__class__.__name__, func.__name__, dfunc.__name__)) partial_f = dparam_partial(func, *args) partial_df = dparam_partial(dfunc, *args) @@ -278,7 +278,7 @@ class TestNoiseModels(object): #} } - for name, attributes in noise_models.iteritems(): + for name, attributes in noise_models.items(): model = attributes["model"] if "grad_params" in attributes: params = attributes["grad_params"] diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index f3e0863f..0fb129ff 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -108,7 +108,7 @@ class ParameterizedTest(unittest.TestCase): self.assertListEqual(self.white._fixes_.tolist(), [FIXED]) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) self.assertIs(self.test1.constraints, 
self.param.constraints._param_index_ops) - self.assertListEqual(self.test1.constraints[Logexp()].tolist(), range(self.param.size, self.param.size+self.rbf.size)) + self.assertListEqual(self.test1.constraints[Logexp()].tolist(), list(range(self.param.size, self.param.size+self.rbf.size))) def test_remove_parameter_param_array_grad_array(self): val = self.test1.kern.param_array.copy() From 4642f5ac2b4a044d78ab4e55ff15107b892945f2 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Tue, 3 Mar 2015 17:54:05 +0000 Subject: [PATCH 085/166] types.TupleType -> tuple fix for python 3 --- GPy/util/linalg.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 7f1a28f3..ec66cc09 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -214,12 +214,12 @@ def mdot(*args): def _mdot_r(a, b): """Recursive helper for mdot""" - if type(a) == types.TupleType: + if type(a) == tuple: if len(a) > 1: a = mdot(*a) else: a = a[0] - if type(b) == types.TupleType: + if type(b) == tuple: if len(b) > 1: b = mdot(*b) else: @@ -362,7 +362,7 @@ def tdot_blas(mat, out=None): A = mat.ctypes.data_as(ctypes.c_void_p) BETA = c_double(0.0) C = out.ctypes.data_as(ctypes.c_void_p) - LDC = c_int(np.max(out.strides) // 8) + LDC = c_int(np.max(out.strides) / 8) dsyrk(byref(UPLO), byref(TRANS), byref(N), byref(K), byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC)) From 35aec1c6d0a5ee4749972dba2e4bbc06a06fa53b Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Tue, 3 Mar 2015 20:47:09 +0000 Subject: [PATCH 086/166] Various Python 3 fixes --- GPy/core/parameterization/parameter_core.py | 2 +- GPy/util/linalg.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index bfe325a3..195a80c1 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -581,7 +581,7 @@ class Indexable(Nameable, Updateable): if len(transforms) == 0: transforms = which.properties() removed = np.empty((0,), dtype=int) - for t in transforms: + for t in list(transforms): unconstrained = which.remove(t, self._raveled_index()) removed = np.union1d(removed, unconstrained) if t is __fixed__: diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index ec66cc09..2813a30a 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -362,7 +362,7 @@ def tdot_blas(mat, out=None): A = mat.ctypes.data_as(ctypes.c_void_p) BETA = c_double(0.0) C = out.ctypes.data_as(ctypes.c_void_p) - LDC = c_int(np.max(out.strides) / 8) + LDC = c_int(np.max(out.strides) // 8) dsyrk(byref(UPLO), byref(TRANS), byref(N), byref(K), byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC)) @@ -389,7 +389,7 @@ def DSYR_blas(A, x, alpha=1.): """ N = c_int(A.shape[0]) LDA = c_int(A.shape[0]) - UPLO = c_char('l') + UPLO = c_char('l'.encode('ascii')) ALPHA = c_double(alpha) A_ = A.ctypes.data_as(ctypes.c_void_p) x_ = x.ctypes.data_as(ctypes.c_void_p) From 273beca272f41835431fad699b1f68104e1df749 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Wed, 4 Mar 2015 03:08:15 +0000 Subject: [PATCH 087/166] Python 3 metaclass fix --- GPy/kern/_src/kern.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 6ccd315b..2e8ebcb0 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -11,10 +11,11 @@ from functools import reduce -class Kern(Parameterized): +class 
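Kern(Parameterized,metaclass=KernCallsViaSlicerMeta):

This one-line change is the crux of patch 087: Python 3 ignores a __metaclass__ class attribute and instead takes the metaclass as a keyword in the class statement itself. A minimal sketch with a stand-in metaclass (Meta is illustrative, not GPy's KernCallsViaSlicerMeta):

    class Meta(type):
        def __call__(cls, *args, **kw):
            # runs on every construction of a class that uses this metaclass
            return super(Meta, cls).__call__(*args, **kw)

    # Python 2 spelling:
    #   class Kern(object):
    #       __metaclass__ = Meta
    # Python 3 spelling:
    class Kern(object, metaclass=Meta):
        pass

    # Meta('Kern', (object,), {}) builds the class on either interpreter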
 #===========================================================================     # This adds input slice support. The rather ugly code for slicing can be     # found in kernel_slice_operations +    # __metaclass__ is ignored in Python 3 - needs to be put in the class definition     __metaclass__ = KernCallsViaSlicerMeta     #===========================================================================     _support_GPU=False From 6d2393ae907393ba69b4ab1556e6dc67778bca5e Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Wed, 4 Mar 2015 03:22:44 +0000 Subject: [PATCH 088/166] Various Py3 fixes --- GPy/core/parameterization/priors.py | 18 +++++++++--------- GPy/inference/optimization/stochastics.py | 2 +- GPy/kern/_src/add.py | 2 +- GPy/models/sparse_gp_minibatch.py | 6 +++--- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/GPy/core/parameterization/priors.py b/GPy/core/parameterization/priors.py index a4bbecb3..38cb0d19 100644 --- a/GPy/core/parameterization/priors.py +++ b/GPy/core/parameterization/priors.py @@ -414,7 +414,7 @@ class DGPLVM_KFDA(Prior):     def compute_cls(self, x):         cls = {}         # Appending each data point to its proper class -        for j in xrange(self.datanum): +        for j in range(self.datanum):             class_label = self.get_class_label(self.lbl[j])             if class_label not in cls:                 cls[class_label] = [] @@ -553,7 +553,7 @@ class DGPLVM(Prior):     def compute_cls(self, x):         cls = {}         # Appending each data point to its proper class -        for j in xrange(self.datanum): +        for j in range(self.datanum):             class_label = self.get_class_label(self.lbl[j])             if class_label not in cls:                 cls[class_label] = [] @@ -572,7 +572,7 @@ class DGPLVM(Prior):     # Adding data points as tuple to the dictionary so that we can access indices     def compute_indices(self, x):         data_idx = {} -        for j in xrange(self.datanum): +        for j in range(self.datanum):             class_label = self.get_class_label(self.lbl[j])             if class_label not in data_idx:                 data_idx[class_label] = [] @@ -591,7 +591,7 @@ class DGPLVM(Prior):             else:                 lst_idx = []                 # Here we put indices of each class in to the list called lst_idx_all -                for m in xrange(len(data_idx[i])): +                for m in range(len(data_idx[i])):                     lst_idx.append(data_idx[i][m][0])                 lst_idx_all.append(lst_idx)         return lst_idx_all @@ -627,7 +627,7 @@ class DGPLVM(Prior):             # pdb.set_trace()             # Calculating Bi             B_i[i] = (M_i[i] - M_0).reshape(1, self.dim) -        for k in xrange(self.datanum): +        for k in range(self.datanum):             for i in data_idx:                 N_i = float(len(data_idx[i]))                 if k in lst_idx_all[i]: @@ -772,7 +772,7 @@ class DGPLVM_T(Prior):     def compute_cls(self, x):         cls = {}         # Appending each data point to its proper class -        for j in xrange(self.datanum): +        for j in range(self.datanum):             class_label = self.get_class_label(self.lbl[j])             if class_label not in cls:                 cls[class_label] = [] @@ -791,7 +791,7 @@ class DGPLVM_T(Prior):     # Adding data points as tuple to the dictionary so that we can access indices     def compute_indices(self, x):         data_idx = {} -        for j in xrange(self.datanum): +        for j in range(self.datanum):             class_label = self.get_class_label(self.lbl[j])             if class_label not in data_idx:                 data_idx[class_label] = [] @@ -810,7 +810,7 @@ class DGPLVM_T(Prior):             else:                 lst_idx = []                 # Here we put indices of each class in to the list called lst_idx_all -                for m in xrange(len(data_idx[i])): +                for m in range(len(data_idx[i])):                     lst_idx.append(data_idx[i][m][0])                 lst_idx_all.append(lst_idx)         return lst_idx_all @@ -846,7 +846,7 @@ class DGPLVM_T(Prior):             # pdb.set_trace()             # Calculating Bi             B_i[i] = (M_i[i] - M_0).reshape(1, 
self.dim) -        for k in xrange(self.datanum): +        for k in range(self.datanum):             for i in data_idx:                 N_i = float(len(data_idx[i]))                 if k in lst_idx_all[i]: diff --git a/GPy/inference/optimization/stochastics.py b/GPy/inference/optimization/stochastics.py index dc71d539..f1532bc5 100644 --- a/GPy/inference/optimization/stochastics.py +++ b/GPy/inference/optimization/stochastics.py @@ -30,7 +30,7 @@ class SparseGPMissing(StochasticStorage):         Thus, we can just make sure the loop goes over self.d every time.         """ -        self.d = xrange(model.Y_normalized.shape[1]) +        self.d = range(model.Y_normalized.shape[1]) class SparseGPStochastics(StochasticStorage):     """ diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 82c84c52..77f0d76e 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -165,7 +165,7 @@ class Add(CombinationKernel):                 else: eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.             grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) -            [np.add(target_grads[i],grads[i],target_grads[i]) for i in xrange(len(grads))] +            [np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]         return target_grads     def add(self, other): diff --git a/GPy/models/sparse_gp_minibatch.py b/GPy/models/sparse_gp_minibatch.py index d3bbe5fe..10c54d49 100644 --- a/GPy/models/sparse_gp_minibatch.py +++ b/GPy/models/sparse_gp_minibatch.py @@ -82,7 +82,7 @@ class SparseGPMiniBatch(SparseGP):             m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)             message = m_f(-1)             print(message, end=' ') -            for d in xrange(overall): +            for d in range(overall):                 self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])                 print(' '*(len(message)+1) + '\r', end=' ')                 message = m_f(d) @@ -182,11 +182,11 @@ class SparseGPMiniBatch(SparseGP):                 full_values[key][value_indices[key]] += current_values[key]         """         for key in current_values.keys(): -            if value_indices is not None and value_indices.has_key(key): +            if value_indices is not None and key in value_indices:                 index = value_indices[key]             else:                 index = slice(None) -            if full_values.has_key(key): +            if key in full_values:                 full_values[key][index] += current_values[key]             else:                 full_values[key] = current_values[key] From 48821a6b735931fe1193d4d8207ef7bd9dd91667 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 5 Mar 2015 10:26:02 +0000 Subject: [PATCH 089/166] Added binomial likelihood Also some changes to pass through Y_metadata, where it had previously been (erroneously) omitted.
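Before the diff, a sketch of how the new likelihood is meant to be driven. This is toy data and assumes only the API added by this patch (Binomial exported from GPy.likelihoods, trial counts travelling in Y_metadata):

    import numpy as np
    import GPy

    trials = np.full((30, 1), 10)                    # N_i trials per data point
    Y = np.random.binomial(10, 0.3, size=(30, 1))    # y_i observed successes

    lik = GPy.likelihoods.Binomial()                 # probit link by default
    p = np.full((30, 1), 0.3)                        # inverse-link values in [0, 1]
    logp = lik.logpdf_link(p, Y, Y_metadata={'trials': trials})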
--- .../latent_function_inference/svgp.py | 2 +- GPy/likelihoods/__init__.py | 1 + GPy/likelihoods/binomial.py | 125 ++++++++++++++++++ GPy/likelihoods/likelihood.py | 8 +- GPy/likelihoods/poisson.py | 11 +- 5 files changed, 133 insertions(+), 14 deletions(-) create mode 100644 GPy/likelihoods/binomial.py diff --git a/GPy/inference/latent_function_inference/svgp.py b/GPy/inference/latent_function_inference/svgp.py index 52db242c..1974991b 100644 --- a/GPy/inference/latent_function_inference/svgp.py +++ b/GPy/inference/latent_function_inference/svgp.py @@ -43,7 +43,7 @@ class SVGP(LatentFunctionInference): #quadrature for the likelihood - F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v) + F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v, Y_metadata=Y_metadata) #rescale the F term if working on a batch F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 28e44541..c1064e92 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -6,3 +6,4 @@ from poisson import Poisson from student_t import StudentT from likelihood import Likelihood from mixed_noise import MixedNoise +from binomial import Binomial diff --git a/GPy/likelihoods/binomial.py b/GPy/likelihoods/binomial.py new file mode 100644 index 00000000..4accaa44 --- /dev/null +++ b/GPy/likelihoods/binomial.py @@ -0,0 +1,125 @@ +# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt) +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf +import link_functions +from likelihood import Likelihood +from scipy import special + +class Binomial(Likelihood): + """ + Binomial likelihood + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}} + + .. Note:: + Y takes values in either {-1, 1} or {0, 1}. + link function should have the domain [0, 1], e.g. probit (default) or Heaviside + + .. See also:: + likelihood.py, for the parent class + """ + def __init__(self, gp_link=None): + if gp_link is None: + gp_link = link_functions.Probit() + + super(Binomial, self).__init__(gp_link, 'Binomial') + + def conditional_mean(self, gp, Y_metadata): + return self.gp_link(gp)*Y_metadata['trials'] + + def pdf_link(self, inv_link_f, y, Y_metadata): + """ + Likelihood function given inverse link of f. + + .. math:: + p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}} + + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata must contain 'trials' + :returns: likelihood evaluated for this point + :rtype: float + + .. Note: + Each y_i must be in {0, 1} + """ + return np.exp(self.logpdf_link(inv_link_f, y, Y_metadata)) + + def logpdf_link(self, inv_link_f, y, Y_metadata=None): + """ + Log Likelihood function given inverse link of f. + + .. math:: + \\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i}) + + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata must contain 'trials' + :returns: log likelihood evaluated at points inverse link of f. 
+ :rtype: float + """ + N = Y_metadata['trials'] + nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1) + + return nchoosey + y*np.log(inv_link_f) + (N-y)*np.log(1.-inv_link_f) + + def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): + """ + Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f. + + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata must contain 'trials' + :returns: gradient of log likelihood evaluated at points inverse link of f. + :rtype: Nx1 array + """ + N = Y_metadata['trials'] + return y/inv_link_f - (N-y)/(1-inv_link_f) + + def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): + """ + Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j + i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j. + + + .. math:: + \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}} + + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata not used in binomial + :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f. + :rtype: Nx1 array + + .. Note:: + Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases + (the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i) + """ + N = Y_metadata['trials'] + return -y/np.square(inv_link_f) - (N-y)/np.square(1-inv_link_f) + + def samples(self, gp, Y_metadata=None): + """ + Returns a set of samples of observations based on a given value of the latent variable. + + :param gp: latent variable + """ + orig_shape = gp.shape + gp = gp.flatten() + N = Y_metadata['trials'] + Ysim = np.random.binomial(N, self.gp_link.transf(gp)) + return Ysim.reshape(orig_shape) + + def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None): + pass diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 790c6ba4..5dc47cef 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -131,7 +131,7 @@ class Likelihood(Parameterized): return z, mean, variance - def variational_expectations(self, Y, m, v, gh_points=None): + def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None): """ Use Gauss-Hermite Quadrature to compute @@ -158,9 +158,9 @@ class Likelihood(Parameterized): #evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid. # broadcast needs to be handled carefully. 
- logp = self.logpdf(X,Y[:,None]) - dlogp_dx = self.dlogpdf_df(X, Y[:,None]) - d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None]) + logp = self.logpdf(X,Y[:,None], Y_metadata=Y_metadata) + dlogp_dx = self.dlogpdf_df(X, Y[:,None], Y_metadata=Y_metadata) + d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None], Y_metadata=Y_metadata) #clipping for numerical stability #logp = np.clip(logp,-1e9,1e9) diff --git a/GPy/likelihoods/poisson.py b/GPy/likelihoods/poisson.py index ea9b2d10..086a07fd 100644 --- a/GPy/likelihoods/poisson.py +++ b/GPy/likelihoods/poisson.py @@ -64,8 +64,7 @@ class Poisson(Likelihood): :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1)) + return -link_f + y*np.log(link_f) - special.gammaln(y+1) def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ @@ -83,7 +82,6 @@ class Poisson(Likelihood): :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape return y/link_f - 1 def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): @@ -107,12 +105,7 @@ class Poisson(Likelihood): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - hess = -y/(link_f**2) - return hess - #d2_df = self.gp_link.d2transf_df2(gp) - #transf = self.gp_link.transf(gp) - #return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df + return -y/(link_f**2) def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): """ From 3e25098710f345064f306ea87ba8184aa6f98df2 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 6 Mar 2015 13:06:42 +0000 Subject: [PATCH 090/166] Fixed leaky comprehension behaviour for Py3 --- GPy/models/mrd.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index f56873fa..0028078f 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -74,6 +74,8 @@ class MRD(BayesianGPLVMMiniBatch): self.logger.debug("creating observable arrays") self.Ylist = [ObsAr(Y) for Y in Ylist] + #The next line is a fix for Python 3. It replicates the python 2 behaviour from the above comprehension + Y = Ylist[-1] if Ynames is None: self.logger.debug("creating Ynames") From f627c0b1cd66cd9689562d90eb4fac8a3c505f87 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 6 Mar 2015 14:48:19 +0000 Subject: [PATCH 091/166] 2to3 itertools fixer --- GPy/models/mrd.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 0028078f..f6e8c408 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -139,7 +139,7 @@ class MRD(BayesianGPLVMMiniBatch): self.bgplvms = [] - for i, n, k, l, Y, im, bs in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist, self.inference_method, batchsize): + for i, n, k, l, Y, im, bs in zip(itertools.count(), Ynames, kernels, likelihoods, Ylist, self.inference_method, batchsize): assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another" md = np.isnan(Y).any() spgp = BayesianGPLVMMiniBatch(Y, input_dim, X, X_variance, @@ -166,7 +166,7 @@ class MRD(BayesianGPLVMMiniBatch): self._log_marginal_likelihood = 0 self.Z.gradient[:] = 0. self.X.gradient[:] = 0. 
- for b, i in itertools.izip(self.bgplvms, self.inference_method): + for b, i in zip(self.bgplvms, self.inference_method): self._log_marginal_likelihood += b._log_marginal_likelihood self.logger.info('working on im <{}>'.format(hex(id(i)))) @@ -197,7 +197,7 @@ class MRD(BayesianGPLVMMiniBatch): elif init in "PCA_single": X = np.zeros((Ylist[0].shape[0], self.input_dim)) fracs = [] - for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist): + for qs, Y in zip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist): x,frcs = initialize_latent('PCA', len(qs), Y) X[:, qs] = x fracs.append(frcs) From 028fa93d3664cf4c6792909c8cb7b6a4f92507ea Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 6 Mar 2015 17:07:35 +0000 Subject: [PATCH 092/166] kern fix. All tests now pass --- GPy/kern/_src/independent_outputs.py | 8 ++++---- GPy/models/mrd.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py index 2a0c2a33..aa9dca80 100644 --- a/GPy/kern/_src/independent_outputs.py +++ b/GPy/kern/_src/independent_outputs.py @@ -94,10 +94,10 @@ class IndependentOutputs(CombinationKernel): else: slices2 = index_to_slices(X2[:,self.index_dim]) [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))] - if self.single_kern: - kern.gradient = target - else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))] + self.kern.gradient = target + else: + [kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))] def gradients_X(self,dL_dK, X, X2=None): target = np.zeros(X.shape) @@ -144,7 +144,7 @@ class IndependentOutputs(CombinationKernel): if self.single_kern: target[:] += kern.gradient else: target[i][:] += kern.gradient [[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(kerns, slices))] - if self.single_kern: kern.gradient = target + if self.single_kern: self.kern.gradient = target else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))] class Hierarchical(CombinationKernel): diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index f6e8c408..be01b769 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -329,9 +329,9 @@ class MRD(BayesianGPLVMMiniBatch): def __getstate__(self): state = super(MRD, self).__getstate__() - if state.has_key('kern'): + if 'kern' in state: del state['kern'] - if state.has_key('likelihood'): + if 'likelihood' in state: del state['likelihood'] return state From 5eeb2f18e92b5edbf32f037cc387569f82c4d04c Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sat, 7 Mar 2015 07:35:55 +0000 Subject: [PATCH 093/166] Updated README.md for recent Py3 work --- README.md | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index e488fd50..2268f2cb 100644 --- a/README.md +++ b/README.md @@ -10,14 +10,24 @@ A Gaussian processes framework in Python. Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png) -### Moving to Python 3 +### Python 3 Compatibility Work is underway to make GPy run on Python 3. * Python 2.x compatibility is currently broken in this fork -* The test suite runs but with fewer tests for some reason -- not sure why yet. -* Many tests in the suite fail! 
Don't even think about using this fork for production use
-* All weave functions not covered by the test suite are simply commented out. Can add equivalents later as test functions become available
-* Examples that required optimised versions of functions for speed reasons would be valued
+* All tests in the testsuite now run on Python3. To see this for yourself, in Ubuntu 14.04, you can do
+
+    git clone https://github.com/mikecroucher/GPy.git
+    cd GPy
+    git checkout devel
+    nosetests3 GPy/testing
+
+nosetests3 is Ubuntu's way of referring to the Python 3 version of nosetests. You install it with
+
+    sudo apt-get install python3-nose
+
+* Test coverage is less than 100% so it is expected that there is still more work to be done. We need more tests and examples to try out.
+* All weave functions not covered by the test suite are *simply commented out*. Can add equivalents later as test functions become available
+* A set of benchmarks would be useful!

 ### Citation

From cf1c382acde91048f5bb4cd9496b62d5ddc6aa2c Mon Sep 17 00:00:00 2001
From: Mike Croucher
Date: Sat, 7 Mar 2015 07:49:59 +0000
Subject: [PATCH 094/166] xrange fixes for Python 3

---
 GPy/core/parameterization/ties_and_remappings.py      |  6 +++---
 GPy/examples/dimensionality_reduction.py              |  4 ++--
 GPy/inference/latent_function_inference/posterior.py  |  6 +++---
 .../latent_function_inference/var_dtc_parallel.py     |  2 +-
 GPy/inference/mcmc/hmc.py                             |  6 +++---
 GPy/kern/_src/coregionalize.py                        |  2 +-
 GPy/kern/_src/splitKern.py                            |  4 ++--
 GPy/models/ss_gplvm.py                                |  2 +-
 GPy/models/ss_mrd.py                                  | 10 +++++-----
 GPy/plotting/matplot_dep/img_plots.py                 |  4 ++--
 GPy/plotting/matplot_dep/maps.py                      |  2 +-
 GPy/plotting/matplot_dep/visualize.py                 |  2 +-
 GPy/util/choleskies.py                                |  4 ++--
 GPy/util/parallel.py                                  |  4 ++--
 14 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/GPy/core/parameterization/ties_and_remappings.py b/GPy/core/parameterization/ties_and_remappings.py
index bafa8a98..527bc47c 100644
--- a/GPy/core/parameterization/ties_and_remappings.py
+++ b/GPy/core/parameterization/ties_and_remappings.py
@@ -185,7 +185,7 @@ class Tie(Parameterized):
     def _check_change(self):
         changed = False
         if self.tied_param is not None:
-            for i in xrange(self.tied_param.size):
+            for i in range(self.tied_param.size):
                 b0 = self.label_buf==self.label_buf[self.buf_idx[i]]
                 b = self._highest_parent_.param_array[b0]!=self.tied_param[i]
                 if b.sum()==0:
@@ -212,11 +212,11 @@ class Tie(Parameterized):
         if self.tied_param is not None:
             self.tied_param.gradient = 0.
[np.put(self.tied_param.gradient, i, self._highest_parent_.gradient[self.label_buf==self.label_buf[self.buf_idx[i]]].sum()) - for i in xrange(self.tied_param.size)] + for i in range(self.tied_param.size)] def propagate_val(self): if self.tied_param is not None: - for i in xrange(self.tied_param.size): + for i in range(self.tied_param.size): self._highest_parent_.param_array[self.label_buf==self.label_buf[self.buf_idx[i]]] = self.tied_param[i] diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index fe1fa1e5..46107a71 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -653,7 +653,7 @@ def ssgplvm_simulation_linear(): def sample_X(Q, pi): x = np.empty(Q) dies = np.random.rand(Q) - for q in xrange(Q): + for q in range(Q): if dies[q] < pi: x[q] = np.random.randn() else: @@ -663,7 +663,7 @@ def ssgplvm_simulation_linear(): Y = np.empty((N, D)) X = np.empty((N, Q)) # Generate data from random sampled weight matrices - for n in xrange(N): + for n in range(N): X[n] = sample_X(Q, pi) w = np.random.randn(D, Q) Y[n] = np.dot(w, X[n]) diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index 73d65df6..ea608cce 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -107,7 +107,7 @@ class Posterior(object): if self._precision is None: cov = np.atleast_3d(self.covariance) self._precision = np.zeros(cov.shape) # if one covariance per dimension - for p in xrange(cov.shape[-1]): + for p in range(cov.shape[-1]): self._precision[:,:,p] = pdinv(cov[:,:,p])[0] return self._precision @@ -125,7 +125,7 @@ class Posterior(object): if self._woodbury_inv is not None: winv = np.atleast_3d(self._woodbury_inv) self._woodbury_chol = np.zeros(winv.shape) - for p in xrange(winv.shape[-1]): + for p in range(winv.shape[-1]): self._woodbury_chol[:,:,p] = pdinv(winv[:,:,p])[2] #Li = jitchol(self._woodbury_inv) #self._woodbury_chol, _ = dtrtri(Li) @@ -160,7 +160,7 @@ class Posterior(object): elif self._covariance is not None: B = np.atleast_3d(self._K) - np.atleast_3d(self._covariance) self._woodbury_inv = np.empty_like(B) - for i in xrange(B.shape[-1]): + for i in range(B.shape[-1]): tmp, _ = dpotrs(self.K_chol, B[:,:,i]) self._woodbury_inv[:,:,i], _ = dpotrs(self.K_chol, tmp.T) return self._woodbury_inv diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index cb117af1..6f98668f 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -92,7 +92,7 @@ class VarDTC_minibatch(LatentFunctionInference): psi0_full = 0. YRY_full = 0. 
- for n_start in xrange(0,num_data,batchsize): + for n_start in range(0,num_data,batchsize): n_end = min(batchsize+n_start, num_data) if batchsize==num_data: Y_slice = Y diff --git a/GPy/inference/mcmc/hmc.py b/GPy/inference/mcmc/hmc.py index ec6399b6..fcc72591 100644 --- a/GPy/inference/mcmc/hmc.py +++ b/GPy/inference/mcmc/hmc.py @@ -39,7 +39,7 @@ class HMC: :rtype: numpy.ndarray """ params = np.empty((num_samples,self.p.size)) - for i in xrange(num_samples): + for i in range(num_samples): self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M) H_old = self._computeH() theta_old = self.model.optimizer_array.copy() @@ -59,7 +59,7 @@ class HMC: return params def _update(self, hmc_iters): - for i in xrange(hmc_iters): + for i in range(hmc_iters): self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients()) self.model.optimizer_array = self.model.optimizer_array + self.stepsize*np.dot(self.Minv, self.p) self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients()) @@ -82,7 +82,7 @@ class HMC_shortcut: def sample(self, m_iters=1000, hmc_iters=20): params = np.empty((m_iters,self.p.size)) - for i in xrange(m_iters): + for i in range(m_iters): # sample a stepsize from the uniform distribution stepsize = np.exp(np.random.rand()*(self.stepsize_range[1]-self.stepsize_range[0])+self.stepsize_range[0]) self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M) diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py index 1b16fd73..5b91de1c 100644 --- a/GPy/kern/_src/coregionalize.py +++ b/GPy/kern/_src/coregionalize.py @@ -166,7 +166,7 @@ class Coregionalize(Kern): def update_gradients_diag(self, dL_dKdiag, X): index = np.asarray(X, dtype=np.int).flatten() - dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in xrange(self.output_dim)]) + dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in range(self.output_dim)]) self.W.gradient = 2.*self.W*dL_dKdiag_small[:, None] self.kappa.gradient = dL_dKdiag_small diff --git a/GPy/kern/_src/splitKern.py b/GPy/kern/_src/splitKern.py index 18771cb0..051e492b 100644 --- a/GPy/kern/_src/splitKern.py +++ b/GPy/kern/_src/splitKern.py @@ -104,7 +104,7 @@ class SplitKern(CombinationKernel): assert len(slices2)<=2, 'The Split kernel only support two different indices' target = np.zeros((X.shape[0], X2.shape[0])) # diagonal blocks - [[target.__setitem__((s,s2), self.kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[i], slices2[i])] for i in xrange(min(len(slices),len(slices2)))] + [[target.__setitem__((s,s2), self.kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[i], slices2[i])] for i in range(min(len(slices),len(slices2)))] if len(slices)>1: [target.__setitem__((s,s2), self.kern_cross.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[1], slices2[0])] if len(slices2)>1: @@ -135,7 +135,7 @@ class SplitKern(CombinationKernel): else: assert dL_dK.shape==(X.shape[0],X2.shape[0]) slices2 = index_to_slices(X2[:,self.index_dim]) - [[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s,s2 in itertools.product(slices[i], slices2[i])] for i in xrange(min(len(slices),len(slices2)))] + [[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s,s2 in itertools.product(slices[i], slices2[i])] for i in range(min(len(slices),len(slices2)))] if len(slices)>1: [collate_grads(dL_dK[s,s2], X[s], X2[s2], True) for s,s2 in itertools.product(slices[1], slices2[0])] if len(slices2)>1: diff --git a/GPy/models/ss_gplvm.py 
b/GPy/models/ss_gplvm.py index a61ad2a0..b8e1c72d 100644 --- a/GPy/models/ss_gplvm.py +++ b/GPy/models/ss_gplvm.py @@ -71,7 +71,7 @@ class SSGPLVM(SparseGP_MPI): self.link_parameter(self.X, index=0) if self.group_spike: - [self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in xrange(self.X.gamma.shape[1])] # Tie columns together + [self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in range(self.X.gamma.shape[1])] # Tie columns together def set_X_gradients(self, X, X_grad): """Set the gradients of the posterior distribution of X in its specific form.""" diff --git a/GPy/models/ss_mrd.py b/GPy/models/ss_mrd.py index 036ac095..bd2efce0 100644 --- a/GPy/models/ss_mrd.py +++ b/GPy/models/ss_mrd.py @@ -19,10 +19,10 @@ class SSMRD(Model): name='model_'+str(i)) for i,y in enumerate(Ylist)] self.add_parameters(*(self.models)) - [[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in xrange(len(self.models))] for j in xrange(self.models[0].X.mean.shape[1])] - for i in xrange(self.models[0].X.mean.shape[0])] - [[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in xrange(len(self.models))] for j in xrange(self.models[0].X.variance.shape[1])] - for i in xrange(self.models[0].X.variance.shape[0])] + [[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.mean.shape[1])] + for i in range(self.models[0].X.mean.shape[0])] + [[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.variance.shape[1])] + for i in range(self.models[0].X.variance.shape[0])] self.updates = True @@ -31,4 +31,4 @@ class SSMRD(Model): self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models]) def log_likelihood(self): - return self._log_marginal_likelihood \ No newline at end of file + return self._log_marginal_likelihood diff --git a/GPy/plotting/matplot_dep/img_plots.py b/GPy/plotting/matplot_dep/img_plots.py index 453a904d..5346545d 100644 --- a/GPy/plotting/matplot_dep/img_plots.py +++ b/GPy/plotting/matplot_dep/img_plots.py @@ -50,8 +50,8 @@ def plot_2D_images(figure, arr, symmetric=False, pad=None, zoom=None, mode=None, buf = np.ones((y_size*fig_nrows+pad*(fig_nrows-1), x_size*fig_ncols+pad*(fig_ncols-1), 3),dtype=arr.dtype) - for y in xrange(fig_nrows): - for x in xrange(fig_ncols): + for y in range(fig_nrows): + for x in range(fig_ncols): if y*fig_ncols+x Date: Sat, 7 Mar 2015 08:42:05 +0000 Subject: [PATCH 095/166] Fix README.md formatting --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 2268f2cb..8c6c4397 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ Work is underway to make GPy run on Python 3. * Python 2.x compatibility is currently broken in this fork * All tests in the testsuite now run on Python3. To see this for yourself, in Ubuntu 14.04, you can do + git clone https://github.com/mikecroucher/GPy.git cd GPy git checkout devel From f6b71629e70644e640744dfd856cbd12316bf4d6 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Sat, 7 Mar 2015 08:43:13 +0000 Subject: [PATCH 096/166] Fix README.md formatting --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8c6c4397..60dcbe24 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,9 @@ Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GP Work is underway to make GPy run on Python 3. 
* Python 2.x compatibility is currently broken in this fork -* All tests in the testsuite now run on Python3. To see this for yourself, in Ubuntu 14.04, you can do +* All tests in the testsuite now run on Python3. +To see this for yourself, in Ubuntu 14.04, you can do git clone https://github.com/mikecroucher/GPy.git cd GPy From 233c5ee8b48cb300ebe8bc66e59b15a89cbcdada Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 9 Mar 2015 10:27:21 +0000 Subject: [PATCH 097/166] Updated other likelihoods to give back logpdf and gradients for each link_f rather than summing on the inside --- GPy/likelihoods/exponential.py | 6 +----- GPy/likelihoods/gamma.py | 6 +----- GPy/likelihoods/gaussian.py | 28 ++++++++++++---------------- GPy/likelihoods/likelihood.py | 2 +- GPy/likelihoods/poisson.py | 3 +-- GPy/likelihoods/student_t.py | 13 +++---------- GPy/testing/likelihood_tests.py | 6 +++--- 7 files changed, 22 insertions(+), 42 deletions(-) diff --git a/GPy/likelihoods/exponential.py b/GPy/likelihoods/exponential.py index 8110c7d4..eca6ce52 100644 --- a/GPy/likelihoods/exponential.py +++ b/GPy/likelihoods/exponential.py @@ -57,9 +57,8 @@ class Exponential(Likelihood): :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape log_objective = np.log(link_f) - y*link_f - return np.sum(log_objective) + return log_objective def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ @@ -77,7 +76,6 @@ class Exponential(Likelihood): :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape grad = 1./link_f - y #grad = y/(link_f**2) - 1./link_f return grad @@ -103,7 +101,6 @@ class Exponential(Likelihood): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape hess = -1./(link_f**2) #hess = -2*y/(link_f**3) + 1/(link_f**2) return hess @@ -123,7 +120,6 @@ class Exponential(Likelihood): :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape d3lik_dlink3 = 2./(link_f**3) #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3) return d3lik_dlink3 diff --git a/GPy/likelihoods/gamma.py b/GPy/likelihoods/gamma.py index c79e196c..9d742d02 100644 --- a/GPy/likelihoods/gamma.py +++ b/GPy/likelihoods/gamma.py @@ -66,12 +66,11 @@ class Gamma(Likelihood): :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape #alpha = self.gp_link.transf(gp)*self.beta #return (1. 
- alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha)) alpha = link_f*self.beta log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y - return np.sum(log_objective) + return log_objective def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ @@ -90,7 +89,6 @@ class Gamma(Likelihood): :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta #old #return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta @@ -118,7 +116,6 @@ class Gamma(Likelihood): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2) #old #return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta @@ -140,6 +137,5 @@ class Gamma(Likelihood): :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3) return d3lik_dlink3 diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index a6e5b7e0..4e7de9e3 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -130,11 +130,10 @@ class Gaussian(Likelihood): :returns: log likelihood evaluated for this point :rtype: float """ - assert np.asarray(link_f).shape == np.asarray(y).shape N = y.shape[0] - ln_det_cov = N*np.log(self.variance) + ln_det_cov = np.log(self.variance) - return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi)) + return -0.5*((y-link_f)**2/self.variance + ln_det_cov + np.log(2.*np.pi)) def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ @@ -151,8 +150,7 @@ class Gaussian(Likelihood): :returns: gradient of log likelihood evaluated at points link(f) :rtype: Nx1 array """ - assert np.asarray(link_f).shape == np.asarray(y).shape - s2_i = (1.0/self.variance) + s2_i = 1.0/self.variance grad = s2_i*y - s2_i*link_f return grad @@ -178,9 +176,9 @@ class Gaussian(Likelihood): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert np.asarray(link_f).shape == np.asarray(y).shape N = y.shape[0] - hess = -(1.0/self.variance)*np.ones((N, 1)) + D = link_f.shape[1] + hess = -(1.0/self.variance)*np.ones((N, D)) return hess def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): @@ -198,9 +196,9 @@ class Gaussian(Likelihood): :returns: third derivative of log likelihood evaluated at points link(f) :rtype: Nx1 array """ - assert np.asarray(link_f).shape == np.asarray(y).shape N = y.shape[0] - d3logpdf_dlink3 = np.zeros((N,1)) + D = link_f.shape[1] + d3logpdf_dlink3 = np.zeros((N,D)) return d3logpdf_dlink3 def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None): @@ -218,12 +216,11 @@ class Gaussian(Likelihood): :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter 
:rtype: float """ - assert np.asarray(link_f).shape == np.asarray(y).shape e = y - link_f s_4 = 1.0/(self.variance**2) N = y.shape[0] - dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e)) - return np.sum(dlik_dsigma) # Sure about this sum? + dlik_dsigma = -0.5/self.variance + 0.5*s_4*np.square(e) + return dlik_dsigma def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None): """ @@ -240,7 +237,6 @@ class Gaussian(Likelihood): :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter :rtype: Nx1 array """ - assert np.asarray(link_f).shape == np.asarray(y).shape s_4 = 1.0/(self.variance**2) dlik_grad_dsigma = -s_4*y + s_4*link_f return dlik_grad_dsigma @@ -260,15 +256,15 @@ class Gaussian(Likelihood): :returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter :rtype: Nx1 array """ - assert np.asarray(link_f).shape == np.asarray(y).shape s_4 = 1.0/(self.variance**2) N = y.shape[0] - d2logpdf_dlink2_dvar = np.ones((N,1))*s_4 + D = link_f.shape[1] + d2logpdf_dlink2_dvar = np.ones((N, D))*s_4 return d2logpdf_dlink2_dvar def dlogpdf_link_dtheta(self, f, y, Y_metadata=None): dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata) - return np.asarray([[dlogpdf_dvar]]) + return dlogpdf_dvar def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None): dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata) diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 5dc47cef..b1e78b93 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -425,7 +425,7 @@ class Likelihood(Parameterized): return np.zeros([f.shape[0], 0]) def _laplace_gradients(self, f, y, Y_metadata=None): - dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata) + dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata).sum(axis=0) dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata) d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata) diff --git a/GPy/likelihoods/poisson.py b/GPy/likelihoods/poisson.py index 086a07fd..6da3160f 100644 --- a/GPy/likelihoods/poisson.py +++ b/GPy/likelihoods/poisson.py @@ -105,7 +105,7 @@ class Poisson(Likelihood): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - return -y/(link_f**2) + return -y/(link_f**2) def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): """ @@ -122,7 +122,6 @@ class Poisson(Likelihood): :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape d3lik_dlink3 = 2*y/(link_f)**3 return d3lik_dlink3 diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py index 855f6b40..dbd4d94f 100644 --- a/GPy/likelihoods/student_t.py +++ b/GPy/likelihoods/student_t.py @@ -86,7 +86,6 @@ class StudentT(Likelihood): :rtype: float """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f #FIXME: #Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?! 
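As a cross-check on the Student-t edits in this file, the per-point log density
that logpdf_link now returns elementwise can be verified against scipy.stats.t.
A standalone sketch, not part of the patch, with a helper name of our choosing:

    import numpy as np
    from scipy import special, stats

    def studentt_logpdf(f, y, v, sigma2):
        # per-point Student-t log density, mirroring StudentT.logpdf_link
        e = y - f
        return (special.gammaln(0.5*(v + 1)) - special.gammaln(0.5*v)
                - 0.5*np.log(sigma2*v*np.pi)
                - 0.5*(v + 1)*np.log(1. + (e**2)/(v*sigma2)))

    y, f = np.array([0.3, -1.2]), np.array([0.0, 0.5])
    v, sigma2 = 4.0, 2.0
    assert np.allclose(studentt_logpdf(f, y, v, sigma2),
                       stats.t.logpdf(y, df=v, loc=f, scale=np.sqrt(sigma2)))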
@@ -97,7 +96,7 @@ class StudentT(Likelihood): - 0.5*np.log(self.sigma2 * self.v * np.pi) - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) ) - return np.sum(objective) + return objective def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): """ @@ -115,7 +114,6 @@ class StudentT(Likelihood): :rtype: Nx1 array """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) return grad @@ -141,7 +139,6 @@ class StudentT(Likelihood): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) return hess @@ -161,7 +158,6 @@ class StudentT(Likelihood): :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / ((e**2 + self.sigma2*self.v)**3) @@ -183,10 +179,9 @@ class StudentT(Likelihood): :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: float """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) - return np.sum(dlogpdf_dvar) + return dlogpdf_dvar def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None): """ @@ -203,7 +198,6 @@ class StudentT(Likelihood): :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: Nx1 array """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) return dlogpdf_dlink_dvar @@ -223,7 +217,6 @@ class StudentT(Likelihood): :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :rtype: Nx1 array """ - assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape e = y - inv_link_f d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) / ((self.sigma2*self.v + (e**2))**3) @@ -246,7 +239,7 @@ class StudentT(Likelihood): return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) def predictive_mean(self, mu, sigma, Y_metadata=None): - # The comment here confuses mean and median. + # The comment here confuses mean and median. return self.gp_link.transf(mu) # only true if link is monotonic, which it is. 
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py index 95929098..877d1aa0 100644 --- a/GPy/testing/likelihood_tests.py +++ b/GPy/testing/likelihood_tests.py @@ -362,7 +362,7 @@ class TestNoiseModels(object): def t_dlogpdf_df(self, model, Y, f): print "\n{}".format(inspect.stack()[0][3]) self.description = "\n{}".format(inspect.stack()[0][3]) - logpdf = functools.partial(model.logpdf, y=Y) + logpdf = functools.partial(np.sum(model.logpdf), y=Y) dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g') grad.randomize() @@ -652,9 +652,9 @@ class LaplaceTests(unittest.TestCase): print m2 optimizer = 'scg' print "Gaussian" - m1.optimize(optimizer, messages=debug) + m1.optimize(optimizer, messages=debug, ipython_notebook=False) print "Laplace Gaussian" - m2.optimize(optimizer, messages=debug) + m2.optimize(optimizer, messages=debug, ipython_notebook=False) if debug: print m1 print m2 From c6cf0bc121f7094a8d10c9b90e057e0c9ab2398b Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 9 Mar 2015 17:26:18 +0000 Subject: [PATCH 098/166] add set_Z function --- GPy/core/sparse_gp.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 005ef2ac..376224cb 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -62,6 +62,15 @@ class SparseGP(GP): def has_uncertain_inputs(self): return isinstance(self.X, VariationalPosterior) + + def set_Z(self, Z): + self.update_model(False) + self.unlink_parameter(self.Z) + from ..core import Param + self.Z = Param('inducing inputs',Z) + self.link_parameter(self.Z, index=0) + self.update_model(True) + self._trigger_params_changed() def parameters_changed(self): self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata) From 77b4dc7d4408f74eeb55220b94fc2700b4911a1a Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Wed, 11 Mar 2015 10:37:21 +0000 Subject: [PATCH 099/166] [optimize] added clear functionality for ipython notebook and kern input sensitivity error handling --- GPy/core/model.py | 2 +- GPy/core/verbose_optimization.py | 46 +++++++++++++++++--------------- GPy/kern/_src/add.py | 7 +++-- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index c5d318e7..b6ad66f3 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -213,7 +213,7 @@ class Model(Parameterized): self.obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e10, 1e10) return obj_f, self.obj_grads - def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, **kwargs): + def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs): """ Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. 
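A minimal usage sketch of the optimize() signature extended above, assuming a GPy
build from this branch (GPRegression is just a convenient model; the data are
synthetic):

    import numpy as np
    import GPy

    X = np.linspace(0, 1, 30)[:, None]
    Y = np.sin(6*X) + 0.1*np.random.randn(30, 1)
    m = GPy.models.GPRegression(X, Y)
    # plain-text progress outside a notebook
    m.optimize(messages=True, max_iters=500, ipython_notebook=False)
    # in a notebook, the new flag removes the progress widget when done:
    # m.optimize(messages=True, ipython_notebook=True, clear_after_finish=True)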
diff --git a/GPy/core/verbose_optimization.py b/GPy/core/verbose_optimization.py index 1a87b3da..bd5afc25 100644 --- a/GPy/core/verbose_optimization.py +++ b/GPy/core/verbose_optimization.py @@ -11,7 +11,7 @@ def exponents(fnow, current_grad): return np.sign(exps) * np.log10(exps).astype(int) class VerboseOptimization(object): - def __init__(self, model, opt, maxiters, verbose=False, current_iteration=0, ipython_notebook=True): + def __init__(self, model, opt, maxiters, verbose=False, current_iteration=0, ipython_notebook=True, clear_after_finish=False): self.verbose = verbose if self.verbose: self.model = model @@ -22,6 +22,7 @@ class VerboseOptimization(object): self.opt_name = opt.opt_name self.model.add_observer(self, self.print_status) self.status = 'running' + self.clear = clear_after_finish self.update() @@ -37,30 +38,31 @@ class VerboseOptimization(object): self.ipython_notebook = False if self.ipython_notebook: - self.text.set_css('width', '100%') - #self.progress.set_css('width', '100%') - left_col = ContainerWidget(children = [self.progress, self.text]) right_col = ContainerWidget(children = [self.model_show]) - hor_align = ContainerWidget(children = [left_col, right_col]) + self.hor_align = ContainerWidget(children = [left_col, right_col]) - display(hor_align) + display(self.hor_align) + + try: + self.text.set_css('width', '100%') + left_col.set_css({ + 'padding': '2px', + 'width': "100%", + }) + + right_col.set_css({ + 'padding': '2px', + }) + + self.hor_align.set_css({ + 'width': "100%", + }) + except: + pass - left_col.set_css({ - 'padding': '2px', - 'width': "100%", - }) - - right_col.set_css({ - 'padding': '2px', - }) - - hor_align.set_css({ - 'width': "100%", - }) - - hor_align.remove_class('vbox') - hor_align.add_class('hbox') + self.hor_align.remove_class('vbox') + self.hor_align.add_class('hbox') left_col.add_class("box-flex1") right_col.add_class('box-flex0') @@ -148,3 +150,5 @@ class VerboseOptimization(object): print 'Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start) print 'Optimization status: {0:.5g}'.format(self.status) print + elif self.clear: + self.hor_align.close() diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 4c72a254..8059f68f 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -180,9 +180,12 @@ class Add(CombinationKernel): def input_sensitivity(self, summarize=True): if summarize: - return reduce(np.add, [k.input_sensitivity(summarize) for k in self.parts]) + i_s = np.zeros((self.input_dim)) + for k in self.parts: + i_s[k.active_dims] += k.input_sensitivity(summarize) + return i_s else: i_s = np.zeros((len(self.parts), self.input_dim)) from operator import setitem - [setitem(i_s, (i, Ellipsis), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)] + [setitem(i_s, (i, k.active_dims), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)] return i_s From d01a0a61e2e0200354fde00284f07ed2d4105527 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Wed, 11 Mar 2015 10:42:44 +0000 Subject: [PATCH 100/166] [optimize] max_f_eval -> max_iters --- GPy/core/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index b6ad66f3..ad101ec0 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -219,8 +219,8 @@ class Model(Parameterized): kwargs are passed to the optimizer. 
They can be: - :param max_f_eval: maximum number of function evaluations - :type max_f_eval: int + :param max_iters: maximum number of function evaluations + :type max_iters: int :messages: True: Display messages during optimisation, "ipython_notebook": :type messages: bool"string :param optimizer: which optimizer to use (defaults to self.preferred optimizer) From 7f30fef69896aaccca2bdbfee269e82cb4c6fda7 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Fri, 13 Mar 2015 09:47:36 +0000 Subject: [PATCH 101/166] add trigger update to set_{X,Y,Z} --- GPy/core/gp.py | 16 ++++++++-------- GPy/core/sparse_gp.py | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 3252ac08..38a7bb3d 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -89,7 +89,7 @@ class GP(Model): self.link_parameter(self.kern) self.link_parameter(self.likelihood) - def set_XY(self, X=None, Y=None): + def set_XY(self, X=None, Y=None, trigger_update=True): """ Set the input / output data of the model This is useful if we wish to change our existing data but maintain the same model @@ -99,7 +99,7 @@ class GP(Model): :param Y: output observations :type Y: np.ndarray """ - self.update_model(False) + if trigger_update: self.update_model(False) if Y is not None: if self.normalizer is not None: self.normalizer.scale_by(Y) @@ -123,26 +123,26 @@ class GP(Model): self.link_parameters(self.X) else: self.X = ObsAr(X) - self.update_model(True) - self._trigger_params_changed() + if trigger_update: self.update_model(True) + if trigger_update: self._trigger_params_changed() - def set_X(self,X): + def set_X(self,X, trigger_update=True): """ Set the input data of the model :param X: input observations :type X: np.ndarray """ - self.set_XY(X=X) + self.set_XY(X=X, trigger_update=trigger_update) - def set_Y(self,Y): + def set_Y(self,Y, trigger_update=True): """ Set the output data of the model :param X: output observations :type X: np.ndarray """ - self.set_XY(Y=Y) + self.set_XY(Y=Y, trigger_update=trigger_update) def parameters_changed(self): """ diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 376224cb..bac54d8c 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -63,14 +63,14 @@ class SparseGP(GP): def has_uncertain_inputs(self): return isinstance(self.X, VariationalPosterior) - def set_Z(self, Z): - self.update_model(False) + def set_Z(self, Z, trigger_update=True): + if trigger_update: self.update_model(False) self.unlink_parameter(self.Z) from ..core import Param self.Z = Param('inducing inputs',Z) self.link_parameter(self.Z, index=0) - self.update_model(True) - self._trigger_params_changed() + if trigger_update: self.update_model(True) + if trigger_update: self._trigger_params_changed() def parameters_changed(self): self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata) From e5587cf234684d095516a5881f4d829deceb2cd2 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Fri, 13 Mar 2015 14:43:49 +0000 Subject: [PATCH 102/166] Used 'six' to support Py3 and Py2 simultaneously --- GPy/core/parameterization/parameterized.py | 10 ++++++---- GPy/kern/_src/kern.py | 11 ++++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 27ecbc1c..691bf4a7 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py 
@@ -1,7 +1,7 @@ # Copyright (c) 2014, Max Zwiessele, James Hensman # Licensed under the BSD 3-clause license (see LICENSE.txt) - +import six # For metaclass support in Python 2 and 3 simultaneously import numpy; np = numpy import itertools from re import compile, _pattern_type @@ -27,7 +27,8 @@ class ParametersChangedMeta(type): self.parameters_changed() return self -class Parameterized(Parameterizable,metaclass=ParametersChangedMeta): +@six.add_metaclass(ParametersChangedMeta) +class Parameterized(Parameterizable): """ Parameterized class @@ -73,8 +74,9 @@ class Parameterized(Parameterizable,metaclass=ParametersChangedMeta): # Metaclass for parameters changed after init. # This makes sure, that parameters changed will always be called after __init__ # **Never** call parameters_changed() yourself - #This is ignored in Python 3 -- you need to put the meta class in the - __metaclass__ = ParametersChangedMeta + #This is ignored in Python 3 -- you need to put the meta class in the function definition. + #__metaclass__ = ParametersChangedMeta + #The six module is used to support both Python 2 and 3 simultaneously #=========================================================================== def __init__(self, name=None, parameters=[], *a, **kw): super(Parameterized, self).__init__(name=name, *a, **kw) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index 2e8ebcb0..e63ddad4 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -8,15 +8,16 @@ from .kernel_slice_operations import KernCallsViaSlicerMeta from ...util.caching import Cache_this from GPy.core.parameterization.observable_array import ObsAr from functools import reduce +import six - - -class Kern(Parameterized,metaclass=KernCallsViaSlicerMeta): +@six.add_metaclass(KernCallsViaSlicerMeta) +class Kern(Parameterized): #=========================================================================== # This adds input slice support. 
The rather ugly code for slicing can be
     # found in kernel_slice_operations
-    # __mataclass__ is ignored in Python 3 - needs to be put in the function definiton
+    # __metaclass__ is ignored in Python 3 - needs to be put in the function definition
+    #__metaclass__ = KernCallsViaSlicerMeta
+    #Here, we use the Python module six to support Py3 and Py2 simultaneously
     #===========================================================================
     _support_GPU=False
     def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw):

From 7152f41f82509824675cef3c81a163492df0dda8 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Thu, 19 Mar 2015 11:33:00 +0000
Subject: [PATCH 103/166] fix the param renaming problem

---
 GPy/core/parameterization/parameter_core.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py
index bee160b2..13c2ee59 100644
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@@ -947,7 +947,7 @@ class Parameterizable(OptimizationHandlable):
             self._add_parameter_name(param, ignore_added_names)
             # and makes sure to not delete programmatically added parameters
             for other in self.parameters[::-1]:
-                if other is not param and other.name.startswith(param.name):
+                if other is not param and other.name == param.name:
                     warn_and_retry(param, _name_digit.match(other.name))
             return
         if pname not in dir(self):
@@ -955,6 +955,7 @@ class Parameterizable(OptimizationHandlable):
             self._added_names_.add(pname)
         elif pname in self.__dict__:
             if pname in self._added_names_:
+                print self._added_names_
                 other = self.__dict__[pname]
                 if not (param is other):
                     del self.__dict__[pname]

From ea5f25900c2372a839167a6815e86e1ce7a20f3f Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Thu, 19 Mar 2015 11:33:59 +0000
Subject: [PATCH 104/166] remove printing

---
 GPy/core/parameterization/parameter_core.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py
index 13c2ee59..dc083a98 100644
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@@ -955,7 +955,6 @@ class Parameterizable(OptimizationHandlable):
             self._added_names_.add(pname)
         elif pname in self.__dict__:
             if pname in self._added_names_:
-                print self._added_names_
                 other = self.__dict__[pname]
                 if not (param is other):
                     del self.__dict__[pname]

From f7719446290c549ff2ff1f3867cbed96c28be678 Mon Sep 17 00:00:00 2001
From: mzwiessele
Date: Mon, 23 Mar 2015 08:46:46 +0000
Subject: [PATCH 105/166] [optimization] model prints how many parameters there are to optimize

---
 GPy/core/model.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index ad101ec0..0251d58c 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -402,6 +402,7 @@ class Model(Parameterized):
         model_details = [['Model', self.name + '<br>'],
                          ['Log-likelihood', '{}<br>'.format(float(self.log_likelihood()))],
                          ["Number of Parameters", '{}<br>'.format(self.size)],
+                         ["Number of Optimization Parameters", '{}<br>'.format(self._size_transformed())],
                          ["Updates", '{}<br>
'.format(self._update_on)], ] from operator import itemgetter @@ -419,6 +420,7 @@ class Model(Parameterized): model_details = [['Name', self.name], ['Log-likelihood', '{}'.format(float(self.log_likelihood()))], ["Number of Parameters", '{}'.format(self.size)], + ["Number of Optimization Parameters", '{}'.format(self._size_transformed())], ["Updates", '{}'.format(self._update_on)], ] from operator import itemgetter From b47e5ab4bbefec54117ed7810047793d376e48b9 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Mon, 23 Mar 2015 08:47:45 +0000 Subject: [PATCH 106/166] [ploting init] minor --- GPy/plotting/matplot_dep/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/plotting/matplot_dep/__init__.py b/GPy/plotting/matplot_dep/__init__.py index 4c4402ce..a60b52c2 100644 --- a/GPy/plotting/matplot_dep/__init__.py +++ b/GPy/plotting/matplot_dep/__init__.py @@ -15,4 +15,4 @@ import netpbmfile import inference_plots import maps import img_plots -from ssgplvm import SSGPLVM_plot +from ssgplvm import SSGPLVM_plot \ No newline at end of file From 7381531d23189d90f42ccf42ce877dd0f6a31f4e Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Mon, 23 Mar 2015 08:48:06 +0000 Subject: [PATCH 107/166] [sparse gp] doc changes for missing data --- GPy/core/sparse_gp.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index bac54d8c..4fcade79 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -10,11 +10,6 @@ from parameterization.variational import VariationalPosterior, NormalPosterior from ..util.linalg import mdot import logging -from GPy.inference.latent_function_inference.posterior import Posterior -from GPy.inference.optimization.stochastics import SparseGPStochastics,\ - SparseGPMissing -#no stochastics.py file added! from GPy.inference.optimization.stochastics import SparseGPStochastics,\ - #SparseGPMissing logger = logging.getLogger("sparse gp") class SparseGP(GP): @@ -24,6 +19,10 @@ class SparseGP(GP): This model allows (approximate) inference using variational DTC or FITC (Gaussian likelihoods) as well as non-conjugate sparse methods based on these. + + This is not for missing data, as the implementation for missing data involves + some inefficient optimization routine decisions. + See missing data SparseGP implementation in py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'. :param X: inputs :type X: np.ndarray (num_data x input_dim) @@ -66,7 +65,6 @@ class SparseGP(GP): def set_Z(self, Z, trigger_update=True): if trigger_update: self.update_model(False) self.unlink_parameter(self.Z) - from ..core import Param self.Z = Param('inducing inputs',Z) self.link_parameter(self.Z, index=0) if trigger_update: self.update_model(True) @@ -120,7 +118,7 @@ class SparseGP(GP): For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD). - This is for both with and without missing data. + This is for both with and without missing data. See for missing data SparseGP implementation py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'. """ if kern is None: kern = self.kern From 1b46a99e75db506621825f8b53aa223effe8d5cb Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Mon, 23 Mar 2015 09:57:53 +0000 Subject: [PATCH 108/166] [optimization] html widget api changes in ipython notebook? 
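The defensive pattern the diff below adopts, shown in isolation. A sketch only,
assuming the pre-ipywidgets IPython 2.x widget API, and not part of the patch:

    # style calls changed across IPython releases, so attempt them and
    # degrade gracefully instead of crashing the optimization printout
    try:
        from IPython.html.widgets import ContainerWidget  # IPython 2.x era
        box = ContainerWidget()
        box.remove_class('vbox')
        box.add_class('hbox')
    except Exception:
        box = None  # no compatible widget API; fall back to text output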
--- GPy/core/verbose_optimization.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/GPy/core/verbose_optimization.py b/GPy/core/verbose_optimization.py index bd5afc25..f84cfc7e 100644 --- a/GPy/core/verbose_optimization.py +++ b/GPy/core/verbose_optimization.py @@ -58,15 +58,16 @@ class VerboseOptimization(object): self.hor_align.set_css({ 'width': "100%", }) + + self.hor_align.remove_class('vbox') + self.hor_align.add_class('hbox') + + left_col.add_class("box-flex1") + right_col.add_class('box-flex0') + except: pass - self.hor_align.remove_class('vbox') - self.hor_align.add_class('hbox') - - left_col.add_class("box-flex1") - right_col.add_class('box-flex0') - #self.text.add_class('box-flex2') #self.progress.add_class('box-flex1') else: From 99545500b125d8b247101619c4a4051a786e2108 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 23 Mar 2015 12:38:15 +0000 Subject: [PATCH 109/166] a little work on mappings --- GPy/core/mapping.py | 68 ---------------------------------- GPy/old_tests/mapping_tests.py | 24 ++++++++++++ 2 files changed, 24 insertions(+), 68 deletions(-) diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py index 111fec6f..25fe504a 100644 --- a/GPy/core/mapping.py +++ b/GPy/core/mapping.py @@ -74,72 +74,4 @@ class Bijective_mapping(Mapping): """Inverse mapping from output domain of the function to the inputs.""" raise NotImplementedError -from model import Model - -class Mapping_check_model(Model): - """ - This is a dummy model class used as a base class for checking that the - gradients of a given mapping are implemented correctly. It enables - checkgradient() to be called independently on each mapping. - """ - def __init__(self, mapping=None, dL_df=None, X=None): - num_samples = 20 - if mapping==None: - mapping = GPy.mapping.linear(1, 1) - if X==None: - X = np.random.randn(num_samples, mapping.input_dim) - if dL_df==None: - dL_df = np.ones((num_samples, mapping.output_dim)) - - self.mapping=mapping - self.X = X - self.dL_df = dL_df - self.num_params = self.mapping.num_params - Model.__init__(self) - - - def _get_params(self): - return self.mapping._get_params() - - def _get_param_names(self): - return self.mapping._get_param_names() - - def _set_params(self, x): - self.mapping._set_params(x) - - def log_likelihood(self): - return (self.dL_df*self.mapping.f(self.X)).sum() - - def _log_likelihood_gradients(self): - raise NotImplementedError, "This needs to be implemented to use the Mapping_check_model class." - -class Mapping_check_df_dtheta(Mapping_check_model): - """This class allows gradient checks for the gradient of a mapping with respect to parameters. """ - def __init__(self, mapping=None, dL_df=None, X=None): - Mapping_check_model.__init__(self,mapping=mapping,dL_df=dL_df, X=X) - - def _log_likelihood_gradients(self): - return self.mapping.df_dtheta(self.dL_df, self.X) - - -class Mapping_check_df_dX(Mapping_check_model): - """This class allows gradient checks for the gradient of a mapping with respect to X. 
""" - def __init__(self, mapping=None, dL_df=None, X=None): - Mapping_check_model.__init__(self,mapping=mapping,dL_df=dL_df, X=X) - - if dL_df==None: - dL_df = np.ones((self.X.shape[0],self.mapping.output_dim)) - self.num_params = self.X.shape[0]*self.mapping.input_dim - - def _log_likelihood_gradients(self): - return self.mapping.df_dX(self.dL_df, self.X).flatten() - - def _get_param_names(self): - return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])] - - def _get_params(self): - return self.X.flatten() - - def _set_params(self, x): - self.X=x.reshape(self.X.shape) diff --git a/GPy/old_tests/mapping_tests.py b/GPy/old_tests/mapping_tests.py index d501df1d..8e4f250d 100644 --- a/GPy/old_tests/mapping_tests.py +++ b/GPy/old_tests/mapping_tests.py @@ -5,6 +5,30 @@ import unittest import numpy as np import GPy +class MappingGradChecker(GPy.core.Model): + """ + This class has everything we need to check the gradient of a mapping. It + implement a simple likelihood which is the sum of the outputs of the + mapping. the gradients are checked against the parameters of the mapping + and the input. + """ + def __init__(self, mapping, X, name): + super(MappingChecker).__init__(self, name) + self.mapping = mapping + self.add_parameter(self.mapping) + self.X = GPy.core.Param('X',X) + self.add_parameter(self.X) + self.dL_dY = np.ones((self.X.shape[0]. self.mapping.output_dim)) + def log_likelihood(self): + return np.sum(self.mapping.f(X)) + def parameters_changed(self): + self.X.gradient = self.mapping.gradients_X(self.dL_dY, self.X) + self.mapping.update_gradients(self.dL_dY, self.X) + + + + + class MappingTests(unittest.TestCase): From 9976e56bead213c7095b80ce34a11390a4c33fe6 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 23 Mar 2015 14:24:32 +0000 Subject: [PATCH 110/166] lots of changes to mappings --- GPy/core/mapping.py | 44 +--------- GPy/mappings/additive.py | 36 ++------ GPy/mappings/identity.py | 26 ++++++ GPy/mappings/kernel.py | 62 ++++++-------- GPy/mappings/linear.py | 42 +++++----- GPy/mappings/mlp.py | 136 ++++++------------------------- GPy/mappings/piecewise_linear.py | 94 +++++++++++++++++++++ 7 files changed, 202 insertions(+), 238 deletions(-) create mode 100644 GPy/mappings/identity.py create mode 100644 GPy/mappings/piecewise_linear.py diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py index 25fe504a..dd45a26e 100644 --- a/GPy/core/mapping.py +++ b/GPy/core/mapping.py @@ -1,4 +1,5 @@ # Copyright (c) 2013,2014, GPy authors (see AUTHORS.txt). +# Copyright (c) 2015, James Hensman # Licensed under the BSD 3-clause license (see LICENSE.txt) import sys @@ -7,7 +8,7 @@ import numpy as np class Mapping(Parameterized): """ - Base model for shared behavior between models that can act like a mapping. + Base model for shared mapping behaviours """ def __init__(self, input_dim, output_dim, name='mapping'): @@ -18,49 +19,12 @@ class Mapping(Parameterized): def f(self, X): raise NotImplementedError - def df_dX(self, dL_df, X): - """Evaluate derivatives of mapping outputs with respect to inputs. - - :param dL_df: gradient of the objective with respect to the function. - :type dL_df: ndarray (num_data x output_dim) - :param X: the input locations where derivatives are to be evaluated. - :type X: ndarray (num_data x input_dim) - :returns: matrix containing gradients of the function with respect to the inputs. 
- """ + def gradients_X(self, dL_dF, X): raise NotImplementedError - def df_dtheta(self, dL_df, X): - """The gradient of the outputs of the mapping with respect to each of the parameters. - - :param dL_df: gradient of the objective with respect to the function. - :type dL_df: ndarray (num_data x output_dim) - :param X: input locations where the function is evaluated. - :type X: ndarray (num_data x input_dim) - :returns: Matrix containing gradients with respect to parameters of each output for each input data. - :rtype: ndarray (num_params length) - """ - + def update_gradients(self, dL_dF, X): raise NotImplementedError - def plot(self, *args): - """ - Plots the mapping associated with the model. - - In one dimension, the function is plotted. - - In two dimensions, a contour-plot shows the function - - In higher dimensions, we've not implemented this yet !TODO! - - Can plot only part of the data and part of the posterior functions - using which_data and which_functions - - This is a convenience function: arguments are passed to - GPy.plotting.matplot_dep.models_plots.plot_mapping - """ - - if "matplotlib" in sys.modules: - from ..plotting.matplot_dep import models_plots - mapping_plots.plot_mapping(self,*args) - else: - raise NameError, "matplotlib package has not been imported." class Bijective_mapping(Mapping): """ diff --git a/GPy/mappings/additive.py b/GPy/mappings/additive.py index 5297982b..4e7c545d 100644 --- a/GPy/mappings/additive.py +++ b/GPy/mappings/additive.py @@ -17,45 +17,25 @@ class Additive(Mapping): :type mapping1: GPy.mappings.Mapping :param mapping2: second mapping to add together. :type mapping2: GPy.mappings.Mapping - :param tensor: whether or not to use the tensor product of input spaces - :type tensor: bool """ - def __init__(self, mapping1, mapping2, tensor=False): - if tensor: - input_dim = mapping1.input_dim + mapping2.input_dim - else: - input_dim = mapping1.input_dim - assert(mapping1.input_dim==mapping2.input_dim) + def __init__(self, mapping1, mapping2): + assert(mapping1.input_dim==mapping2.input_dim) assert(mapping1.output_dim==mapping2.output_dim) - output_dim = mapping1.output_dim + input_dim, output_dim = mapping1.input_dim, mapping1.output_dim Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim) self.mapping1 = mapping1 self.mapping2 = mapping2 self.num_params = self.mapping1.num_params + self.mapping2.num_params self.name = self.mapping1.name + '+' + self.mapping2.name - def _get_param_names(self): - return self.mapping1._get_param_names + self.mapping2._get_param_names - - def _get_params(self): - return np.hstack((self.mapping1._get_params(), self.mapping2._get_params())) - - def _set_params(self, x): - self.mapping1._set_params(x[:self.mapping1.num_params]) - self.mapping2._set_params(x[self.mapping1.num_params:]) - - def randomize(self): - self.mapping1._randomize() - self.mapping2._randomize() def f(self, X): return self.mapping1.f(X) + self.mapping2.f(X) - def df_dtheta(self, dL_df, X): - self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T - self._df_dbias = (dL_df.sum(0)) - return np.hstack((self._df_dA.flatten(), self._df_dbias)) + def update_gradients(self, dL_dF, X): + self.mapping1.update_gradients(dL_dF, X) + self.mapping2.update_gradients(dL_dF, X) - def df_dX(self, dL_df, X): - return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X) + def gradients_X(self, dL_dF, X): + return self.mapping1.gradients_X(dL_dF, X) + self.mapping2.gradients_X(dL_dF, X) diff --git 
a/GPy/mappings/identity.py b/GPy/mappings/identity.py new file mode 100644 index 00000000..b15e476c --- /dev/null +++ b/GPy/mappings/identity.py @@ -0,0 +1,26 @@ +# Copyright (c) 2015, James Hensman + +from ..core.mapping import Mapping +from ..core import Param + +class Identity(Mapping): + """ + A mapping that does nothing! + """ + def __init__(self, input_dim, output_dim, name='identity'): + Mapping.__init__(self, input_dim, output_dim, name) + + def f(self, X): + return X + + def update_gradients(self, dL_dF, X): + pass + + def gradients_X(self, dL_dF, X): + return dL_dF + + + + + + diff --git a/GPy/mappings/kernel.py b/GPy/mappings/kernel.py index 74fa344f..3bfcd388 100644 --- a/GPy/mappings/kernel.py +++ b/GPy/mappings/kernel.py @@ -1,9 +1,10 @@ # Copyright (c) 2013, GPy authors (see AUTHORS.txt). +# Copyright (c) 2015, James Hensman # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np from ..core.mapping import Mapping -import GPy +from ..core import Param class Kernel(Mapping): """ @@ -11,50 +12,41 @@ class Kernel(Mapping): .. math:: - f(\mathbf{x}*) = \mathbf{A}\mathbf{k}(\mathbf{X}, \mathbf{x}^*) + \mathbf{b} + f(\mathbf{x}) = \sum_i \alpha_i k(\mathbf{z}_i, \mathbf{x}) - :param X: input observations containing :math:`\mathbf{X}` - :type X: ndarray + or for multple outputs + + .. math:: + + f_i(\mathbf{x}) = \sum_j \alpha_{i,j} k(\mathbf{z}_i, \mathbf{x}) + + + :param input_dim: dimension of input. + :type input_dim: int :param output_dim: dimension of output. :type output_dim: int + :param Z: input observations containing :math:`\mathbf{Z}` + :type Z: ndarray :param kernel: a GPy kernel, defaults to GPy.kern.RBF :type kernel: GPy.kern.kern """ - def __init__(self, X, output_dim=1, kernel=None): - Mapping.__init__(self, input_dim=X.shape[1], output_dim=output_dim) - if kernel is None: - kernel = GPy.kern.RBF(self.input_dim) + def __init__(self, input_dim, output_dim, Z, kernel, name='kernmap'): + Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name) self.kern = kernel - self.X = X - self.num_data = X.shape[0] - self.num_params = self.output_dim*(self.num_data + 1) - self.A = np.array((self.num_data, self.output_dim)) - self.bias = np.array(self.output_dim) - self.randomize() - self.name = 'kernel' - def _get_param_names(self): - return sum([['A_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.num_data)], []) + ['bias_%i' % d for d in range(self.output_dim)] - - def _get_params(self): - return np.hstack((self.A.flatten(), self.bias)) - - def _set_params(self, x): - self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy() - self.bias = x[self.num_data*self.output_dim:].copy() - - def randomize(self): - self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1) - self.bias = np.random.randn(self.output_dim)/np.sqrt(self.num_data+1) + self.Z = Z + self.num_bases, Zdim = X.shape + assert Zdim == self.input_dim + self.A = GPy.core.Param('A', np.random.randn(self.num_bases, self.output_dim)) + self.add_parameter(self.A) def f(self, X): - return np.dot(self.kern.K(X, self.X),self.A) + self.bias + return np.dot(self.kern.K(X, self.Z), self.A) - def df_dtheta(self, dL_df, X): - self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T - self._df_dbias = (dL_df.sum(0)) - return np.hstack((self._df_dA.flatten(), self._df_dbias)) + def update_gradients(self, dL_dF, X): + self.kern.update_gradients_full(np.dot(dL_dF, self.A.T)) + self.A.gradient = 
np.dot( self.kern.K(self.Z, X), dL_dF)
 
-    def df_dX(self, dL_df, X):
-        return self.kern.gradients_X((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
+    def gradients_X(self, dL_dF, X):
+        return self.kern.gradients_X(np.dot(dL_dF, self.A.T), X, self.Z)
diff --git a/GPy/mappings/linear.py b/GPy/mappings/linear.py
index 315dfc0e..e172b4e2 100644
--- a/GPy/mappings/linear.py
+++ b/GPy/mappings/linear.py
@@ -1,43 +1,39 @@
 # Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
+# Copyright (c) 2015, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import numpy as np
-from ..core.mapping import Bijective_mapping
+from ..core.mapping import Mapping
 from ..core.parameterization import Param
 
-class Linear(Bijective_mapping):
+class Linear(Mapping):
     """
-    Mapping based on a linear model.
+    A Linear mapping.
 
     .. math::
 
-       f(\mathbf{x}*) = \mathbf{W}\mathbf{x}^* + \mathbf{b}
+       F(\mathbf{x}) = \mathbf{A} \mathbf{x}
 
-    :param X: input observations
-    :type X: ndarray
+
+    :param input_dim: dimension of input.
+    :type input_dim: int
     :param output_dim: dimension of output.
     :type output_dim: int
     """
 
-    def __init__(self, input_dim=1, output_dim=1, name='linear'):
-        Bijective_mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
-        self.W = Param('W',np.array((self.input_dim, self.output_dim)))
-        self.bias = Param('bias',np.array(self.output_dim))
-        self.link_parameters(self.W, self.bias)
+    def __init__(self, input_dim, output_dim, name='linmap'):
+        Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
+        self.A = GPy.core.Param('A', np.random.randn(self.input_dim, self.output_dim))
+        self.add_parameter(self.A)
 
     def f(self, X):
-        return np.dot(X,self.W) + self.bias
+        return np.dot(X, self.A)
 
-    def g(self, f):
-        V = np.linalg.solve(np.dot(self.W.T, self.W), W.T)
-        return np.dot(f-self.bias, V)
+    def update_gradients(self, dL_dF, X):
+        self.A.gradient = np.dot( X.T dL_dF)
 
-    def df_dtheta(self, dL_df, X):
-        df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
-        df_dbias = (dL_df.sum(0))
-        return np.hstack((df_dW.flatten(), df_dbias))
-
-    def dL_dX(self, partial, X):
-        """The gradient of L with respect to the inputs to the mapping, where L is a function that is dependent on the output of the mapping, f."""
-        return (partial[:, None, :]*self.W[None, :, :]).sum(2)
+    def gradients_X(self, dL_dF, X):
+        return np.dot(dL_dF, self.A.T)
diff --git a/GPy/mappings/mlp.py b/GPy/mappings/mlp.py
index 46dbc2a9..f22fc07f 100644
--- a/GPy/mappings/mlp.py
+++ b/GPy/mappings/mlp.py
@@ -3,128 +3,40 @@
 import numpy as np
 from ..core.mapping import Mapping
+from ..core import Param
 
 class MLP(Mapping):
     """
-    Mapping based on a multi-layer perceptron neural network model.
-
-    .. math::
-
-       f(\\mathbf{x}*) = \\mathbf{W}^0\\boldsymbol{\\phi}(\\mathbf{W}^1\\mathbf{x}+\\mathbf{b}^1)^* + \\mathbf{b}^0
-
-    where
-
-    .. math::
-
-       \\phi(\\cdot) = \\text{tanh}(\\cdot)
-
-    :param X: input observations
-    :type X: ndarray
-    :param output_dim: dimension of output.
-    :type output_dim: int
-    :param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints there are as manny hidden layers as the length of the list, each with the given number of hidden nodes in it.
-    :type hidden_dim: int or list of ints.
- + Mapping based on a multi-layer perceptron neural network model, with a single hidden layer """ - def __init__(self, input_dim=1, output_dim=1, hidden_dim=3): - Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim) - self.name = 'mlp' - if isinstance(hidden_dim, int): - hidden_dim = [hidden_dim] + def __init__(self, input_dim=1, output_dim=1, hidden_dim=3, name='mlpmap'): + super(MLP).__init__(self, input_dim=input_dim, output_dim=output_dim, name=name) self.hidden_dim = hidden_dim - self.activation = [None]*len(self.hidden_dim) - self.W = [] - self._dL_dW = [] - self.bias = [] - self._dL_dbias = [] - self.W.append(np.zeros((self.input_dim, self.hidden_dim[0]))) - self._dL_dW.append(np.zeros((self.input_dim, self.hidden_dim[0]))) - self.bias.append(np.zeros(self.hidden_dim[0])) - self._dL_dbias.append(np.zeros(self.hidden_dim[0])) - self.num_params = self.hidden_dim[0]*(self.input_dim+1) - for h1, h0 in zip(hidden_dim[1:], hidden_dim[0:-1]): - self.W.append(np.zeros((h0, h1))) - self._dL_dW.append(np.zeros((h0, h1))) - self.bias.append(np.zeros(h1)) - self._dL_dbias.append(np.zeros(h1)) - self.num_params += h1*(h0+1) - self.W.append(np.zeros((self.hidden_dim[-1], self.output_dim))) - self._dL_dW.append(np.zeros((self.hidden_dim[-1], self.output_dim))) - self.bias.append(np.zeros(self.output_dim)) - self._dL_dbias.append(np.zeros(self.output_dim)) - self.num_params += self.output_dim*(self.hidden_dim[-1]+1) - self.randomize() + self.W1 = Param('W1', np.random.randn(self.input_dim, self.hidden_dim)) + self.b1 = Param('b1', np.random.randn(self.hidden_dim)) + self.W2 = Param('W2', np.random.randn(self.hidden_dim, self.output_dim)) + self.b2 = Param('b2', np.random.randn(self.output_dim)) - def _get_param_names(self): - return sum([['W%i_%i_%i' % (i, n, d) for n in range(self.W[i].shape[0]) for d in range(self.W[i].shape[1])] + ['bias%i_%i' % (i, d) for d in range(self.W[i].shape[1])] for i in range(len(self.W))], []) - - def _get_params(self): - param = np.array([]) - for W, bias in zip(self.W, self.bias): - param = np.hstack((param, W.flatten(), bias)) - return param - - def _set_params(self, x): - start = 0 - for W, bias in zip(self.W, self.bias): - end = W.shape[0]*W.shape[1]+start - W[:] = x[start:end].reshape(W.shape[0], W.shape[1]).copy() - start = end - end = W.shape[1]+end - bias[:] = x[start:end].copy() - start = end - - def randomize(self): - for W, bias in zip(self.W, self.bias): - W[:] = np.random.randn(W.shape[0], W.shape[1])/np.sqrt(W.shape[0]+1) - bias[:] = np.random.randn(W.shape[1])/np.sqrt(W.shape[0]+1) def f(self, X): - self._f_computations(X) - return np.dot(np.tanh(self.activation[-1]), self.W[-1]) + self.bias[-1] + N, D = X.shape + activations = np.tanh(np.dot(X,self.W1) + self.b1) + self.out = np.dot(self.activations,self.W2) + self.b2 + return self.output_fn(self.out) - def _f_computations(self, X): - W = self.W[0] - bias = self.bias[0] - self.activation[0] = np.dot(X,W) + bias - for W, bias, index in zip(self.W[1:-1], self.bias[1:-1], range(1, len(self.activation))): - self.activation[index] = np.dot(np.tanh(self.activation[index-1]), W)+bias + def update_gradients(self, dL_dF, X): + activations = np.tanh(np.dot(X,self.W1) + self.b1) - def df_dtheta(self, dL_df, X): - self._df_computations(dL_df, X) - g = np.array([]) - for gW, gbias in zip(self._dL_dW, self._dL_dbias): - g = np.hstack((g, gW.flatten(), gbias)) - return g - def _df_computations(self, dL_df, X): - self._f_computations(X) - a0 = self.activation[-1] - W = self.W[-1] - self._dL_dW[-1] = 
(dL_df[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
-        dL_dta=(dL_df[:, None, :]*W[None, :, :]).sum(2)
-        self._dL_dbias[-1] = (dL_df.sum(0))
-        for dL_dW, dL_dbias, W, bias, a0, a1 in zip(self._dL_dW[-2:0:-1],
-                                                    self._dL_dbias[-2:0:-1],
-                                                    self.W[-2:0:-1],
-                                                    self.bias[-2:0:-1],
-                                                    self.activation[-2::-1],
-                                                    self.activation[-1:0:-1]):
-            ta = np.tanh(a1)
-            dL_da = dL_dta*(1-ta*ta)
-            dL_dW[:] = (dL_da[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
-            dL_dbias[:] = (dL_da.sum(0))
-            dL_dta = (dL_da[:, None, :]*W[None, :, :]).sum(2)
-        ta = np.tanh(self.activation[0])
-        dL_da = dL_dta*(1-ta*ta)
-        W = self.W[0]
-        self._dL_dW[0] = (dL_da[:, :, None]*X[:, None, :]).sum(0).T
-        self._dL_dbias[0] = (dL_da.sum(0))
-        self._dL_dX = (dL_da[:, None, :]*W[None, :, :]).sum(2)
+
+
-
-    def df_dX(self, dL_df, X):
-        self._df_computations(dL_df, X)
-        return self._dL_dX
-
diff --git a/GPy/mappings/piecewise_linear.py b/GPy/mappings/piecewise_linear.py
new file mode 100644
index 00000000..8bdee81e
--- /dev/null
+++ b/GPy/mappings/piecewise_linear.py
@@ -0,0 +1,94 @@
+from GPy.core.mapping import Mapping
+from GPy.core import Param
+import numpy as np
+
+class PiecewiseLinear(Mapping):
+    """
+    A piecewise-linear mapping.
+
+    The parameters of this mapping are the positions and values of the function where it is broken (self.breaks, self.values).
+
+    Outside the range of the breaks, the function is assumed to have gradient 1
+    """
+    def __init__(self, input_dim, output_dim, values, breaks, name='piecewise_linear'):
+
+        assert input_dim==1
+        assert output_dim==1
+
+        Mapping.__init__(self, input_dim, output_dim, name)
+
+        values, breaks = np.array(values).flatten(), np.array(breaks).flatten()
+        assert values.size == breaks.size
+        self.values = Param('values', values)
+        self.breaks = Param('breaks', breaks)
+        self.link_parameter(self.values)
+        self.link_parameter(self.breaks)
+
+    def parameters_changed(self):
+        self.order = np.argsort(self.breaks)*1
+        self.reverse_order = np.zeros_like(self.order)
+        self.reverse_order[self.order] = np.arange(self.order.size)
+
+        self.sorted_breaks = self.breaks[self.order]
+        self.sorted_values = self.values[self.order]
+
+        self.grads = np.diff(self.sorted_values)/np.diff(self.sorted_breaks)
+
+    def f(self, X):
+        x = X.flatten()
+        y = x.copy()
+
+        #first adjust the points below the first value
+        y[x<self.sorted_breaks[0]] = x[x<self.sorted_breaks[0]] + self.sorted_values[0] - self.sorted_breaks[0]
+
+        #and the points above the last value
+        y[x>self.sorted_breaks[-1]] = x[x>self.sorted_breaks[-1]] + self.sorted_values[-1] - self.sorted_breaks[-1]
+
+        #loop through the pairs of points
+        for low, up, g, v in zip(self.sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1]):
+            i = np.logical_and(x>low, x<up)
+            y[i] = v + (x[i]-low)*g
+
+        return y.reshape(X.shape)
+
+    def update_gradients(self, dL_dF, X):
+        x = X.flatten()
+        dL_dF = dL_dF.flatten()
+        dL_dv = np.zeros(self.values.size)
+        dL_db = np.zeros(self.breaks.size)
+
+        #loop over the segments, accumulating the gradients of the loss wrt the (sorted) values and breaks
+        for k, (low, up, g, v) in enumerate(zip(self.sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1])):
+            i = np.logical_and(x>low, x<up)
+            t = (x[i]-low)/(up-low)
+            dL_dv[k] += np.sum(dL_dF[i]*(1.-t))
+            dL_dv[k+1] += np.sum(dL_dF[i]*t)
+            dL_db[k] += np.sum(dL_dF[i]*g*(t-1.))
+            dL_db[k+1] -= np.sum(dL_dF[i]*g*t)
+
+        #outside the breaks the function is x plus a constant, so only the end values and breaks are affected
+        dL_dv[-1] += np.sum(dL_dF[x>self.sorted_breaks[-1]])
+        dL_dv[0] += np.sum(dL_dF[x<self.sorted_breaks[0]])
+        dL_db[0] -= np.sum(dL_dF[x<self.sorted_breaks[0]])
+        dL_db[-1] -= np.sum(dL_dF[x>self.sorted_breaks[-1]])
+
+        #now put the gradients back in the correct order!
+        self.breaks.gradient = dL_db[self.reverse_order]
+        self.values.gradient = dL_dv[self.reverse_order]
+
+    def gradients_X(self, dL_dF, X):
+        x = X.flatten()
+
+        #outside the range of the breakpoints, the function is just offset by a constant, so the partial derivative is 1.
+        dL_dX = dL_dF.copy().flatten()
+
+        #inside the breakpoints, the partial derivative is self.grads
+        for low, up, g, v in zip(self.sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1]):
+            i = np.logical_and(x>low, x<up)
+            dL_dX[i] = dL_dX[i]*g
+
+        return dL_dX.reshape(X.shape)

From: mzwiessele
Date: Mon, 23 Mar 2015 14:28:56 +0000
Subject: [PATCH 111/166] [verbose opt] ipython notebook new version widget changes

---
 GPy/core/verbose_optimization.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/GPy/core/verbose_optimization.py b/GPy/core/verbose_optimization.py
index f84cfc7e..e95f643e 100644
--- a/GPy/core/verbose_optimization.py
+++ b/GPy/core/verbose_optimization.py
@@ -28,19 +28,20 @@ class VerboseOptimization(object):
             try:
                 from IPython.display import display
-                from IPython.html.widgets import FloatProgressWidget, HTMLWidget, ContainerWidget
-                self.text = HTMLWidget()
-                self.progress = FloatProgressWidget()
-                self.model_show = HTMLWidget()
+                from IPython.html.widgets import IntProgress, HTML, Box, VBox, HBox, FlexBox
+                self.text = HTML(width='100%')
+                self.progress = IntProgress(description='Progress:', min=0, max=maxiters)
+                #self.progresstext = Text(width='100%', disabled=True, value='0/{}'.format(maxiters))
+                self.model_show = HTML()
+                self.ipython_notebook = ipython_notebook
             except:
                 # Not in Ipython notebook
                 self.ipython_notebook = False
 
             if self.ipython_notebook:
-                left_col = ContainerWidget(children = [self.progress, self.text])
-                right_col = ContainerWidget(children = [self.model_show])
-                self.hor_align = ContainerWidget(children = [left_col, right_col])
+                left_col = VBox(children=[self.progress, self.text], padding=2, width='40%')
+                right_col = Box(children=[self.model_show], padding=2, width='60%')
+                self.hor_align = FlexBox(children = [left_col, right_col], width='100%', orientation='horizontal')
 
                 display(self.hor_align)
@@ -105,7 +106,8 @@ class VerboseOptimization(object):
                 html_body += "{}".format(val)
             html_body += ""
             self.text.value = html_begin + html_body + html_end
-            self.progress.value = 100*(self.iteration+1)/self.maxiters
+            self.progress.value = (self.iteration+1)
+            #self.progresstext.value = '0/{}'.format((self.iteration+1))
             self.model_show.value = self.model._repr_html_()
         else:
             n_exps = exponents(self.fnow, self.current_gradient)
@@ -149,7 +151,7 @@ class VerboseOptimization(object):
         if not self.ipython_notebook:
             print ''
             print 'Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start)
-            print 'Optimization status: {0:.5g}'.format(self.status)
+            print 'Optimization status: {0:s}'.format(self.status)
             print
         elif self.clear:
             self.hor_align.close()
From 66c7da879a621de17cdbe84f60b38ab28d994345 Mon Sep 17 00:00:00 2001
From: mzwiessele
Date: Mon, 23 Mar 2015 14:30:25 +0000
Subject: [PATCH 112/166] [verbose opt] ipython notebook new version widget changes

---
 GPy/core/verbose_optimization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/core/verbose_optimization.py b/GPy/core/verbose_optimization.py
index e95f643e..54e650c3 100644
--- a/GPy/core/verbose_optimization.py
+++ b/GPy/core/verbose_optimization.py
@@ -30,7 +30,7 @@ class VerboseOptimization(object):
             from IPython.display import display
             from IPython.html.widgets import IntProgress, HTML, Box, VBox, HBox, FlexBox
             self.text = HTML(width='100%')
-            self.progress = IntProgress(description='Progress:', min=0, max=maxiters)
+            self.progress = IntProgress(min=0, max=maxiters)
             #self.progresstext = Text(width='100%', disabled=True, value='0/{}'.format(maxiters))
             self.model_show = HTML()
             self.ipython_notebook = ipython_notebook
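A minimal usage sketch for the PiecewiseLinear mapping introduced in PATCH 110 above (illustrative only; the break and value numbers are arbitrary):

    import numpy as np
    import GPy

    # slope 1 outside the breaks, linearly interpolated slopes between them
    warp = GPy.mappings.PiecewiseLinear(1, 1, values=[-2., 0., 2.], breaks=[-1., 0., 1.])
    X = np.random.randn(10, 1)
    print warp.f(X)                              # mapped values
    print warp.gradients_X(np.ones((10, 1)), X)  # 1. outside the breaks, 2. inside

From fa801bf46c3c6104ab787cde2ab4e71f2bfabeea Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Mon, 23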
Mar 2015 14:47:49 +0000 Subject: [PATCH 113/166] mean functions in place --- GPy/inference/latent_function_inference/dtc.py | 6 ++++-- .../latent_function_inference/exact_gaussian_inference.py | 4 +++- .../latent_function_inference/expectation_propagation.py | 3 ++- .../expectation_propagation_dtc.py | 3 ++- GPy/inference/latent_function_inference/fitc.py | 3 ++- GPy/inference/latent_function_inference/laplace.py | 3 ++- GPy/inference/latent_function_inference/svgp.py | 3 ++- GPy/mappings/linear.py | 2 +- 8 files changed, 18 insertions(+), 9 deletions(-) diff --git a/GPy/inference/latent_function_inference/dtc.py b/GPy/inference/latent_function_inference/dtc.py index 5590a079..a12726e2 100644 --- a/GPy/inference/latent_function_inference/dtc.py +++ b/GPy/inference/latent_function_inference/dtc.py @@ -20,7 +20,8 @@ class DTC(LatentFunctionInference): def __init__(self): self.const_jitter = 1e-6 - def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): + def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None): + assert mean_function is None, "inference with a mean function not implemented" assert X_variance is None, "cannot use X_variance with DTC. Try varDTC." num_inducing, _ = Z.shape @@ -88,7 +89,8 @@ class vDTC(object): def __init__(self): self.const_jitter = 1e-6 - def inference(self, kern, X, X_variance, Z, likelihood, Y, Y_metadata): + def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None): + assert mean_function is None, "inference with a mean function not implemented" assert X_variance is None, "cannot use X_variance with DTC. Try varDTC." num_inducing, _ = Z.shape diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index 1312d36a..312855f7 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -36,10 +36,12 @@ class ExactGaussianInference(LatentFunctionInference): #print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!" return Y - def inference(self, kern, X, likelihood, Y, Y_metadata=None): + def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None): """ Returns a Posterior class containing essential quantities of the posterior """ + assert mean_function is None, "inference with a mean function not implemented" + YYT_factor = self.get_YYTfactor(Y) K = kern.K(X) diff --git a/GPy/inference/latent_function_inference/expectation_propagation.py b/GPy/inference/latent_function_inference/expectation_propagation.py index 26144974..ba920569 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation.py +++ b/GPy/inference/latent_function_inference/expectation_propagation.py @@ -33,7 +33,8 @@ class EP(LatentFunctionInference): # TODO: update approximation in the end as well? Maybe even with a switch? 
pass - def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None): + def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, Z=None): + assert mean_function is None, "inference with a mean function not implemented" num_data, output_dim = Y.shape assert output_dim ==1, "ep in 1D only (for now!)" diff --git a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py index 35b1b7dc..466cbbb2 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation_dtc.py +++ b/GPy/inference/latent_function_inference/expectation_propagation_dtc.py @@ -64,7 +64,8 @@ class EPDTC(LatentFunctionInference): self.old_mutilde, self.old_vtilde = None, None self._ep_approximation = None - def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): + def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None): + assert mean_function is None, "inference with a mean function not implemented" num_data, output_dim = Y.shape assert output_dim ==1, "ep in 1D only (for now!)" diff --git a/GPy/inference/latent_function_inference/fitc.py b/GPy/inference/latent_function_inference/fitc.py index a184c6c4..f99b35ff 100644 --- a/GPy/inference/latent_function_inference/fitc.py +++ b/GPy/inference/latent_function_inference/fitc.py @@ -18,7 +18,8 @@ class FITC(LatentFunctionInference): """ const_jitter = 1e-6 - def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): + def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None): + assert mean_function is None, "inference with a mean function not implemented" num_inducing, _ = Z.shape num_data, output_dim = Y.shape diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 05711b0b..4e6ece11 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -39,10 +39,11 @@ class Laplace(LatentFunctionInference): self.first_run = True self._previous_Ki_fhat = None - def inference(self, kern, X, likelihood, Y, Y_metadata=None): + def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None): """ Returns a Posterior class containing essential quantities of the posterior """ + assert mean_function is None, "inference with a mean function not implemented" # Compute K K = kern.K(X) diff --git a/GPy/inference/latent_function_inference/svgp.py b/GPy/inference/latent_function_inference/svgp.py index 1974991b..23d36a14 100644 --- a/GPy/inference/latent_function_inference/svgp.py +++ b/GPy/inference/latent_function_inference/svgp.py @@ -6,7 +6,8 @@ from posterior import Posterior class SVGP(LatentFunctionInference): - def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None, KL_scale=1.0, batch_scale=1.0): + def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, KL_scale=1.0, batch_scale=1.0): + assert mean_function is None, "inference with a mean function not implemented" num_inducing = Z.shape[0] num_data, num_outputs = Y.shape diff --git a/GPy/mappings/linear.py b/GPy/mappings/linear.py index e172b4e2..6fc91944 100644 --- a/GPy/mappings/linear.py +++ b/GPy/mappings/linear.py @@ -33,7 +33,7 @@ class Linear(Mapping): return np.dot(X, self.A) def update_gradients(self, dL_dF, X): - self.A.gradient = np.dot( X.T dL_dF) + self.A.gradient = np.dot( X.T, dL_dF) def gradients_X(self, dL_dF, 
X): return np.dot(dL_dF, self.A.T) From 611febe7eb313a26803f4db86120a64ee66a5c96 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 23 Mar 2015 14:48:13 +0000 Subject: [PATCH 114/166] stupid bug --- GPy/mappings/linear.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/mappings/linear.py b/GPy/mappings/linear.py index e172b4e2..6fc91944 100644 --- a/GPy/mappings/linear.py +++ b/GPy/mappings/linear.py @@ -33,7 +33,7 @@ class Linear(Mapping): return np.dot(X, self.A) def update_gradients(self, dL_dF, X): - self.A.gradient = np.dot( X.T dL_dF) + self.A.gradient = np.dot( X.T, dL_dF) def gradients_X(self, dL_dF, X): return np.dot(dL_dF, self.A.T) From 2d312099c00d21511b5fa06f534aa074733dd43a Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 23 Mar 2015 14:56:19 +0000 Subject: [PATCH 115/166] added parseing of mean func to gp.py --- GPy/core/gp.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 38a7bb3d..732db7e2 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -34,7 +34,7 @@ class GP(Model): """ - def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', Y_metadata=None, normalizer=False): + def __init__(self, X, Y, kernel, likelihood, mean_function=None, inference_method=None, name='gp', Y_metadata=None, normalizer=False): super(GP, self).__init__(name) assert X.ndim == 2 @@ -75,6 +75,15 @@ class GP(Model): assert isinstance(likelihood, likelihoods.Likelihood) self.likelihood = likelihood + #handle the mean function + self.mean_function = mean_function + if mean_function is not None: + assert isinstance(self.mean_function, Mapping) + assert mean_function.input_dim == self.input_dim + assert mean_function.output_dim == self.output_dim + self.add_parameter(mean_function) + + #find a sensible inference method logger.info("initializing inference method") if inference_method is None: @@ -153,7 +162,7 @@ class GP(Model): This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call this method yourself, there may be unexpected consequences. """ - self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata) + self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.mean_function, self.Y_metadata) self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X) From e97b6e59aab6243e8440475078ef0300eaae8639 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 23 Mar 2015 14:59:08 +0000 Subject: [PATCH 116/166] added mean function into the prediction --- GPy/core/gp.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 732db7e2..d415d995 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -5,6 +5,7 @@ import numpy as np import sys from .. import kern from model import Model +from mapping import Mapping from parameterization import ObsAr from .. 
import likelihoods from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation @@ -201,6 +202,10 @@ class GP(Model): #force mu to be a column vector if len(mu.shape)==1: mu = mu[:,None] + + #add the mean function in + if not self.mean_function is None: + mu += self.mean_function.f(_Xnew) return mu, var def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None): From 90cc217e37e50d7b329e079085c91f5af9a5b225 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 23 Mar 2015 15:48:20 +0000 Subject: [PATCH 117/166] minimual edits to exact_inference --- .../exact_gaussian_inference.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index 312855f7..4c6b70df 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -40,9 +40,14 @@ class ExactGaussianInference(LatentFunctionInference): """ Returns a Posterior class containing essential quantities of the posterior """ - assert mean_function is None, "inference with a mean function not implemented" - YYT_factor = self.get_YYTfactor(Y) + if mean_function is None: + m = 0 + else: + m = mean_function.f(X) + + + YYT_factor = self.get_YYTfactor(Y-m) K = kern.K(X) From 53081c704de370169400002dfcab8b21ebce82e2 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 24 Mar 2015 13:58:41 +0000 Subject: [PATCH 118/166] derivatives of likelihood things now working for svgp --- GPy/inference/latent_function_inference/svgp.py | 2 ++ GPy/likelihoods/likelihood.py | 6 +++++- GPy/likelihoods/student_t.py | 5 +++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/GPy/inference/latent_function_inference/svgp.py b/GPy/inference/latent_function_inference/svgp.py index 1974991b..5888bead 100644 --- a/GPy/inference/latent_function_inference/svgp.py +++ b/GPy/inference/latent_function_inference/svgp.py @@ -47,6 +47,8 @@ class SVGP(LatentFunctionInference): #rescale the F term if working on a batch F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale + if dF_dthetaL is not None: + dF_dthetaL = dF_dthetaL.sum(1)*batch_scale #derivatives of expected likelihood Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index b1e78b93..0bf9fc6f 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -177,7 +177,11 @@ class Likelihood(Parameterized): if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)): stop - dF_dtheta = None # Not yet implemented + if self.size: + dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None]) # Ntheta x (orig size) x N_{quad_points} + dF_dtheta = np.dot(dF_dtheta, gh_w) + else: + dF_dtheta = None # Not yet implemented return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), dF_dtheta def predictive_mean(self, mu, variance, Y_metadata=None): diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py index dbd4d94f..c805d1dd 100644 --- a/GPy/likelihoods/student_t.py +++ b/GPy/likelihoods/student_t.py @@ -180,7 +180,8 @@ class StudentT(Likelihood): :rtype: float """ e = y - inv_link_f - dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) + e2 = np.square(e) + dlogpdf_dvar = self.v*(e2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e2)) return 
dlogpdf_dvar def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None): @@ -226,7 +227,7 @@ class StudentT(Likelihood): def dlogpdf_link_dtheta(self, f, y, Y_metadata=None): dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet - return np.hstack((dlogpdf_dvar, dlogpdf_dv)) + return np.array((dlogpdf_dvar, dlogpdf_dv)) def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None): dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata) From 534e0df6066c50893991f94ebc8f71b8e7fe81de Mon Sep 17 00:00:00 2001 From: James Hensman Date: Tue, 24 Mar 2015 14:11:50 +0000 Subject: [PATCH 119/166] some tests for the svgp, and some changes to the likelihoods --- GPy/likelihoods/bernoulli.py | 2 +- GPy/likelihoods/student_t.py | 4 ++-- GPy/testing/svgp_tests.py | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 GPy/testing/svgp_tests.py diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index 26de274b..f5690aa4 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -77,7 +77,7 @@ class Bernoulli(Likelihood): return Z_hat, mu_hat, sigma2_hat - def variational_expectations(self, Y, m, v, gh_points=None): + def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None): if isinstance(self.gp_link, link_functions.Probit): if gh_points is None: diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py index c805d1dd..97c2286e 100644 --- a/GPy/likelihoods/student_t.py +++ b/GPy/likelihoods/student_t.py @@ -35,8 +35,8 @@ class StudentT(Likelihood): self.log_concave = False - def parameters_changed(self): - self.variance = (self.v / float(self.v - 2)) * self.sigma2 + #def parameters_changed(self): + #self.variance = (self.v / float(self.v - 2)) * self.sigma2 def update_gradients(self, grads): """ diff --git a/GPy/testing/svgp_tests.py b/GPy/testing/svgp_tests.py new file mode 100644 index 00000000..6dc0fa56 --- /dev/null +++ b/GPy/testing/svgp_tests.py @@ -0,0 +1,34 @@ +import numpy as np +import scipy as sp +import GPy + +class SVGP_nonconvex(np.testing.TestCase): + """ + Inference in the SVGP with a student-T likelihood + """ + def setUp(self): + X = np.linspace(0,10,100).reshape(-1,1) + Z = np.linspace(0,10,10).reshape(-1,1) + Y = np.sin(X) + np.random.randn(*X.shape)*0.1 + Y[50] += 3 + + lik = GPy.likelihoods.StudentT(deg_free=2) + k = GPy.kern.RBF(1, lengthscale=5.) + GPy.kern.White(1, 1e-6) + self.m = GPy.core.SVGP(X, Y, Z=Z, likelihood=lik, kernel=k) + def test_grad(self): + assert self.m.checkgrad(step=1e-4) + +class SVGP_classification(np.testing.TestCase): + """ + Inference in the SVGP with a Bernoulli likelihood + """ + def setUp(self): + X = np.linspace(0,10,100).reshape(-1,1) + Z = np.linspace(0,10,10).reshape(-1,1) + Y = np.where((np.sin(X) + np.random.randn(*X.shape)*0.1)>0, 1,0) + + lik = GPy.likelihoods.Bernoulli() + k = GPy.kern.RBF(1, lengthscale=5.) 
+ GPy.kern.White(1, 1e-6) + self.m = GPy.core.SVGP(X, Y, Z=Z, likelihood=lik, kernel=k) + def test_grad(self): + assert self.m.checkgrad(step=1e-4) From e74bfd81c6139206fe2cd1f686db19397f865398 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 25 Mar 2015 14:22:39 +0000 Subject: [PATCH 120/166] added some clarifying comments with NDL --- .../latent_function_inference/expectation_propagation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GPy/inference/latent_function_inference/expectation_propagation.py b/GPy/inference/latent_function_inference/expectation_propagation.py index 26144974..647823bd 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation.py +++ b/GPy/inference/latent_function_inference/expectation_propagation.py @@ -40,8 +40,11 @@ class EP(LatentFunctionInference): K = kern.K(X) if self._ep_approximation is None: + + #if we don't yet have the results of runnign EP, run EP and store the computed factors in self._ep_approximation mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata) else: + #if we've already run EP, just use the existing approximation stored in self._ep_approximation mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde)) From cf0e29b207bc8004e635c20c002ed9c6cfed3387 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 26 Mar 2015 08:48:56 +0000 Subject: [PATCH 121/166] working mean function examples --- GPy/core/gp.py | 4 +- GPy/examples/regression.py | 45 +++++++++++++++++++ .../exact_gaussian_inference.py | 2 +- GPy/mappings/linear.py | 4 +- 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index d415d995..fd39069d 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -82,7 +82,7 @@ class GP(Model): assert isinstance(self.mean_function, Mapping) assert mean_function.input_dim == self.input_dim assert mean_function.output_dim == self.output_dim - self.add_parameter(mean_function) + self.link_parameter(mean_function) #find a sensible inference method @@ -166,6 +166,8 @@ class GP(Model): self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.mean_function, self.Y_metadata) self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X) + if self.mean_function is not None: + self.mean_function.update_gradients(self.grad_dict['dL_dm'], self.X) def log_likelihood(self): """ diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 37a18f63..0e68d0bf 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -505,3 +505,48 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): print m return m + +def simple_mean_function(max_iters=100, optimize=True, plot=True): + """ + The simplest possible mean function. No parameters, just a simple Sinusoid. 
+ """ + #create simple mean function + mf = GPy.core.Mapping(1,1) + mf.f = np.sin + mf.update_gradients = lambda a,b: None + + X = np.linspace(0,10,50).reshape(-1,1) + Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + + k =GPy.kern.RBF(1) + lik = GPy.likelihoods.Gaussian() + m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf) + if optimize: + m.optimize(max_iters=max_iters) + if plot: + m.plot(plot_limits=(-10,15)) + return m + +def parametric_mean_function(max_iters=100, optimize=True, plot=True): + """ + A linear mean function with parameters that we'll learn alongside the kernel + """ + #create simple mean function + mf = GPy.core.Mapping(1,1) + mf.f = np.sin + + X = np.linspace(0,10,50).reshape(-1,1) + Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X + + mf = GPy.mappings.Linear(1,1) + + k =GPy.kern.RBF(1) + lik = GPy.likelihoods.Gaussian() + m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf) + if optimize: + m.optimize(max_iters=max_iters) + if plot: + m.plot() + return m + + diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index 4c6b70df..b2f1b7d0 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -63,4 +63,4 @@ class ExactGaussianInference(LatentFunctionInference): dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata) - return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL} + return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha} diff --git a/GPy/mappings/linear.py b/GPy/mappings/linear.py index 6fc91944..ee464694 100644 --- a/GPy/mappings/linear.py +++ b/GPy/mappings/linear.py @@ -26,8 +26,8 @@ class Linear(Mapping): def __init__(self, input_dim, output_dim, name='linmap'): Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name) - self.A = GPy.core.Param('A', np.random.randn(self.input_dim, self.output_dim)) - self.add_parameter(self.A) + self.A = Param('A', np.random.randn(self.input_dim, self.output_dim)) + self.link_parameter(self.A) def f(self, X): return np.dot(X, self.A) From 624117eaac16a0674201471119d940ccd81b1771 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 26 Mar 2015 11:27:20 +0000 Subject: [PATCH 122/166] mappings, including tests --- GPy/mappings/__init__.py | 3 +- GPy/mappings/additive.py | 5 +-- GPy/mappings/compound.py | 39 +++++++++++++++++++ GPy/mappings/kernel.py | 8 ++-- GPy/mappings/mlp.py | 33 ++++++++++++----- GPy/testing/mapping_tests.py | 72 ++++++++++++++++++++++++++++++++++++ 6 files changed, 141 insertions(+), 19 deletions(-) create mode 100644 GPy/mappings/compound.py create mode 100644 GPy/testing/mapping_tests.py diff --git a/GPy/mappings/__init__.py b/GPy/mappings/__init__.py index d331c678..b1cb194b 100644 --- a/GPy/mappings/__init__.py +++ b/GPy/mappings/__init__.py @@ -4,4 +4,5 @@ from kernel import Kernel from linear import Linear from mlp import MLP -#from rbf import RBF +from additive import Additive +from compound import Compound diff --git a/GPy/mappings/additive.py b/GPy/mappings/additive.py index 4e7c545d..1c86b680 100644 --- a/GPy/mappings/additive.py +++ b/GPy/mappings/additive.py @@ -2,8 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np 
-from ..core.mapping import Mapping -import GPy +from ..core import Mapping class Additive(Mapping): """ @@ -27,8 +26,6 @@ class Additive(Mapping): Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim) self.mapping1 = mapping1 self.mapping2 = mapping2 - self.num_params = self.mapping1.num_params + self.mapping2.num_params - self.name = self.mapping1.name + '+' + self.mapping2.name def f(self, X): return self.mapping1.f(X) + self.mapping2.f(X) diff --git a/GPy/mappings/compound.py b/GPy/mappings/compound.py new file mode 100644 index 00000000..5a1e8dd1 --- /dev/null +++ b/GPy/mappings/compound.py @@ -0,0 +1,39 @@ +# Copyright (c) 2015, James Hensman and Alan Saul +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +from ..core import Mapping + +class Compound(Mapping): + """ + Mapping based on passing one mapping through another + + .. math:: + + f(\mathbf{x}) = f_2(f_1(\mathbf{x})) + + :param mapping1: first mapping + :type mapping1: GPy.mappings.Mapping + :param mapping2: second mapping + :type mapping2: GPy.mappings.Mapping + + """ + + def __init__(self, mapping1, mapping2): + assert(mapping1.output_dim==mapping2.input_dim) + input_dim, output_dim = mapping1.input_dim, mapping2.output_dim + Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim) + self.mapping1 = mapping1 + self.mapping2 = mapping2 + self.link_parameters(self.mapping1, self.mapping2) + + def f(self, X): + return self.mapping2.f(self.mapping1.f(X)) + + def update_gradients(self, dL_dF, X): + hidden = self.mapping1.f(X) + self.mapping2.update_gradients(dL_dF, hidden) + self.mapping1.update_gradients(self.mapping2.gradients_X(dL_dF, hidden), X) + + def gradients_X(self, dL_dF, X): + hidden = self.mapping1.f(X) + return self.mapping1.gradients_X(self.mapping2.gradients_X(dL_dF, hidden), X) diff --git a/GPy/mappings/kernel.py b/GPy/mappings/kernel.py index 3bfcd388..ea1720db 100644 --- a/GPy/mappings/kernel.py +++ b/GPy/mappings/kernel.py @@ -36,16 +36,16 @@ class Kernel(Mapping): Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name) self.kern = kernel self.Z = Z - self.num_bases, Zdim = X.shape + self.num_bases, Zdim = Z.shape assert Zdim == self.input_dim - self.A = GPy.core.Param('A', np.random.randn(self.num_bases, self.output_dim)) - self.add_parameter(self.A) + self.A = Param('A', np.random.randn(self.num_bases, self.output_dim)) + self.link_parameter(self.A) def f(self, X): return np.dot(self.kern.K(X, self.Z), self.A) def update_gradients(self, dL_dF, X): - self.kern.update_gradients_full(np.dot(dL_dF, self.A.T)) + self.kern.update_gradients_full(np.dot(dL_dF, self.A.T), X, self.Z) self.A.gradient = np.dot( self.kern.K(self.Z, X), dL_dF) def gradients_X(self, dL_dF, X): diff --git a/GPy/mappings/mlp.py b/GPy/mappings/mlp.py index f22fc07f..f0fe21e5 100644 --- a/GPy/mappings/mlp.py +++ b/GPy/mappings/mlp.py @@ -11,32 +11,45 @@ class MLP(Mapping): """ def __init__(self, input_dim=1, output_dim=1, hidden_dim=3, name='mlpmap'): - super(MLP).__init__(self, input_dim=input_dim, output_dim=output_dim, name=name) + super(MLP, self).__init__(input_dim=input_dim, output_dim=output_dim, name=name) self.hidden_dim = hidden_dim self.W1 = Param('W1', np.random.randn(self.input_dim, self.hidden_dim)) self.b1 = Param('b1', np.random.randn(self.hidden_dim)) self.W2 = Param('W2', np.random.randn(self.hidden_dim, self.output_dim)) self.b2 = Param('b2', np.random.randn(self.output_dim)) + self.link_parameters(self.W1, self.b1, self.W2, self.b2) def f(self, X): - N, D = X.shape - 
activations = np.tanh(np.dot(X,self.W1) + self.b1) - self.out = np.dot(self.activations,self.W2) + self.b2 - return self.output_fn(self.out) + layer1 = np.dot(X, self.W1) + self.b1 + activations = np.tanh(layer1) + return np.dot(activations, self.W2) + self.b2 def update_gradients(self, dL_dF, X): - activations = np.tanh(np.dot(X,self.W1) + self.b1) - + layer1 = np.dot(X,self.W1) + self.b1 + activations = np.tanh(layer1) #Evaluate second-layer gradients. self.W2.gradient = np.dot(activations.T, dL_dF) self.b2.gradient = np.sum(dL_dF, 0) # Backpropagation to hidden layer. - delta_hid = np.dot(dL_dF, self.W2.T) * (1.0 - activations**2) + dL_dact = np.dot(dL_dF, self.W2.T) + dL_dlayer1 = dL_dact / np.square(np.cosh(layer1)) # Finally, evaluate the first-layer gradients. - self.W1.gradients = np.dot(X.T,delta_hid) - self.b1.gradients = np.sum(delta_hid, 0) + self.W1.gradient = np.dot(X.T,dL_dlayer1) + self.b1.gradient = np.sum(dL_dlayer1, 0) + + def gradients_X(self, dL_dF, X): + layer1 = np.dot(X,self.W1) + self.b1 + activations = np.tanh(layer1) + + # Backpropagation to hidden layer. + dL_dact = np.dot(dL_dF, self.W2.T) + dL_dlayer1 = dL_dact / np.square(np.cosh(layer1)) + + return np.dot(dL_dlayer1, self.W1.T) + + diff --git a/GPy/testing/mapping_tests.py b/GPy/testing/mapping_tests.py new file mode 100644 index 00000000..2e32dad3 --- /dev/null +++ b/GPy/testing/mapping_tests.py @@ -0,0 +1,72 @@ +# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import unittest +import numpy as np +import GPy + +class MappingGradChecker(GPy.core.Model): + """ + This class has everything we need to check the gradient of a mapping. It + implement a simple likelihood which is a weighted sum of the outputs of the + mapping. the gradients are checked against the parameters of the mapping + and the input. 
+ """ + def __init__(self, mapping, X, name='map_grad_check'): + super(MappingGradChecker, self).__init__(name) + self.mapping = mapping + self.link_parameter(self.mapping) + self.X = GPy.core.Param('X',X) + self.link_parameter(self.X) + self.dL_dY = np.random.randn(self.X.shape[0], self.mapping.output_dim) + def log_likelihood(self): + return np.sum(self.mapping.f(self.X) * self.dL_dY) + def parameters_changed(self): + self.X.gradient = self.mapping.gradients_X(self.dL_dY, self.X) + self.mapping.update_gradients(self.dL_dY, self.X) + + + + + + + +class MappingTests(unittest.TestCase): + + def test_kernelmapping(self): + X = np.random.randn(100,3) + Z = np.random.randn(10,3) + mapping = GPy.mappings.Kernel(3, 2, Z, GPy.kern.RBF(3)) + self.assertTrue(MappingGradChecker(mapping, X).checkgrad()) + + def test_linearmapping(self): + mapping = GPy.mappings.Linear(3, 2) + X = np.random.randn(100,3) + self.assertTrue(MappingGradChecker(mapping, X).checkgrad()) + + def test_mlpmapping(self): + mapping = GPy.mappings.MLP(input_dim=3, hidden_dim=5, output_dim=2) + X = np.random.randn(100,3) + self.assertTrue(MappingGradChecker(mapping, X).checkgrad()) + + def test_addmapping(self): + m1 = GPy.mappings.MLP(input_dim=3, hidden_dim=5, output_dim=2) + m2 = GPy.mappings.Linear(input_dim=3, output_dim=2) + mapping = GPy.mappings.Additive(m1, m2) + X = np.random.randn(100,3) + self.assertTrue(MappingGradChecker(mapping, X).checkgrad()) + + def test_compoundmapping(self): + m1 = GPy.mappings.MLP(input_dim=3, hidden_dim=5, output_dim=2) + Z = np.random.randn(10,2) + m2 = GPy.mappings.Kernel(2, 4, Z, GPy.kern.RBF(2)) + mapping = GPy.mappings.Compound(m1, m2) + X = np.random.randn(100,3) + self.assertTrue(MappingGradChecker(mapping, X).checkgrad()) + + + + +if __name__ == "__main__": + print "Running unit tests, please be (very) patient..." 
+ unittest.main() From 02f2bb5c76b1c0c7c35d43731f2fccd927d713a7 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 26 Mar 2015 11:45:18 +0000 Subject: [PATCH 123/166] fixed up product kernel tests --- GPy/kern/_src/prod.py | 1 + GPy/testing/kernel_tests.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index b29c85eb..63b23f45 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -80,6 +80,7 @@ class Prod(CombinationKernel): if len(self.parts)==2: target += self.parts[0].gradients_X(dL_dK*self.parts[1].K(X, X2), X, X2) target += self.parts[1].gradients_X(dL_dK*self.parts[0].K(X, X2), X, X2) + else: for combination in itertools.combinations(self.parts, len(self.parts) - 1): prod = reduce(np.multiply, [p.K(X, X2) for p in combination]) to_update = list(set(self.parts) - set(combination))[0] diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 415cc7eb..458f5cd8 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -256,13 +256,23 @@ class KernelGradientTestsContinuous(unittest.TestCase): k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + def test_Prod1(self): + k = GPy.kern.RBF(self.D) * GPy.kern.Linear(self.D) + k.randomize() + self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + def test_Prod2(self): - k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D)) + k = GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D) k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) def test_Prod3(self): - k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D)) + k = GPy.kern.RBF(self.D) * GPy.kern.Linear(self.D) * GPy.kern.Bias(self.D) + k.randomize() + self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + + def test_Prod4(self): + k = GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D) * GPy.kern.Matern32(2, active_dims=[0,1]) k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) From 72de607199f7645db206521168d423a35363652f Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 26 Mar 2015 16:20:17 +0000 Subject: [PATCH 124/166] mean functions now working for svgp. with tests --- GPy/core/sparse_gp.py | 16 ++++--- GPy/core/svgp.py | 13 +++-- .../latent_function_inference/posterior.py | 5 +- .../latent_function_inference/svgp.py | 47 ++++++++++++++++--- GPy/testing/svgp_tests.py | 20 ++++++++ 5 files changed, 83 insertions(+), 18 deletions(-) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 4fcade79..a81b77fa 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -19,7 +19,7 @@ class SparseGP(GP): This model allows (approximate) inference using variational DTC or FITC (Gaussian likelihoods) as well as non-conjugate sparse methods based on these. - + This is not for missing data, as the implementation for missing data involves some inefficient optimization routine decisions. See missing data SparseGP implementation in py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'. 
@@ -39,7 +39,7 @@ class SparseGP(GP): """ - def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, + def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, inference_method=None, name='sparse gp', Y_metadata=None, normalizer=False): #pick a sensible inference method if inference_method is None: @@ -53,7 +53,7 @@ class SparseGP(GP): self.Z = Param('inducing inputs', Z) self.num_inducing = Z.shape[0] - GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) + GP.__init__(self, X, Y, kernel, likelihood, mean_function, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) logger.info("Adding Z as parameter") self.link_parameter(self.Z, index=0) @@ -61,7 +61,7 @@ class SparseGP(GP): def has_uncertain_inputs(self): return isinstance(self.X, VariationalPosterior) - + def set_Z(self, Z, trigger_update=True): if trigger_update: self.update_model(False) self.unlink_parameter(self.Z) @@ -110,8 +110,8 @@ class SparseGP(GP): def _raw_predict(self, Xnew, full_cov=False, kern=None): """ - Make a prediction for the latent function values. - + Make a prediction for the latent function values. + For certain inputs we give back a full_cov of shape NxN, if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of, we take only the diagonal elements across N. @@ -136,6 +136,9 @@ class SparseGP(GP): else: Kxx = kern.Kdiag(Xnew) var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T + #add in the mean function + if self.mean_function is not None: + mu += self.mean_function.f(Xnew) else: psi0_star = self.kern.psi0(self.Z, Xnew) psi1_star = self.kern.psi1(self.Z, Xnew) @@ -165,4 +168,5 @@ class SparseGP(GP): var[i] = var_ else: var[i] = np.diag(var_)+p0-t2 + return mu, var diff --git a/GPy/core/svgp.py b/GPy/core/svgp.py index 1966dbef..7783f3b1 100644 --- a/GPy/core/svgp.py +++ b/GPy/core/svgp.py @@ -9,7 +9,7 @@ from ..inference.latent_function_inference import SVGP as svgp_inf class SVGP(SparseGP): - def __init__(self, X, Y, Z, kernel, likelihood, name='SVGP', Y_metadata=None, batchsize=None): + def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, name='SVGP', Y_metadata=None, batchsize=None): """ Stochastic Variational GP. 
@@ -38,7 +38,7 @@ class SVGP(SparseGP): #create the SVI inference method inf_method = svgp_inf() - SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, inference_method=inf_method, + SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, mean_function=mean_function, inference_method=inf_method, name=name, Y_metadata=Y_metadata, normalizer=False) self.m = Param('q_u_mean', np.zeros((self.num_inducing, Y.shape[1]))) @@ -48,7 +48,7 @@ class SVGP(SparseGP): self.link_parameter(self.m) def parameters_changed(self): - self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0])) + self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.mean_function, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0])) #update the kernel gradients self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z) @@ -65,6 +65,13 @@ class SVGP(SparseGP): self.m.gradient = self.grad_dict['dL_dm'] self.chol.gradient = self.grad_dict['dL_dchol'] + if self.mean_function is not None: + self.mean_function.update_gradients(self.grad_dict['dL_dmfX'], self.X) + g = self.mean_function.gradient[:].copy() + self.mean_function.update_gradients(self.grad_dict['dL_dmfZ'], self.Z) + self.mean_function.gradient[:] += g + self.Z.gradient[:] += self.mean_function.gradients_X(self.grad_dict['dL_dmfZ'], self.Z) + def set_data(self, X, Y): """ Set the data without calling parameters_changed to avoid wasted computation diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index 34f0b3bb..a1d42c74 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -15,7 +15,7 @@ class Posterior(object): the function at any new point x_* by integrating over this posterior. 
""" - def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None): + def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None, prior_mean=0): """ woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M @@ -67,6 +67,7 @@ class Posterior(object): #option 2: self._mean = mean self._covariance = cov + self._prior_mean = prior_mean #compute this lazily self._precision = None @@ -175,7 +176,7 @@ class Posterior(object): $$ """ if self._woodbury_vector is None: - self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean) + self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean - self._prior_mean) return self._woodbury_vector @property diff --git a/GPy/inference/latent_function_inference/svgp.py b/GPy/inference/latent_function_inference/svgp.py index 48763426..da003793 100644 --- a/GPy/inference/latent_function_inference/svgp.py +++ b/GPy/inference/latent_function_inference/svgp.py @@ -7,7 +7,7 @@ from posterior import Posterior class SVGP(LatentFunctionInference): def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, KL_scale=1.0, batch_scale=1.0): - assert mean_function is None, "inference with a mean function not implemented" + num_inducing = Z.shape[0] num_data, num_outputs = Y.shape @@ -23,6 +23,15 @@ class SVGP(LatentFunctionInference): #S = S + np.eye(S.shape[0])*1e-5*np.max(np.max(S)) #Si, Lnew, _,_ = linalg.pdinv(S) + #compute mean function stuff + if mean_function is not None: + prior_mean_u = mean_function.f(Z) + prior_mean_f = mean_function.f(X) + else: + prior_mean_u = np.zeros((num_inducing, num_outputs)) + prior_mean_f = np.zeros((num_data, num_outputs)) + + #compute kernel related stuff Kmm = kern.K(Z) Knm = kern.K(X, Z) @@ -31,17 +40,31 @@ class SVGP(LatentFunctionInference): #compute the marginal means and variances of q(f) A = np.dot(Knm, Kmmi) - mu = np.dot(A, q_u_mean) + mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u) v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jkl->ikl', A, S),1) #compute the KL term Kmmim = np.dot(Kmmi, q_u_mean) KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.einsum('ij,ijk->k', Kmmi, S) + 0.5*np.sum(q_u_mean*Kmmim,0) KL = KLs.sum() - dKL_dm = Kmmim + #gradient of the KL term (assuming zero mean function) + dKL_dm = Kmmim.copy() dKL_dS = 0.5*(Kmmi[:,:,None] - Si) dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T) + if mean_function is not None: + #adjust KL term for mean function + Kmmi_mfZ = np.dot(Kmmi, prior_mean_u) + KL += -np.sum(q_u_mean*Kmmi_mfZ) + KL += 0.5*np.sum(Kmmi_mfZ*prior_mean_u) + + #adjust gradient for mean fucntion + dKL_dm -= Kmmi_mfZ + dKL_dKmm += Kmmim.dot(Kmmi_mfZ.T) + dKL_dKmm -= 0.5*Kmmi_mfZ.dot(Kmmi_mfZ.T) + + #compute gradients for mean_function + dKL_dmfZ = Kmmi_mfZ - Kmmim #quadrature for the likelihood F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v, Y_metadata=Y_metadata) @@ -51,11 +74,9 @@ class SVGP(LatentFunctionInference): if dF_dthetaL is not None: dF_dthetaL = dF_dthetaL.sum(1)*batch_scale - #derivatives of expected likelihood + #derivatives of expected likelihood, assuming zero mean function Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal Admu = A.T.dot(dF_dmu) - 
#AdvA = np.einsum('ijk,jl->ilk', Adv, A)
-        #AdvA = np.dot(A.T, Adv).swapaxes(0,1)
         AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)])
         tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
         dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T
@@ -65,6 +86,14 @@ class SVGP(LatentFunctionInference):
         dF_dm = Admu
         dF_dS = AdvA

+        #adjust gradient to account for mean function
+        if mean_function is not None:
+            dF_dmfX = dF_dmu.copy()
+            dF_dmfZ = -Admu
+            dF_dKmn -= np.dot(Kmmi_mfZ, dF_dmu.T)
+            dF_dKmm += Admu.dot(Kmmi_mfZ.T)
+
+        #sum (gradients of) expected likelihood and KL part
         log_marginal = F.sum() - KL
         dL_dm, dL_dS, dL_dKmm, dL_dKmn = dF_dm - dKL_dm, dF_dS- dKL_dS, dF_dKmm- dKL_dKmm, dF_dKmn

@@ -72,4 +101,8 @@ class SVGP(LatentFunctionInference):
         dL_dchol = np.dstack([2.*np.dot(dL_dS[:,:,i], L[:,:,i]) for i in range(num_outputs)])
         dL_dchol = choleskies.triang_to_flat(dL_dchol)

-        return Posterior(mean=q_u_mean, cov=S, K=Kmm), log_marginal, {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv, 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
+        grad_dict = {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv, 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
+        if mean_function is not None:
+            grad_dict['dL_dmfZ'] = dF_dmfZ - dKL_dmfZ
+            grad_dict['dL_dmfX'] = dF_dmfX
+        return Posterior(mean=q_u_mean, cov=S, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict
diff --git a/GPy/testing/svgp_tests.py b/GPy/testing/svgp_tests.py
index 6dc0fa56..beb9c00d 100644
--- a/GPy/testing/svgp_tests.py
+++ b/GPy/testing/svgp_tests.py
@@ -32,3 +32,23 @@ class SVGP_classification(np.testing.TestCase):
         self.m = GPy.core.SVGP(X, Y, Z=Z, likelihood=lik, kernel=k)
     def test_grad(self):
         assert self.m.checkgrad(step=1e-4)
+
+class SVGP_Poisson_with_meanfunction(np.testing.TestCase):
+    """
+    Inference in the SVGP with a Poisson likelihood and a linear mean function
+    """
+    def setUp(self):
+        X = np.linspace(0,10,100).reshape(-1,1)
+        Z = np.linspace(0,10,10).reshape(-1,1)
+        latent_f = np.exp(0.1*X * 0.05*X**2)
+        Y = np.array([np.random.poisson(f) for f in latent_f.flatten()]).reshape(-1,1)
+
+        mf = GPy.mappings.Linear(1,1)
+
+        lik = GPy.likelihoods.Poisson()
+        k = GPy.kern.RBF(1, lengthscale=5.)
+ GPy.kern.White(1, 1e-6)
+        self.m = GPy.core.SVGP(X, Y, Z=Z, likelihood=lik, kernel=k, mean_function=mf)
+    def test_grad(self):
+        assert self.m.checkgrad(step=1e-4)
+
+

From 55dba3d2d972671178fc0d0ac3aeeeb4150b0530 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 27 Mar 2015 11:14:41 +0000
Subject: [PATCH 125/166] shape changes for gradients of likelihood parameters
 in variational_expectations

---
 GPy/inference/latent_function_inference/svgp.py | 2 +-
 GPy/likelihoods/gaussian.py                     | 9 ++++-----
 GPy/likelihoods/likelihood.py                   | 1 +
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/GPy/inference/latent_function_inference/svgp.py b/GPy/inference/latent_function_inference/svgp.py
index 5888bead..d4797311 100644
--- a/GPy/inference/latent_function_inference/svgp.py
+++ b/GPy/inference/latent_function_inference/svgp.py
@@ -48,7 +48,7 @@ class SVGP(LatentFunctionInference):
         #rescale the F term if working on a batch
         F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale
         if dF_dthetaL is not None:
-            dF_dthetaL = dF_dthetaL.sum(1)*batch_scale
+            dF_dthetaL = dF_dthetaL.sum(1).sum(1)*batch_scale

         #derivatives of expected likelihood
         Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal
diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index 4e7de9e3..85878973 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -305,18 +305,17 @@ class Gaussian(Likelihood):
         Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
         return Ysim.reshape(orig_shape)

-    def log_predictive_density(self, y_test, mu_star, var_star):
+    def log_predictive_density(self, y_test, mu_star, var_star, Y_metadata=None):
         """
         assumes independence
         """
         v = var_star + self.variance
         return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v

-    def variational_expectations(self, Y, m, v, gh_points=None):
+    def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
         lik_var = float(self.variance)
         F = -0.5*np.log(2*np.pi) -0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var
         dF_dmu = (Y - m)/lik_var
         dF_dv = np.ones_like(v)*(-0.5/lik_var)
-        dF_dlik_var = np.sum(-0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2))
-        dF_dtheta = [dF_dlik_var]
-        return F, dF_dmu, dF_dv, dF_dtheta
+        dF_dtheta = -0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2)
+        return F, dF_dmu, dF_dv, dF_dtheta.reshape(1, Y.shape[0], Y.shape[1])
diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
index 0bf9fc6f..5158a208 100644
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@@ -180,6 +180,7 @@ class Likelihood(Parameterized):
         if self.size:
             dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None]) # Ntheta x (orig size) x N_{quad_points}
             dF_dtheta = np.dot(dF_dtheta, gh_w)
+            dF_dtheta = dF_dtheta.reshape(self.size, shape[0], shape[1])
         else:
             dF_dtheta = None # Not yet implemented
         return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), dF_dtheta

From d7316ee7d916ea38fb87b21876c71d849a09512a Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Fri, 27 Mar 2015 13:49:12 +0000
Subject: [PATCH 126/166] Relaxed inference test requirement

---
 GPy/testing/inference_tests.py | 42 +++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/GPy/testing/inference_tests.py b/GPy/testing/inference_tests.py
index ac92c519..d5039049 100644
---
a/GPy/testing/inference_tests.py +++ b/GPy/testing/inference_tests.py @@ -11,39 +11,38 @@ import GPy class InferenceXTestCase(unittest.TestCase): - + def genData(self): D1,D2,N = 12,12,50 - np.random.seed(1234) - + x = np.linspace(0, 4 * np.pi, N)[:, None] s1 = np.vectorize(lambda x: np.sin(x)) s2 = np.vectorize(lambda x: np.cos(x)**2) s3 = np.vectorize(lambda x:-np.exp(-np.cos(2 * x))) sS = np.vectorize(lambda x: np.cos(x)) - + s1 = s1(x) s2 = s2(x) s3 = s3(x) sS = sS(x) - + s1 -= s1.mean(); s1 /= s1.std(0) s2 -= s2.mean(); s2 /= s2.std(0) s3 -= s3.mean(); s3 /= s3.std(0) sS -= sS.mean(); sS /= sS.std(0) - + S1 = np.hstack([s1, sS]) S2 = np.hstack([s3, sS]) - + P1 = np.random.randn(S1.shape[1], D1) P2 = np.random.randn(S2.shape[1], D2) - + Y1 = S1.dot(P1) Y2 = S2.dot(P2) - + Y1 += .01 * np.random.randn(*Y1.shape) Y2 += .01 * np.random.randn(*Y2.shape) - + Y1 -= Y1.mean(0) Y2 -= Y2.mean(0) Y1 /= Y1.std(0) @@ -52,33 +51,34 @@ class InferenceXTestCase(unittest.TestCase): slist = [s1, s2, s3, sS] slist_names = ["s1", "s2", "s3", "sS"] Ylist = [Y1, Y2] - + return Ylist - + def test_inferenceX_BGPLVM(self): Ys = self.genData() m = GPy.models.BayesianGPLVM(Ys[0],5,kernel=GPy.kern.Linear(5,ARD=True)) - + x,mi = m.infer_newX(m.Y, optimize=False) self.assertTrue(mi.checkgrad()) - - m.optimize(max_iters=10000) - x,mi = m.infer_newX(m.Y) - self.assertTrue(np.allclose(m.X.mean, mi.X.mean)) - self.assertTrue(np.allclose(m.X.variance, mi.X.variance)) + m.optimize(max_iters=10000) + x, mi = m.infer_newX(m.Y) + + print m.X.mean - mi.X.mean + self.assertTrue(np.allclose(m.X.mean, mi.X.mean, rtol=1e-4, atol=1e-4)) + self.assertTrue(np.allclose(m.X.variance, mi.X.variance, rtol=1e-4, atol=1e-4)) def test_inferenceX_GPLVM(self): Ys = self.genData() m = GPy.models.GPLVM(Ys[0],3,kernel=GPy.kern.RBF(3,ARD=True)) - + x,mi = m.infer_newX(m.Y, optimize=False) self.assertTrue(mi.checkgrad()) - + # m.optimize(max_iters=10000) # x,mi = m.infer_newX(m.Y) # self.assertTrue(np.allclose(m.X, x)) - + if __name__ == "__main__": unittest.main() From 932b5468ae41ffa33a2f612073b3f25548e5d164 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Tue, 11 Feb 2014 12:14:11 +0000 Subject: [PATCH 127/166] Adding likelihoods and block matrices --- GPy/util/block_matrices.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/GPy/util/block_matrices.py b/GPy/util/block_matrices.py index 95920868..cdbb1b0b 100644 --- a/GPy/util/block_matrices.py +++ b/GPy/util/block_matrices.py @@ -17,6 +17,23 @@ def get_blocks(A, blocksizes): count_i += i return B +def get_block_shapes(B): + assert B.dtype is np.dtype('object'), "Must be a block matrix" + return [B[b,b].shape[0] for b in range(0, B.shape[0])] + +def unblock(B): + assert B.dtype is np.dtype('object'), "Must be a block matrix" + block_shapes = get_block_shapes(B) + num_elements = np.sum(block_shapes) + A = np.empty(shape=(num_elements, num_elements)) + count_i = 0 + for Bi, i in enumerate(block_shapes): + count_j = 0 + for Bj, j in enumerate(block_shapes): + A[count_i:count_i + i, count_j:count_j + j] = B[Bi, Bj] + count_j += j + count_i += i + return A if __name__=='__main__': @@ -24,3 +41,8 @@ if __name__=='__main__': B = get_blocks(A,[2,3]) B[0,0] += 7 print B + + assert np.all(unblock(B) == A) + + import ipdb; ipdb.set_trace() # XXX BREAKPOINT + From 6a1de2bfc2dccd30c20a0bb30902a283eca1b6d1 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Wed, 12 Feb 2014 10:39:15 +0000 Subject: [PATCH 128/166] Added block matrix dot product --- GPy/util/block_matrices.py | 34 
+++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/GPy/util/block_matrices.py b/GPy/util/block_matrices.py
index cdbb1b0b..464e3ba1 100644
--- a/GPy/util/block_matrices.py
+++ b/GPy/util/block_matrices.py
@@ -35,6 +35,37 @@ def unblock(B):
         count_i += i
     return A

+def block_dot(A, B):
+    """
+    Element-wise dot product on block matrices
+
+    +------+------+     +------+------+     +-------+-------+
+    |      |      |     |      |      |     |A11.B11|A12.B12|
+    | A11  | A12  |     | B11  | B12  |     |       |       |
+    +------+------+  o  +------+------|  =  +-------+-------+
+    |      |      |     |      |      |     |A21.B21|A22.B22|
+    | A21  | A22  |     | B21  | B22  |     |       |       |
+    +-------------+     +------+------+     +-------+-------+
+
+    .. Note::
+        If the diagonal blocks of either A or B are stored as vectors then a more
+        efficient dot product using numpy broadcasting will be used, i.e. A11*B11
+    """
+    #Must have same number of blocks and be a block matrix
+    assert A.dtype is np.dtype('object'), "Must be a block matrix"
+    assert B.dtype is np.dtype('object'), "Must be a block matrix"
+    Ashape = A.shape
+    Bshape = B.shape
+    assert Ashape == Bshape
+    def f(A,B):
+        if Ashape[0] == Ashape[1] or Bshape[0] == Bshape[1]:
+            #FIXME: Careful if one is transpose of other, would make a matrix
+            return A*B
+        else:
+            return np.dot(A,B)
+    dot = np.vectorize(f, otypes = [np.object])
+    return dot(A,B)
+

 if __name__=='__main__':
     A = np.zeros((5,5))
@@ -43,6 +74,3 @@ if __name__=='__main__':
     print B

     assert np.all(unblock(B) == A)
-
-    import ipdb; ipdb.set_trace() # XXX BREAKPOINT
-

From 0ea3d336957372a9ee7e40b9db116c881e99279b Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Fri, 27 Mar 2015 14:17:03 +0000
Subject: [PATCH 129/166] Merging with private repo, mostly fixed

---
 .../latent_function_inference/__init__.py     |  12 +-
 .../latent_function_inference/laplace.py      | 205 +++++++-
 GPy/likelihoods/bernoulli.py                  |  38 ++
 GPy/likelihoods/gaussian.py                   |  19 +-
 GPy/likelihoods/likelihood.py                 | 255 ++++++++--
 GPy/likelihoods/student_t.py                  |   7 +-
 GPy/testing/likelihood_tests.py               | 472 +++++++++---------
 GPy/util/misc.py                              |  78 ++-
 8 files changed, 768 insertions(+), 318 deletions(-)

diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py
index 67f57638..dc7789ba 100644
--- a/GPy/inference/latent_function_inference/__init__.py
+++ b/GPy/inference/latent_function_inference/__init__.py
@@ -50,19 +50,19 @@ class InferenceMethodList(LatentFunctionInference, list):
     def on_optimization_end(self):
         for inf in self:
             inf.on_optimization_end()
-
+    
     def __getstate__(self):
         state = []
         for inf in self:
             state.append(inf)
         return state
-
+    
     def __setstate__(self, state):
         for inf in state:
             self.append(inf)

 from exact_gaussian_inference import ExactGaussianInference
-from laplace import Laplace
+from laplace import Laplace, LaplaceBlock
 from GPy.inference.latent_function_inference.var_dtc import VarDTC
 from expectation_propagation import EP
 from expectation_propagation_dtc import EPDTC
@@ -78,9 +78,9 @@ from svgp import SVGP

 # class EMLikeLatentFunctionInference(LatentFunctionInference):
 #     def update_approximation(self):
 #         """
-#         This function gets called when the
+#         This function gets called when the 
 #         """
-#
+# 
 #     def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
 #         """
 #         Do inference on the latent functions given a covariance function `kern`,
 #         Additional metadata for the outputs `Y` can be given in `Y_metadata`.
# """ # raise NotImplementedError, "Abstract base class for full inference" -# +# # class VariationalLatentFunctionInference(LatentFunctionInference): # def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): # """ diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 05711b0b..4e25b4b1 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -43,28 +43,31 @@ class Laplace(LatentFunctionInference): """ Returns a Posterior class containing essential quantities of the posterior """ - # Compute K K = kern.K(X) #Find mode if self.bad_fhat or self.first_run: Ki_f_init = np.zeros_like(Y) - first_run = False + self.first_run = False else: Ki_f_init = self._previous_Ki_fhat + Ki_f_init = np.zeros_like(Y)# FIXME: take this out + f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata) + self.f_hat = f_hat - self.Ki_fhat = Ki_fhat - self.K = K.copy() + #self.Ki_fhat = Ki_fhat + #self.K = K.copy() + #Compute hessian and other variables at mode log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata) self._previous_Ki_fhat = Ki_fhat.copy() return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL} - def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None): + def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs): """ Rasmussen's numerically stable mode finding For nomenclature see Rasmussen & Williams 2006 @@ -89,7 +92,12 @@ class Laplace(LatentFunctionInference): #define the objective function (to be maximised) def obj(Ki_f, f): - return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata)) + ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata)) + if np.isnan(ll): + return -np.inf + else: + return ll + difference = np.inf iteration = 0 @@ -104,7 +112,7 @@ class Laplace(LatentFunctionInference): W_f = W*f b = W_f + grad # R+W p46 line 6. 
-            W12BiW12, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave)
+            W12BiW12, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs)
             W12BiW12Kb = np.dot(W12BiW12, np.dot(K, b))

             #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
@@ -121,7 +129,9 @@
             step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
             Ki_f_new = Ki_f + step*dKi_f
             f_new = np.dot(K, Ki_f_new)
-
+            #print "new {} vs old {}".format(obj(Ki_f_new, f_new), obj(Ki_f, f))
+            if obj(Ki_f_new, f_new) < obj(Ki_f, f):
+                raise ValueError("Shouldn't happen, brent optimization failing")
             difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
             Ki_f = Ki_f_new
             f = f_new
@@ -152,14 +162,10 @@
         if np.any(np.isnan(W)):
             raise ValueError('One or more element(s) of W is NaN')

-        K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
-
-        #compute vital matrices
-        C = np.dot(LiW12, K)
-        Ki_W_i = K - C.T.dot(C)
+        K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)

         #compute the log marginal
-        log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - np.sum(np.log(np.diag(L)))
+        log_marginal = -0.5*np.sum(np.dot(Ki_f.T, f_hat)) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*logdet_I_KW

         # Compute matrices for derivatives
         dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
@@ -196,23 +202,23 @@
             dL_dthetaL = np.zeros(num_params)
             for thetaL_i in range(num_params):
                 #Explicit
-                dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i])
+                dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i,:, :])
                                    # The + comes from the fact that dlik_hess_dthetaL == -dW_dthetaL
-                                   + 0.5*np.sum(np.diag(Ki_W_i).flatten()*dlik_hess_dthetaL[:, thetaL_i].flatten())
+                                   + 0.5*np.sum(np.diag(Ki_W_i)*np.squeeze(dlik_hess_dthetaL[thetaL_i, :, :]))
                                  )
                 #Implicit
-                dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[:, thetaL_i])
-                #dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[:, thetaL_i])
+                dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[thetaL_i, :, :])
+                #dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[thetaL_i, :, :])
                 dL_dthetaL_imp = np.dot(dL_dfhat.T, dfhat_dthetaL)
-                dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
+                dL_dthetaL[thetaL_i] = np.sum(dL_dthetaL_exp + dL_dthetaL_imp)

         else:
             dL_dthetaL = np.zeros(likelihood.size)

         return log_marginal, K_Wi_i, dL_dK, dL_dthetaL

-    def _compute_B_statistics(self, K, W, log_concave):
+    def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):
         """
         Rasmussen suggests the use of a numerically stable positive definite matrix B
         Which has positive diagonal elements and can be easily inverted
@@ -225,7 +231,7 @@
         """
         if not log_concave:
             #print "Under 1e-10: {}".format(np.sum(W < 1e-6))
-            W[W<1e-6] = 1e-6
+            W = np.clip(W, 1e-6, 1e+30) # NOTE: when setting a parameter inside parameters_changed it will always come to closed update circles!!!
             #W.__setitem__(W < 1e-6, 1e-6, update=False)
             # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
             # If the likelihood is non-log-concave.
We want to say that there is a negative variance
@@ -247,5 +253,160 @@
         #K_Wi_i_2 , _= dpotri(L2)
         #symmetrify(K_Wi_i_2)

-        return K_Wi_i, L, LiW12
+        #compute vital matrices
+        C = np.dot(LiW12, K)
+        Ki_W_i = K - C.T.dot(C)
+        I_KW_i = np.eye(K.shape[0]) - np.dot(K, K_Wi_i)
+        logdet_I_KW = 2*np.sum(np.log(np.diag(L)))
+
+        return K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i
+
+class LaplaceBlock(Laplace):
+    def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs):
+        Ki_f = Ki_f_init.copy()
+        f = np.dot(K, Ki_f)
+
+        #define the objective function (to be maximised)
+        def obj(Ki_f, f):
+            ll = -0.5*np.dot(Ki_f.T, f) + np.sum(likelihood.logpdf_sum(f, Y, Y_metadata=Y_metadata))
+            if np.isnan(ll):
+                return -np.inf
+            else:
+                return ll
+
+        difference = np.inf
+        iteration = 0
+
+        I = np.eye(K.shape[0])
+        while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
+            W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
+
+            W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30)
+
+            W_f = np.dot(W, f)
+            grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
+
+            b = W_f + grad # R+W p46 line 6.
+            K_Wi_i, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs)
+
+            #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
+            #a = (I - (K+Wi)i*K)*b
+            full_step_Ki_f = np.dot(I - np.dot(K_Wi_i, K), b)
+            dKi_f = full_step_Ki_f - Ki_f
+
+            #define an objective for the line search (minimize this one)
+            def inner_obj(step_size):
+                Ki_f_trial = Ki_f + step_size*dKi_f
+                f_trial = np.dot(K, Ki_f_trial)
+                return -obj(Ki_f_trial, f_trial)
+
+            #use scipy for the line search, then compute new values of f, Ki_f
+            step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
+
+            Ki_f_new = Ki_f + step*dKi_f
+            f_new = np.dot(K, Ki_f_new)
+
+            difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
+            Ki_f = Ki_f_new
+            f = f_new
+            iteration += 1
+
+        #Warn of bad fits
+        if difference > self._mode_finding_tolerance:
+            if not self.bad_fhat:
+                warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
+                self._previous_Ki_fhat = np.zeros_like(Y)
+            self.bad_fhat = True
+        elif self.bad_fhat:
+            self.bad_fhat = False
+            warnings.warn("f_hat now fine again")
+        if iteration > self._mode_finding_max_iter:
+            warnings.warn("didn't find the best")
+
+        return f, Ki_f
+
+    def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, kern, Y_metadata):
+        #At this point get the hessian matrix (or vector as W is diagonal)
+        W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
+
+        W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30)
+
+        K_Wi_i, log_B_det, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)
+
+        #compute the log marginal
+        #FIXME: The determinant should be output_dim*0.5 I think, gradients may now no longer check
+        log_marginal = -0.5*np.dot(f_hat.T, Ki_f) + np.sum(likelihood.logpdf_sum(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*log_B_det

+        #Compute vital matrices for derivatives
+        dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
+
+        #dL_dfhat = np.zeros((f_hat.shape[0]))
+        #for i in range(f_hat.shape[0]):
+            #dL_dfhat[i] = -0.5*np.trace(np.dot(Ki_W_i, dW_df[:,:,i]))
+
+        dL_dfhat = -0.5*np.einsum('ij,ijk->k', Ki_W_i, dW_df)
+
+        woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata)
+
+        ####################
+        #compute dL_dK#
+        ####################
+        if kern.size > 0 and not
kern.is_fixed:
+            #Explicit
+            explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)
+
+            #Implicit
+            implicit_part = woodbury_vector.dot(dL_dfhat[None,:]).dot(I_KW_i)
+            #implicit_part = Ki_f.dot(dL_dfhat[None,:]).dot(I_KW_i)
+
+            dL_dK = explicit_part + implicit_part
+        else:
+            dL_dK = np.zeros_like(K)
+
+        ####################
+        #compute dL_dthetaL#
+        ####################
+        if likelihood.size > 0 and not likelihood.is_fixed:
+            raise NotImplementedError
+        else:
+            dL_dthetaL = np.zeros(likelihood.size)
+
+        #self.K_Wi_i = K_Wi_i
+        #self.Ki_W_i = Ki_W_i
+        #self.W = W
+        #self.K = K
+        #self.dL_dfhat = dL_dfhat
+        #self.explicit_part = explicit_part
+        #self.implicit_part = implicit_part
+        return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
+
+    def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):
+        """
+        Rasmussen suggests the use of a numerically stable positive definite matrix B
+        Which has a positive diagonal element and can be easily inverted
+
+        :param K: Prior Covariance matrix evaluated at locations X
+        :type K: NxN matrix
+        :param W: Negative hessian at a point (diagonal matrix)
+        :type W: Vector of diagonal values of hessian (1xN)
+        :returns: (K_Wi_i, L_B, not_provided)
+        """
+        #w = GPy.util.diag.view(W)
+        #W[:] = np.where(w<1e-6, 1e-6, w)
+
+        #B = I + KW
+        B = np.eye(K.shape[0]) + np.dot(K, W)
+        #Bi, L, Li, logdetB = pdinv(B)
+        Bi = np.linalg.inv(B)
+
+        #K_Wi_i = np.eye(K.shape[0]) - mdot(W, Bi, K)
+        K_Wi_i = np.dot(W, Bi)
+
+        #self.K_Wi_i_brute = np.linalg.inv(K + np.linalg.inv(W))
+        #self.B = B
+        #self.Bi = Bi
+        Ki_W_i = np.dot(Bi, K)
+
+        sign, logdetB = np.linalg.slogdet(B)
+        return K_Wi_i, sign*logdetB, Bi, Ki_W_i
diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py
index 26de274b..6277c1dc 100644
--- a/GPy/likelihoods/bernoulli.py
+++ b/GPy/likelihoods/bernoulli.py
@@ -248,3 +248,41 @@ class Bernoulli(Likelihood):

     def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
         pass
+
+    def variational_expectations(self, Y, m, v, gh_points=None):
+        """
+        Probit-specific numerically stable integrations
+        """
+        #Move to be faster
+        if self.gp_link:
+            pass
+        Yshape = Y.shape
+        mshape = m.shape
+        vshape = v.shape
+        Y = Y.flatten()
+        m = m.flatten()
+        v = v.flatten()
+
+        assert Yshape == mshape
+        assert mshape == vshape
+
+        Ysign = np.where(Y==1,1,-1).flatten()
+        gh_x, gh_w = np.polynomial.hermite.hermgauss(20)
+
+        #Shapes a bit weird
+        X = gh_x[None,:]*np.sqrt(2.*v[:, None]) + (m*Ysign)[:,None]
+        p = stats.norm.cdf(X)
+        p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability
+        N = stats.norm.pdf(X)
+        F = np.log(p).dot(gh_w)
+        NoverP = N/p
+        dF_dm = (NoverP*Ysign[:,None]).dot(gh_w)
+        dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(gh_w)
+        if np.any(np.isnan(dF_dv)) or np.any(np.isinf(dF_dv)):
+            stop
+        if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)):
+            stop
+        #FIXME: Might be wrong reshaping
+        return F.reshape(Yshape), dF_dm.reshape(mshape), dF_dv.reshape(vshape), None
+
+
diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index 4e7de9e3..021ec269 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -34,7 +34,9 @@ class Gaussian(Likelihood):
         if gp_link is None:
             gp_link = link_functions.Identity()

-        assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link"
+        if not isinstance(gp_link, link_functions.Identity):
+            print "Warning, Exact inference is not implemented for non-identity link functions,\
+            if you are not already, ensure Laplace inference_method is used"

         super(Gaussian,
self).__init__(gp_link, name=name)

@@ -263,16 +265,19 @@ class Gaussian(Likelihood):
         return d2logpdf_dlink2_dvar

     def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
-        dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
-        return dlogpdf_dvar
+        dlogpdf_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
+        dlogpdf_dtheta[0,:,:] = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
+        return dlogpdf_dtheta

     def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
-        dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
-        return dlogpdf_dlink_dvar
+        dlogpdf_dlink_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
+        dlogpdf_dlink_dtheta[0, :, :]= self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
+        return dlogpdf_dlink_dtheta

     def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
-        d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
-        return d2logpdf_dlink2_dvar
+        d2logpdf_dlink2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
+        d2logpdf_dlink2_dtheta[0, :, :] = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
+        return d2logpdf_dlink2_dtheta

     def _mean(self, gp):
         """
diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
index b1e78b93..ee2f5368 100644
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@@ -5,7 +5,7 @@ import numpy as np
 from scipy import stats,special
 import scipy as sp
 import link_functions
-from ..util.misc import chain_1, chain_2, chain_3
+from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian, safe_exp
 from scipy.integrate import quad
 import warnings
 from ..core.parameterization import Parameterized
@@ -39,6 +39,7 @@ class Likelihood(Parameterized):
         assert isinstance(gp_link,link_functions.GPTransformation), "gp_link is not a valid GPTransformation."
         self.gp_link = gp_link
         self.log_concave = False
+        self.not_block_really = False

     def _gradients(self,partial):
         return np.zeros(0)
@@ -189,20 +190,27 @@ class Likelihood(Parameterized):
         """
         #conditional_mean: the expected value of y given some f, under this likelihood

+        fmin = -np.inf
+        fmax = np.inf
         def int_mean(f,m,v):
-            p = np.exp(-(0.5/v)*np.square(f - m))
+            exponent = -(0.5/v)*np.square(f - m)
+            #If exponent is under -30 then exp(exponent) will be very small, so don't exp it!
             #If p is zero then conditional_mean will overflow
+            assert np.all(v > 0)
+            p = safe_exp(exponent)
+
+            #If p is zero then conditional_mean will overflow, so return early
             if p < 1e-10:
                 return 0.
else:
                 return self.conditional_mean(f)*p

-        scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
+        scaled_mean = [quad(int_mean, fmin, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
         mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))

         return mean

     def _conditional_mean(self, f):
-        """Quadrature calculation of the conditional mean: E(Y_star|f)"""
+        """Quadrature calculation of the conditional mean: E(Y_star|f_star)"""
         raise NotImplementedError, "implement this function to make predictions"

     def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
@@ -210,7 +218,7 @@ class Likelihood(Parameterized):
         Approximation to the predictive variance: V(Y_star)

         The following variance decomposition is used:
-        V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
+        V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )  (law of total variance)

         :param mu: mean of posterior
         :param sigma: standard deviation of posterior
@@ -220,15 +228,22 @@

         #sigma2 = sigma**2
         normalizer = np.sqrt(2*np.pi*variance)

+        fmin_v = -np.inf
+        fmin_m = -np.inf
+        fmin = -np.inf
+        fmax = np.inf
+
+        from ..util.misc import safe_exp
         # E( V(Y_star|f_star) )
         def int_var(f,m,v):
-            p = np.exp(-(0.5/v)*np.square(f - m))
+            exponent = -(0.5/v)*np.square(f - m)
+            p = safe_exp(exponent)
             #If p is zero then conditional_variance will overflow
             if p < 1e-10:
                 return 0.
             else:
                 return self.conditional_variance(f)*p
-        scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
+        scaled_exp_variance = [quad(int_var, fmin_v, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
         exp_var = np.array(scaled_exp_variance)[:,None] / normalizer

         #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
@@ -240,14 +255,15 @@

         #E( E(Y_star|f_star)**2 )
         def int_pred_mean_sq(f,m,v,predictive_mean_sq):
-            p = np.exp(-(0.5/v)*np.square(f - m))
+            exponent = -(0.5/v)*np.square(f - m)
+            p = np.exp(exponent)
             #If p is zero then conditional_mean**2 will overflow
             if p < 1e-10:
                 return 0.
             else:
                 return self.conditional_mean(f)**2*p

-        scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
+        scaled_exp_exp2 = [quad(int_pred_mean_sq, fmin_m, fmax,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
         exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer

         var_exp = exp_exp2 - predictive_mean_sq
@@ -295,8 +311,18 @@ class Likelihood(Parameterized):
         :returns: likelihood evaluated for this point
         :rtype: float
         """
-        inv_link_f = self.gp_link.transf(f)
-        return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
+        if isinstance(self.gp_link, link_functions.Identity):
+            return self.pdf_link(f, y, Y_metadata=Y_metadata)
+        else:
+            inv_link_f = self.gp_link.transf(f)
+            return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
+
+    def logpdf_sum(self, f, y, Y_metadata=None):
+        """
+        Convenience function that can be overridden for likelihoods where this could
+        be computed more efficiently (Theano?)
+ """ + return np.sum(self.logpdf(f, y, Y_metadata=Y_metadata)) def logpdf(self, f, y, Y_metadata=None): """ @@ -313,8 +339,11 @@ class Likelihood(Parameterized): :returns: log likelihood evaluated for this point :rtype: float """ - inv_link_f = self.gp_link.transf(f) - return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata) + if isinstance(self.gp_link, link_functions.Identity): + return self.logpdf_link(f, y, Y_metadata=Y_metadata) + else: + inv_link_f = self.gp_link.transf(f) + return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata) def dlogpdf_df(self, f, y, Y_metadata=None): """ @@ -332,11 +361,15 @@ class Likelihood(Parameterized): :returns: derivative of log likelihood evaluated for this point :rtype: 1xN array """ - inv_link_f = self.gp_link.transf(f) - dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) - dlink_df = self.gp_link.dtransf_df(f) - return chain_1(dlogpdf_dlink, dlink_df) + if isinstance(self.gp_link, link_functions.Identity): + return self.dlogpdf_dlink(f, y, Y_metadata=Y_metadata) + else: + inv_link_f = self.gp_link.transf(f) + dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) + dlink_df = self.gp_link.dtransf_df(f) + return chain_1(dlogpdf_dlink, dlink_df) + @blockify_hessian def d2logpdf_df2(self, f, y, Y_metadata=None): """ Evaluates the link function link(f) then computes the second derivative of log likelihood using it @@ -353,13 +386,18 @@ class Likelihood(Parameterized): :returns: second derivative of log likelihood evaluated for this point (diagonal only) :rtype: 1xN array """ - inv_link_f = self.gp_link.transf(f) - d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata) - dlink_df = self.gp_link.dtransf_df(f) - dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) - d2link_df2 = self.gp_link.d2transf_df2(f) - return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) + if isinstance(self.gp_link, link_functions.Identity): + d2logpdf_df2 = self.d2logpdf_dlink2(f, y, Y_metadata=Y_metadata) + else: + inv_link_f = self.gp_link.transf(f) + d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata) + dlink_df = self.gp_link.dtransf_df(f) + dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) + d2link_df2 = self.gp_link.d2transf_df2(f) + d2logpdf_df2 = chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) + return d2logpdf_df2 + @blockify_third def d3logpdf_df3(self, f, y, Y_metadata=None): """ Evaluates the link function link(f) then computes the third derivative of log likelihood using it @@ -376,64 +414,96 @@ class Likelihood(Parameterized): :returns: third derivative of log likelihood evaluated for this point :rtype: float """ - inv_link_f = self.gp_link.transf(f) - d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata) - dlink_df = self.gp_link.dtransf_df(f) - d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata) - d2link_df2 = self.gp_link.d2transf_df2(f) - dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) - d3link_df3 = self.gp_link.d3transf_df3(f) - return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3) + if isinstance(self.gp_link, link_functions.Identity): + d3logpdf_df3 = self.d3logpdf_dlink3(f, y, Y_metadata=Y_metadata) + else: + inv_link_f = self.gp_link.transf(f) + d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata) + dlink_df = self.gp_link.dtransf_df(f) + 
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
+            d2link_df2 = self.gp_link.d2transf_df2(f)
+            dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
+            d3link_df3 = self.gp_link.d3transf_df3(f)
+            d3logpdf_df3 = chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
+        return d3logpdf_df3
+
     def dlogpdf_dtheta(self, f, y, Y_metadata=None):
         """
         TODO: Doc strings
         """
         if self.size > 0:
-            inv_link_f = self.gp_link.transf(f)
-            return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
+            if self.not_block_really:
+                raise NotImplementedError("Need to make a decorator for this!")
+            if isinstance(self.gp_link, link_functions.Identity):
+                return self.dlogpdf_link_dtheta(f, y, Y_metadata=Y_metadata)
+            else:
+                inv_link_f = self.gp_link.transf(f)
+                return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
         else:
             # There are no parameters so return an empty array for derivatives
-            return np.zeros([1, 0])
+            return np.zeros((0, f.shape[0], f.shape[1]))

     def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
         """
         TODO: Doc strings
         """
         if self.size > 0:
-            inv_link_f = self.gp_link.transf(f)
-            dlink_df = self.gp_link.dtransf_df(f)
-            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
-            return chain_1(dlogpdf_dlink_dtheta, dlink_df)
+            if self.not_block_really:
+                raise NotImplementedError("Need to make a decorator for this!")
+            if isinstance(self.gp_link, link_functions.Identity):
+                return self.dlogpdf_dlink_dtheta(f, y, Y_metadata=Y_metadata)
+            else:
+                inv_link_f = self.gp_link.transf(f)
+                dlink_df = self.gp_link.dtransf_df(f)
+                dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
+
+                dlogpdf_df_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
+                #Chain each parameter of the likelihood separately
+                for p in range(self.size):
+                    dlogpdf_df_dtheta[p, :, :] = chain_1(dlogpdf_dlink_dtheta[p,:,:], dlink_df)
+                return dlogpdf_df_dtheta
+            #return chain_1(dlogpdf_dlink_dtheta, dlink_df)
         else:
             # There are no parameters so return an empty array for derivatives
-            return np.zeros([f.shape[0], 0])
+            return np.zeros((0, f.shape[0], f.shape[1]))

     def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
         """
         TODO: Doc strings
         """
         if self.size > 0:
-            inv_link_f = self.gp_link.transf(f)
-            dlink_df = self.gp_link.dtransf_df(f)
-            d2link_df2 = self.gp_link.d2transf_df2(f)
-            d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
-            dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
-            return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
+            if self.not_block_really:
+                raise NotImplementedError("Need to make a decorator for this!")
+            if isinstance(self.gp_link, link_functions.Identity):
+                return self.d2logpdf_dlink2_dtheta(f, y, Y_metadata=Y_metadata)
+            else:
+                inv_link_f = self.gp_link.transf(f)
+                dlink_df = self.gp_link.dtransf_df(f)
+                d2link_df2 = self.gp_link.d2transf_df2(f)
+                d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
+                dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
+
+                d2logpdf_df2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
+                #Chain each parameter of the likelihood separately
+                for p in range(self.size):
+                    d2logpdf_df2_dtheta[p, :, :] = chain_2(d2logpdf_dlink2_dtheta[p,:,:], dlink_df, dlogpdf_dlink_dtheta[p,:,:], d2link_df2)
+                return d2logpdf_df2_dtheta
+                #return
chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
         else:
             # There are no parameters so return an empty array for derivatives
-            return np.zeros([f.shape[0], 0])
+            return np.zeros((0, f.shape[0], f.shape[1]))

     def _laplace_gradients(self, f, y, Y_metadata=None):
-        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata).sum(axis=0)
+        dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
         dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
         d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)

         #Parameters are stacked vertically. Must be listed in same order as 'get_param_names'

         # ensure we have gradients for every parameter we want to optimize
-        assert len(dlogpdf_dtheta) == self.size #1 x num_param array
-        assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
-        assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
+        assert dlogpdf_dtheta.shape[0] == self.size #f, d x num_param array
+        assert dlogpdf_df_dtheta.shape[0] == self.size #f x d x num_param matrix or just f x num_param
+        assert d2logpdf_df2_dtheta.shape[0] == self.size #f x num_param matrix or f x d x num_param matrix, f x f x num_param or f x f x d x num_param

         return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta

@@ -454,19 +524,98 @@ class Likelihood(Parameterized):

     def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
         #compute the quantiles by sampling!!!
-        N_samp = 1000
+        N_samp = 50
         s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
         #ss_f = s.flatten()
         #ss_y = self.samples(ss_f, Y_metadata)
+        #ss_y = self.samples(s, Y_metadata, samples=100)
         ss_y = self.samples(s, Y_metadata)
         #ss_y = ss_y.reshape(mu.shape[0], N_samp)
         return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]

-    def samples(self, gp, Y_metadata=None):
+    def samples(self, gp, Y_metadata=None, samples=1):
         """
         Returns a set of samples of observations based on a given value of the latent variable.

         :param gp: latent variable
+        :param samples: number of samples to take for each f location
         """
-        raise NotImplementedError
+        raise NotImplementedError("""May be possible to use MCMC with user-tuning, see
+                                  MCMC_pdf_samples in likelihood.py and write samples function
+                                  using this, beware this is a simple implementation
+                                  of Metropolis and will not work well for all likelihoods""")
+
+    def MCMC_pdf_samples(self, fNew, num_samples=1000, starting_loc=None, stepsize=0.1, burn_in=1000, Y_metadata=None):
+        """
+        Simple implementation of the Metropolis sampling algorithm
+
+        Will run a parallel chain for each input dimension (treats each f independently)
+        Thus assumes f*_1 independent of f*_2 etc.
+
+        :param num_samples: Number of samples to take
+        :param fNew: f at which to sample around
+        :param starting_loc: Starting locations of the independent chains (usually will be conditional_mean of likelihood), often link_f
+        :param stepsize: Stepsize for the normal proposal distribution (will need modifying)
+        :param burn_in: number of samples to use for burn-in (will need modifying)
+        :param Y_metadata: Y_metadata for pdf
+        """
+        print "Warning, using MCMC for sampling y*, needs to be tuned!"
+        if starting_loc is None:
+            starting_loc = fNew
+        from functools import partial
+        logpdf = partial(self.logpdf, f=fNew, Y_metadata=Y_metadata)
+        pdf = lambda y_star: np.exp(logpdf(y=y_star[:, None]))
+        #The link function of f should be a good starting point
+        #(i.e.
the point before you corrupt it with the likelihood) + par_chains = starting_loc.shape[0] + chain_values = np.zeros((par_chains, num_samples)) + chain_values[:, 0][:,None] = starting_loc + #Use same stepsize for all par_chains + stepsize = np.ones(par_chains)*stepsize + accepted = np.zeros((par_chains, num_samples+burn_in)) + accept_ratio = np.zeros(num_samples+burn_in) + #Whilst burning in, only need to keep the previous lot + burnin_cache = np.zeros(par_chains) + burnin_cache[:] = starting_loc.flatten() + burning_in = True + for i in xrange(burn_in+num_samples): + next_ind = i-burn_in + if burning_in: + old_y = burnin_cache + else: + old_y = chain_values[:,next_ind-1] + + old_lik = pdf(old_y) + #Propose new y from Gaussian proposal + new_y = np.random.normal(loc=old_y, scale=stepsize) + new_lik = pdf(new_y) + #Accept using Metropolis (not hastings) acceptance + #Always accepts if new_lik > old_lik + accept_probability = np.minimum(1, new_lik/old_lik) + u = np.random.uniform(0,1,par_chains) + #print "Accept prob: ", accept_probability + accepts = u < accept_probability + if burning_in: + burnin_cache[accepts] = new_y[accepts] + burnin_cache[~accepts] = old_y[~accepts] + if i == burn_in: + burning_in = False + chain_values[:,0] = burnin_cache + else: + #If it was accepted then new_y becomes the latest sample + chain_values[accepts, next_ind] = new_y[accepts] + #Otherwise use old y as the sample + chain_values[~accepts, next_ind] = old_y[~accepts] + + accepted[~accepts, i] = 0 + accepted[accepts, i] = 1 + accept_ratio[i] = np.sum(accepted[:,i])/float(par_chains) + + #Show progress + if i % int((burn_in+num_samples)*0.1) == 0: + print "{}% of samples taken ({})".format((i/int((burn_in+num_samples)*0.1)*10), i) + print "Last run accept ratio: ", accept_ratio[i] + + print "Average accept ratio: ", np.mean(accept_ratio) + return chain_values diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py index dbd4d94f..f16a55e9 100644 --- a/GPy/likelihoods/student_t.py +++ b/GPy/likelihoods/student_t.py @@ -226,17 +226,18 @@ class StudentT(Likelihood): def dlogpdf_link_dtheta(self, f, y, Y_metadata=None): dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet - return np.hstack((dlogpdf_dvar, dlogpdf_dv)) + return np.array((dlogpdf_dvar, dlogpdf_dv)) def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None): dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet - return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv)) + return np.array((dlogpdf_dlink_dvar, dlogpdf_dlink_dv)) def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None): d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata) d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet - return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) + + return np.array((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) def predictive_mean(self, mu, sigma, Y_metadata=None): # The comment here confuses mean and median. 
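A rough usage sketch for the MCMC_pdf_samples helper added above (editorially hedged: this snippet is not itself part of the patch series, and the StudentT likelihood, chain length, and step size below are illustrative assumptions that need per-likelihood tuning):

    import numpy as np
    import GPy

    # draw predictive samples y*|f* for a Student-t likelihood; MCMC_pdf_samples
    # runs one parallel Metropolis chain per row of f_star
    lik = GPy.likelihoods.StudentT(deg_free=5., sigma2=0.2)
    f_star = np.random.randn(10, 1)
    y_samples = lik.MCMC_pdf_samples(f_star, num_samples=2000,
                                     starting_loc=f_star,  # identity link, so link_f == f
                                     stepsize=0.2, burn_in=1000)
    print y_samples.shape  # expected (10, 2000): (parallel chains, kept samples)

Watching the printed accept ratios (very roughly 0.2 to 0.5 is a reasonable target) is the quickest way to judge whether stepsize needs retuning.
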
diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py
index 877d1aa0..7b6164c1 100644
--- a/GPy/testing/likelihood_tests.py
+++ b/GPy/testing/likelihood_tests.py
@@ -10,7 +10,7 @@ from GPy.likelihoods import link_functions
 from GPy.core.parameterization import Param
 from functools import partial
 #np.random.seed(300)
-#np.random.seed(7)
+#np.random.seed(4)
 #np.seterr(divide='raise')

 def dparam_partial(inst_func, *args):
@@ -52,8 +52,17 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None,
     zipped_params = zip(params, params_names)
     for param_ind, (param_val, param_name) in enumerate(zipped_params):
         #Check one parameter at a time, make sure it is 2d (as some gradients only return arrays) then strip out the parameter
-        fnum = np.atleast_2d(partial_f(param_val, param_name))[:, param_ind].shape[0]
-        dfnum = np.atleast_2d(partial_df(param_val, param_name))[:, param_ind].shape[0]
+        f_ = partial_f(param_val, param_name)
+        df_ = partial_df(param_val, param_name)
+        #Reshape so that we always have a 3d matrix, that is we want it (?, N, D) regardless of whether ? is num_params or not
+        f_ = f_.reshape(-1, f_.shape[0], f_.shape[1])
+        df_ = df_.reshape(-1, f_.shape[0], f_.shape[1])
+
+        #Get the number of f and number of dimensions
+        fnum = f_.shape[-2]
+        fdim = f_.shape[-1]
+        dfnum = df_.shape[-2]
+
         for fixed_val in range(dfnum):
             #dlik and dlik_dvar give back 1 value for each
             f_ind = min(fnum, fixed_val+1) - 1
@@ -61,9 +70,13 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None,
             #Make grad checker with this param moving, note that set_params is NOT being called
             #The parameter is being set directly with __setattr__
             #Check only the parameter and function value we wish to check at a time
-            grad = GradientChecker(lambda p_val: np.atleast_2d(partial_f(p_val, param_name))[f_ind, param_ind],
-                                   lambda p_val: np.atleast_2d(partial_df(p_val, param_name))[fixed_val, param_ind],
-                                   param_val, [param_name])
+            #func = lambda p_val, fnum, fdim, param_ind, f_ind, param_ind: partial_f(p_val, param_name).reshape(-1, fnum, fdim)[param_ind, f_ind, :]
+            #dfunc_dparam = lambda d_val, fnum, fdim, param_ind, fixed_val: partial_df(d_val, param_name).reshape(-1, fnum, fdim)[param_ind, fixed_val, :]
+
+            #First we reshape the output such that it is (num_params, N, D), then we pull out the relevant parameter/f-index and checkgrad just this index at a time
+            func = lambda p_val: partial_f(p_val, param_name).reshape(-1, fnum, fdim)[param_ind, f_ind, :]
+            dfunc_dparam = lambda d_val: partial_df(d_val, param_name).reshape(-1, fnum, fdim)[param_ind, fixed_val, :]
+            grad = GradientChecker(func, dfunc_dparam, param_val, [param_name])

             if constraints is not None:
                 for constrain_param, constraint in constraints:
@@ -104,37 +117,9 @@ class TestNoiseModels(object):

         self.var = 0.2

-        self.var = np.random.rand(1)
-
         #Make a bigger step as lower bound can be quite curved
         self.step = 1e-4

-    def tearDown(self):
-        self.Y = None
-        self.f = None
-        self.X = None
-
-    def test_scale2_models(self):
-        self.setUp()
-
-        ####################################################
-        # Constraint wrappers so we can just list them off #
-        ####################################################
-        def constrain_fixed(regex, model):
-            model[regex].constrain_fixed()
-
-        def constrain_negative(regex, model):
-            model[regex].constrain_negative()
-
-        def constrain_positive(regex, model):
-            model[regex].constrain_positive()
-
-        def constrain_bounded(regex, model, lower, upper):
-            """
-            Used like:
partial(constrain_bounded, lower=0, upper=1) - """ - model[regex].constrain_bounded(lower, upper) - """ Dictionary where we nest models we would like to check Name: { @@ -149,136 +134,170 @@ class TestNoiseModels(object): "link_f_constraints": [constraint_wrappers, listed_here] } """ - noise_models = {"Student_t_default": { - "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [self.var], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - #"constraints": [("t_scale2", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))] - }, - "laplace": True - }, - "Student_t_1_var": { - "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [1.0], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - }, - "laplace": True - }, - "Student_t_small_deg_free": { - "model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [self.var], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - }, - "laplace": True - }, - "Student_t_small_var": { - "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [0.001], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - }, - "laplace": True - }, - "Student_t_large_var": { - "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [10.0], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - }, - "laplace": True - }, - "Student_t_approx_gauss": { - "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [self.var], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - }, - "laplace": True - }, - "Student_t_log": { - "model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var), - "grad_params": { - "names": [".*t_scale2"], - "vals": [self.var], - "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] - }, - "laplace": True - }, - "Gaussian_default": { - "model": GPy.likelihoods.Gaussian(variance=self.var), - "grad_params": { - "names": [".*variance"], - "vals": [self.var], - "constraints": [(".*variance", constrain_positive)] - }, - "laplace": True, - "ep": False # FIXME: Should be True when we have it working again - }, - #"Gaussian_log": { - #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log(), variance=self.var, D=self.D, N=self.N), - #"grad_params": { - #"names": ["noise_model_variance"], - #"vals": [self.var], - #"constraints": [constrain_positive] - #}, - #"laplace": True - #}, - #"Gaussian_probit": { - #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N), - #"grad_params": { - #"names": ["noise_model_variance"], - #"vals": [self.var], - #"constraints": [constrain_positive] - #}, - #"laplace": True - #}, - #"Gaussian_log_ex": { - #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N), - #"grad_params": { - #"names": ["noise_model_variance"], - #"vals": [self.var], - #"constraints": [constrain_positive] - #}, - #"laplace": True - #}, - "Bernoulli_default": { - "model": 
GPy.likelihoods.Bernoulli(), - "link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)], - "laplace": True, - "Y": self.binary_Y, - "ep": False # FIXME: Should be True when we have it working again - }, - "Exponential_default": { - "model": GPy.likelihoods.Exponential(), - "link_f_constraints": [constrain_positive], - "Y": self.positive_Y, - "laplace": True, - }, - "Poisson_default": { - "model": GPy.likelihoods.Poisson(), - "link_f_constraints": [constrain_positive], - "Y": self.integer_Y, - "laplace": True, - "ep": False #Should work though... - }#, - #GAMMA needs some work!"Gamma_default": { - #"model": GPy.likelihoods.Gamma(), - #"link_f_constraints": [constrain_positive], - #"Y": self.positive_Y, - #"laplace": True - #} - } + self.noise_models = {"Student_t_default": { + "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), + "grad_params": { + "names": [".*t_scale2"], + "vals": [self.var], + "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + }, + "laplace": True + }, + "Student_t_1_var": { + "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), + "grad_params": { + "names": [".*t_scale2"], + "vals": [1.0], + "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + }, + "laplace": True + }, + "Student_t_small_deg_free": { + "model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var), + "grad_params": { + "names": [".*t_scale2"], + "vals": [self.var], + "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + }, + "laplace": True + }, + "Student_t_small_var": { + "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), + "grad_params": { + "names": [".*t_scale2"], + "vals": [0.001], + "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + }, + "laplace": True + }, + "Student_t_large_var": { + "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), + "grad_params": { + "names": [".*t_scale2"], + "vals": [10.0], + "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + }, + "laplace": True + }, + "Student_t_approx_gauss": { + "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var), + "grad_params": { + "names": [".*t_scale2"], + "vals": [self.var], + "constraints": [(".*t_scale2", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + }, + "laplace": True + }, + #"Student_t_log": { + #"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var), + #"grad_params": { + #"names": [".*t_noise"], + #"vals": [self.var], + #"constraints": [(".*t_noise", self.constrain_positive), (".*deg_free", self.constrain_fixed)] + #}, + #"laplace": True + #}, + "Gaussian_default": { + "model": GPy.likelihoods.Gaussian(variance=self.var), + "grad_params": { + "names": [".*variance"], + "vals": [self.var], + "constraints": [(".*variance", self.constrain_positive)] + }, + "laplace": True, + "ep": False # FIXME: Should be True when we have it working again + }, + "Gaussian_log": { + "model": GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var), + "grad_params": { + "names": [".*variance"], + "vals": [self.var], + "constraints": [(".*variance", self.constrain_positive)] + }, + "laplace": True + }, + #"Gaussian_probit": { + #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N), + #"grad_params": { + #"names": 
["noise_model_variance"], + #"vals": [self.var], + #"constraints": [constrain_positive] + #}, + #"laplace": True + #}, + #"Gaussian_log_ex": { + #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N), + #"grad_params": { + #"names": ["noise_model_variance"], + #"vals": [self.var], + #"constraints": [constrain_positive] + #}, + #"laplace": True + #}, + "Bernoulli_default": { + "model": GPy.likelihoods.Bernoulli(), + "link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)], + "laplace": True, + "Y": self.binary_Y, + "ep": False # FIXME: Should be True when we have it working again + }, + "Exponential_default": { + "model": GPy.likelihoods.Exponential(), + "link_f_constraints": [self.constrain_positive], + "Y": self.positive_Y, + "laplace": True, + }, + "Poisson_default": { + "model": GPy.likelihoods.Poisson(), + "link_f_constraints": [self.constrain_positive], + "Y": self.integer_Y, + "laplace": True, + "ep": False #Should work though... + }, + #, + #GAMMA needs some work!"Gamma_default": { + #"model": GPy.likelihoods.Gamma(), + #"link_f_constraints": [constrain_positive], + #"Y": self.positive_Y, + #"laplace": True + #} + } - for name, attributes in noise_models.iteritems(): + + #################################################### + # Constraint wrappers so we can just list them off # + #################################################### + def constrain_fixed(self, regex, model): + model[regex].constrain_fixed() + + def constrain_negative(self, regex, model): + model[regex].constrain_negative() + + def constrain_positive(self, regex, model): + model[regex].constrain_positive() + + def constrain_fixed_below(self, regex, model, up_to): + model[regex][0:up_to].constrain_fixed() + + def constrain_fixed_above(self, regex, model, above): + model[regex][above:].constrain_fixed() + + def constrain_bounded(self, regex, model, lower, upper): + """ + Used like: partial(constrain_bounded, lower=0, upper=1) + """ + model[regex].constrain_bounded(lower, upper) + + + def tearDown(self): + self.Y = None + self.f = None + self.X = None + + def test_scale2_models(self): + self.setUp() + + for name, attributes in self.noise_models.iteritems(): model = attributes["model"] if "grad_params" in attributes: params = attributes["grad_params"] @@ -290,7 +309,7 @@ class TestNoiseModels(object): param_vals = [] param_names = [] constrain_positive = [] - param_constraints = [] # ??? TODO: Saul to Fix. 
+ param_constraints = [] if "link_f_constraints" in attributes: link_f_constraints = attributes["link_f_constraints"] else: @@ -303,6 +322,10 @@ class TestNoiseModels(object): f = attributes["f"].copy() else: f = self.f.copy() + if "Y_metadata" in attributes: + Y_metadata = attributes["Y_metadata"].copy() + else: + Y_metadata = None if "laplace" in attributes: laplace = attributes["laplace"] else: @@ -317,30 +340,30 @@ class TestNoiseModels(object): #Required by all #Normal derivatives - yield self.t_logpdf, model, Y, f - yield self.t_dlogpdf_df, model, Y, f - yield self.t_d2logpdf_df2, model, Y, f + yield self.t_logpdf, model, Y, f, Y_metadata + yield self.t_dlogpdf_df, model, Y, f, Y_metadata + yield self.t_d2logpdf_df2, model, Y, f, Y_metadata #Link derivatives - yield self.t_dlogpdf_dlink, model, Y, f, link_f_constraints - yield self.t_d2logpdf_dlink2, model, Y, f, link_f_constraints + yield self.t_dlogpdf_dlink, model, Y, f, Y_metadata, link_f_constraints + yield self.t_d2logpdf_dlink2, model, Y, f, Y_metadata, link_f_constraints if laplace: #Laplace only derivatives - yield self.t_d3logpdf_df3, model, Y, f - yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints + yield self.t_d3logpdf_df3, model, Y, f, Y_metadata + yield self.t_d3logpdf_dlink3, model, Y, f, Y_metadata, link_f_constraints #Params - yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_names, param_constraints - yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_names, param_constraints - yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_names, param_constraints + yield self.t_dlogpdf_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints + yield self.t_dlogpdf_df_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints + yield self.t_d2logpdf2_df2_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints #Link params - yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_names, param_constraints - yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_names, param_constraints - yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_names, param_constraints + yield self.t_dlogpdf_link_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints + yield self.t_dlogpdf_dlink_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints + yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, Y_metadata, param_vals, param_names, param_constraints #laplace likelihood gradcheck - yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints + yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints if ep: #ep likelihood gradcheck - yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints + yield self.t_ep_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints self.tearDown() @@ -349,41 +372,41 @@ class TestNoiseModels(object): # dpdf_df's # ############# @with_setup(setUp, tearDown) - def t_logpdf(self, model, Y, f): + def t_logpdf(self, model, Y, f, Y_metadata): print "\n{}".format(inspect.stack()[0][3]) print model #print model._get_params() np.testing.assert_almost_equal( - model.pdf(f.copy(), Y.copy()).prod(), - np.exp(model.logpdf(f.copy(), Y.copy()).sum()) + model.pdf(f.copy(), Y.copy(), Y_metadata=Y_metadata).prod(), + 
np.exp(model.logpdf(f.copy(), Y.copy(), Y_metadata=Y_metadata).sum()) ) @with_setup(setUp, tearDown) - def t_dlogpdf_df(self, model, Y, f): + def t_dlogpdf_df(self, model, Y, f, Y_metadata): print "\n{}".format(inspect.stack()[0][3]) self.description = "\n{}".format(inspect.stack()[0][3]) - logpdf = functools.partial(np.sum(model.logpdf), y=Y) - dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) + logpdf = functools.partial(np.sum(model.logpdf), y=Y, Y_metadata=Y_metadata) + dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y, Y_metadata=Y_metadata) grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g') grad.randomize() print model assert grad.checkgrad(verbose=1) @with_setup(setUp, tearDown) - def t_d2logpdf_df2(self, model, Y, f): + def t_d2logpdf_df2(self, model, Y, f, Y_metadata): print "\n{}".format(inspect.stack()[0][3]) - dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) - d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y) + dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y, Y_metadata=Y_metadata) + d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y, Y_metadata=Y_metadata) grad = GradientChecker(dlogpdf_df, d2logpdf_df2, f.copy(), 'g') grad.randomize() print model assert grad.checkgrad(verbose=1) @with_setup(setUp, tearDown) - def t_d3logpdf_df3(self, model, Y, f): + def t_d3logpdf_df3(self, model, Y, f, Y_metadata): print "\n{}".format(inspect.stack()[0][3]) - d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y) - d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y) + d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y, Y_metadata=Y_metadata) + d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y, Y_metadata=Y_metadata) grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, f.copy(), 'g') grad.randomize() print model @@ -393,32 +416,32 @@ class TestNoiseModels(object): # df_dparams # ############## @with_setup(setUp, tearDown) - def t_dlogpdf_dparams(self, model, Y, f, params, params_names, param_constraints): + def t_dlogpdf_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model assert ( dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta, - params, params_names, args=(f, Y), constraints=param_constraints, + params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints, randomize=False, verbose=True) ) @with_setup(setUp, tearDown) - def t_dlogpdf_df_dparams(self, model, Y, f, params, params_names, param_constraints): + def t_dlogpdf_df_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model assert ( dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta, - params, params_names, args=(f, Y), constraints=param_constraints, + params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints, randomize=False, verbose=True) ) @with_setup(setUp, tearDown) - def t_d2logpdf2_df2_dparams(self, model, Y, f, params, params_names, param_constraints): + def t_d2logpdf2_df2_dparams(self, model, Y, f, Y_metadata, params, params_names, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model assert ( dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta, - params, params_names, args=(f, Y), constraints=param_constraints, + params, params_names, args=(f, Y, Y_metadata), constraints=param_constraints, randomize=False, verbose=True) ) @@ -426,10 +449,10 @@ class TestNoiseModels(object): # dpdf_dlink's # ################ @with_setup(setUp, 
tearDown) - def t_dlogpdf_dlink(self, model, Y, f, link_f_constraints): + def t_dlogpdf_dlink(self, model, Y, f, Y_metadata, link_f_constraints): print "\n{}".format(inspect.stack()[0][3]) - logpdf = functools.partial(model.logpdf_link, y=Y) - dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y) + logpdf = functools.partial(model.logpdf_link, y=Y, Y_metadata=Y_metadata) + dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y, Y_metadata=Y_metadata) grad = GradientChecker(logpdf, dlogpdf_dlink, f.copy(), 'g') #Apply constraints to link_f values @@ -442,10 +465,10 @@ class TestNoiseModels(object): assert grad.checkgrad(verbose=1) @with_setup(setUp, tearDown) - def t_d2logpdf_dlink2(self, model, Y, f, link_f_constraints): + def t_d2logpdf_dlink2(self, model, Y, f, Y_metadata, link_f_constraints): print "\n{}".format(inspect.stack()[0][3]) - dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y) - d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y) + dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y, Y_metadata=Y_metadata) + d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y, Y_metadata=Y_metadata) grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, f.copy(), 'g') #Apply constraints to link_f values @@ -458,10 +481,10 @@ class TestNoiseModels(object): assert grad.checkgrad(verbose=1) @with_setup(setUp, tearDown) - def t_d3logpdf_dlink3(self, model, Y, f, link_f_constraints): + def t_d3logpdf_dlink3(self, model, Y, f, Y_metadata, link_f_constraints): print "\n{}".format(inspect.stack()[0][3]) - d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y) - d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y) + d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y, Y_metadata=Y_metadata) + d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y, Y_metadata=Y_metadata) grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, f.copy(), 'g') #Apply constraints to link_f values @@ -477,32 +500,32 @@ class TestNoiseModels(object): # dlink_dparams # ################# @with_setup(setUp, tearDown) - def t_dlogpdf_link_dparams(self, model, Y, f, params, param_names, param_constraints): + def t_dlogpdf_link_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model assert ( dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta, - params, param_names, args=(f, Y), constraints=param_constraints, + params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints, randomize=False, verbose=True) ) @with_setup(setUp, tearDown) - def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_names, param_constraints): + def t_dlogpdf_dlink_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model assert ( dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta, - params, param_names, args=(f, Y), constraints=param_constraints, + params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints, randomize=False, verbose=True) ) @with_setup(setUp, tearDown) - def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_names, param_constraints): + def t_d2logpdf2_dlink2_dparams(self, model, Y, f, Y_metadata, params, param_names, param_constraints): print "\n{}".format(inspect.stack()[0][3]) print model assert ( dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta, - params, param_names, args=(f, Y), 
constraints=param_constraints, + params, param_names, args=(f, Y, Y_metadata), constraints=param_constraints, randomize=False, verbose=True) ) @@ -510,14 +533,15 @@ class TestNoiseModels(object): # laplace test # ################ @with_setup(setUp, tearDown) - def t_laplace_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints): + def t_laplace_fit_rbf_white(self, model, X, Y, f, Y_metadata, step, param_vals, param_names, constraints): print "\n{}".format(inspect.stack()[0][3]) #Normalize Y = Y/Y.max() - white_var = 1e-6 + white_var = 1e-5 kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1]) laplace_likelihood = GPy.inference.latent_function_inference.Laplace() - m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood) + + m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, Y_metadata=Y_metadata, inference_method=laplace_likelihood) m['.*white'].constrain_fixed(white_var) #Set constraints @@ -526,6 +550,7 @@ class TestNoiseModels(object): print m m.randomize() + m.randomize() #Set params for param_num in range(len(param_names)): @@ -545,14 +570,15 @@ class TestNoiseModels(object): # EP test # ########### @with_setup(setUp, tearDown) - def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints): + def t_ep_fit_rbf_white(self, model, X, Y, f, Y_metadata, step, param_vals, param_names, constraints): print "\n{}".format(inspect.stack()[0][3]) #Normalize Y = Y/Y.max() white_var = 1e-6 kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1]) ep_inf = GPy.inference.latent_function_inference.EP() - m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf) + + m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, Y_metadata=Y_metadata, inference_method=ep_inf) m['.*white'].constrain_fixed(white_var) for param_num in range(len(param_names)): @@ -571,8 +597,8 @@ class LaplaceTests(unittest.TestCase): """ def setUp(self): - self.N = 5 - self.D = 3 + self.N = 15 + self.D = 1 self.X = np.random.rand(self.N, self.D)*10 self.real_std = 0.1 @@ -636,20 +662,20 @@ class LaplaceTests(unittest.TestCase): exact_inf = GPy.inference.latent_function_inference.ExactGaussianInference() m1 = GPy.core.GP(X, Y.copy(), kernel=kernel1, likelihood=gauss_distr1, inference_method=exact_inf) m1['.*white'].constrain_fixed(1e-6) - m1['.*rbf.variance'] = initial_var_guess - m1['.*rbf.variance'].constrain_bounded(1e-4, 10) + m1['.*Gaussian_noise.variance'].constrain_bounded(1e-4, 10) m1.randomize() gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess) laplace_inf = GPy.inference.latent_function_inference.Laplace() m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf) m2['.*white'].constrain_fixed(1e-6) - m2['.*rbf.variance'].constrain_bounded(1e-4, 10) + m2['.*Gaussian_noise.variance'].constrain_bounded(1e-4, 10) m2.randomize() if debug: print m1 print m2 + optimizer = 'scg' print "Gaussian" m1.optimize(optimizer, messages=debug, ipython_notebook=False) @@ -687,8 +713,6 @@ class LaplaceTests(unittest.TestCase): pb.scatter(X, m1.likelihood.Y, c='g') pb.scatter(X, m2.likelihood.Y, c='r', marker='x') - - #Check Y's are the same np.testing.assert_almost_equal(m1.Y, m2.Y, decimal=5) #Check marginals are the same diff --git a/GPy/util/misc.py b/GPy/util/misc.py index bf37159d..99bd62b3 100644 --- a/GPy/util/misc.py +++ b/GPy/util/misc.py @@ -4,6 +4,16 @@ import numpy as np from config import * +_lim_val = 
np.finfo(np.float64).max
+
+_lim_val_exp = np.log(_lim_val)
+_lim_val_square = np.sqrt(_lim_val)
+_lim_val_cube = np.power(_lim_val, 1./3.)
+
+def safe_exp(f):
+    clip_f = np.clip(f, -np.inf, _lim_val_exp)
+    return np.exp(clip_f)
+
 def chain_1(df_dg, dg_dx):
     """
     Generic chaining function for first derivative
@@ -11,6 +21,11 @@ def chain_1(df_dg, dg_dx):
     .. math::
         \\frac{d(f . g)}{dx} = \\frac{df}{dg} \\frac{dg}{dx}
     """
+    if np.all(dg_dx==1.):
+        return df_dg
+    if len(df_dg) > 1 and df_dg.shape[-1] > 1:
+        raise NotImplementedError('Not implemented for matrices yet')
     return df_dg * dg_dx
 
 def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
     """
@@ -20,7 +35,13 @@
     .. math::
         \\frac{d^{2}(f . g)}{dx^{2}} = \\frac{d^{2}f}{dg^{2}}(\\frac{dg}{dx})^{2} + \\frac{df}{dg}\\frac{d^{2}g}{dx^{2}}
     """
-    return d2f_dg2*(dg_dx**2) + df_dg*d2g_dx2
+    if np.all(dg_dx==1.) and np.all(d2g_dx2 == 0):
+        return d2f_dg2
+    if len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1:
+        raise NotImplementedError('Not implemented for matrices yet')
+    #dg_dx_2 = np.clip(dg_dx, 1e-12, _lim_val_square)**2
+    dg_dx_2 = dg_dx**2
+    return d2f_dg2*(dg_dx_2) + df_dg*d2g_dx2
 
 def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
     """
@@ -29,11 +50,18 @@
     .. math::
         \\frac{d^{3}(f . g)}{dx^{3}} = \\frac{d^{3}f}{dg^{3}}(\\frac{dg}{dx})^{3} + 3\\frac{d^{2}f}{dg^{2}}\\frac{dg}{dx}\\frac{d^{2}g}{dx^{2}} + \\frac{df}{dg}\\frac{d^{3}g}{dx^{3}}
     """
-    return d3f_dg3*(dg_dx**3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3
+    if np.all(dg_dx==1.) and np.all(d2g_dx2==0) and np.all(d3g_dx3==0):
+        return d3f_dg3
+    if ((len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1)
+            or (len(d3f_dg3) > 1 and d3f_dg3.shape[-1] > 1)):
+        raise NotImplementedError('Not implemented for matrices yet')
+    #dg_dx_3 = np.clip(dg_dx, 1e-12, _lim_val_cube)**3
+    dg_dx_3 = dg_dx**3
+    return d3f_dg3*(dg_dx_3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3
 
 def opt_wrapper(m, **kwargs):
     """
     This function just wraps the optimization procedure of a GPy
""" m.optimize(**kwargs) @@ -96,3 +124,47 @@ from :class:ndarray)""" if len(param) == 1: return param[0].view(np.ndarray) return [x.view(np.ndarray) for x in param] + +def blockify_hessian(func): + def wrapper_func(self, *args, **kwargs): + # Invoke the wrapped function first + retval = func(self, *args, **kwargs) + # Now do something here with retval and/or action + if self.not_block_really and (retval.shape[0] != retval.shape[1]): + return np.diagflat(retval) + else: + return retval + return wrapper_func + +def blockify_third(func): + def wrapper_func(self, *args, **kwargs): + # Invoke the wrapped function first + retval = func(self, *args, **kwargs) + # Now do something here with retval and/or action + if self.not_block_really and (len(retval.shape) < 3): + num_data = retval.shape[0] + d3_block_cache = np.zeros((num_data, num_data, num_data)) + diag_slice = range(num_data) + d3_block_cache[diag_slice, diag_slice, diag_slice] = np.squeeze(retval) + return d3_block_cache + else: + return retval + return wrapper_func + +def blockify_dhess_dtheta(func): + def wrapper_func(self, *args, **kwargs): + # Invoke the wrapped function first + retval = func(self, *args, **kwargs) + # Now do something here with retval and/or action + if self.not_block_really and (len(retval.shape) < 3): + num_data = retval.shape[0] + num_params = retval.shape[-1] + dhess_dtheta = np.zeros((num_data, num_data, num_params)) + diag_slice = range(num_data) + for param_ind in range(num_params): + dhess_dtheta[diag_slice, diag_slice, param_ind] = np.squeeze(retval[:,param_ind]) + return dhess_dtheta + else: + return retval + return wrapper_func + From 4d27fddd375cda05a63579706defa3af0877c4a2 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 27 Mar 2015 14:24:24 +0000 Subject: [PATCH 130/166] Small tidying up --- GPy/likelihoods/bernoulli.py | 38 ----------------------------------- GPy/likelihoods/likelihood.py | 2 +- 2 files changed, 1 insertion(+), 39 deletions(-) diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index c398b3a4..f5690aa4 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -248,41 +248,3 @@ class Bernoulli(Likelihood): def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None): pass - - def variational_expectations(self, Y, m, v, gh_points=None): - """ - Probit specific numerical stable integrations - """ - #Move to be faster - if self.gp_link: - pass - Yshape = Y.shape - mshape = m.shape - vshape = v.shape - Y = Y.flatten() - m = m.flatten() - v = v.flatten() - - assert Yshape == mshape - assert mshape == vshape - - Ysign = np.where(Y==1,1,-1).flatten() - gh_x, gh_w = np.polynomial.hermite.hermgauss(20) - - #Shapes a bit weird - X = gh_x[None,:]*np.sqrt(2.*v[:, None]) + (m*Ysign)[:,None] - p = stats.norm.cdf(X) - p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability - N = stats.norm.pdf(X) - F = np.log(p).dot(gh_w) - NoverP = N/p - dF_dm = (NoverP*Ysign[:,None]).dot(gh_w) - dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(gh_w) - if np.any(np.isnan(dF_dv)) or np.any(np.isinf(dF_dv)): - stop - if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)): - stop - #FIXME: Might be wrong reshaping - return F.reshape(Yshape), dF_dm.reshape(mshape), dF_dv.reshape(vshape), None - - diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index a545d54e..022670a5 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -325,7 +325,7 @@ class Likelihood(Parameterized): def logpdf_sum(self, f, y, Y_metadata=None): """ Convenience 
function that can be overridden for functions where this could
-        be computed more efficiently (Theano?)
+        be computed more efficiently
         """
         return np.sum(self.logpdf(f, y, Y_metadata=Y_metadata))

From ba648900d2d3095962f9ced43dee7fbfe52d1c05 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 27 Mar 2015 14:48:18 +0000
Subject: [PATCH 131/166] adding the beginnings of some benchmarks

---
 benchmarks/boston_housing.py | 44 ++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 benchmarks/boston_housing.py

diff --git a/benchmarks/boston_housing.py b/benchmarks/boston_housing.py
new file mode 100644
index 00000000..988e3460
--- /dev/null
+++ b/benchmarks/boston_housing.py
@@ -0,0 +1,44 @@
+import numpy as np
+import GPy
+
+def load_housing_data():
+    X = np.loadtxt('housing.data')
+    X, Y = X[:,:-1], X[:,-1:]
+
+    #scale the X data
+    xmax, xmin = X.max(0), X.min(0)
+    X = (X-xmin)/(xmax-xmin)
+
+    #log the response
+    Y = np.log(Y)
+    return X, Y
+
+def fit_full_GP():
+    X, Y = load_housing_data()
+    k = GPy.kern.RBF(X.shape[1], ARD=True) + GPy.kern.Linear(X.shape[1])
+    m = GPy.models.GPRegression(X, Y, kernel=k)
+    m.optimize('bfgs', max_iters=400, gtol=0)
+    return m
+
+def fit_svgp_st():
+    np.random.seed(0)
+    X, Y = load_housing_data()
+
+    Z = X[np.random.permutation(X.shape[0])[:100]]
+    k = GPy.kern.RBF(X.shape[1], ARD=True) + GPy.kern.Linear(X.shape[1]) + GPy.kern.White(1,0.01)
+
+    lik = GPy.likelihoods.StudentT(deg_free=3.)
+    m = GPy.core.SVGP(X, Y, Z=Z, kernel=k, likelihood=lik)
+    [m.optimize('scg', max_iters=40, gtol=0, messages=1, xtol=0, ftol=0) for i in range(10)]
+    m.optimize('bfgs', max_iters=4000, gtol=0, messages=1, xtol=0, ftol=0)
+    return m
+
+
+
+
+if __name__=="__main__":
+    import timeit
+

From 4cf1f5494f99aa4eee0a3228a745eb84a8b5e51a Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 27 Mar 2015 15:12:52 +0000
Subject: [PATCH 132/166] extra kernel stressing in benchmarks, bugfix in svgp

---
 .../latent_function_inference/svgp.py |   2 +-
 benchmarks/boston_housing.py          |   2 +-
 benchmarks/housing.data               | 506 ++++++++++++++++++
 3 files changed, 508 insertions(+), 2 deletions(-)
 create mode 100644 benchmarks/housing.data

diff --git a/GPy/inference/latent_function_inference/svgp.py b/GPy/inference/latent_function_inference/svgp.py
index d4797311..a3d1f78f 100644
--- a/GPy/inference/latent_function_inference/svgp.py
+++ b/GPy/inference/latent_function_inference/svgp.py
@@ -71,4 +71,4 @@ class SVGP(LatentFunctionInference):
         dL_dchol = np.dstack([2.*np.dot(dL_dS[:,:,i], L[:,:,i]) for i in range(num_outputs)])
         dL_dchol = choleskies.triang_to_flat(dL_dchol)
 
-        return Posterior(mean=q_u_mean, cov=S, K=Kmm), log_marginal, {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv, 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
+        return Posterior(mean=q_u_mean, cov=S, K=Kmm), log_marginal, {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv.sum(1), 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}

diff --git a/benchmarks/boston_housing.py b/benchmarks/boston_housing.py
index 988e3460..0dcff082 100644
--- a/benchmarks/boston_housing.py
+++ b/benchmarks/boston_housing.py
@@ -25,7 +25,7 @@ def fit_svgp_st():
     X, Y = load_housing_data()
 
     Z = X[np.random.permutation(X.shape[0])[:100]]
-    k = GPy.kern.RBF(X.shape[1], ARD=True) + GPy.kern.Linear(X.shape[1]) + GPy.kern.White(1,0.01)
+    k = GPy.kern.RBF(X.shape[1], ARD=True) + GPy.kern.Linear(X.shape[1], ARD=True) + GPy.kern.White(1,0.01) + GPy.kern.Bias(1)
     lik = 
GPy.likelihoods.StudentT(deg_free=3.) m = GPy.core.SVGP(X, Y, Z=Z, kernel=k, likelihood=lik) diff --git a/benchmarks/housing.data b/benchmarks/housing.data new file mode 100644 index 00000000..f83ac564 --- /dev/null +++ b/benchmarks/housing.data @@ -0,0 +1,506 @@ + 0.00632 18.00 2.310 0 0.5380 6.5750 65.20 4.0900 1 296.0 15.30 396.90 4.98 24.00 + 0.02731 0.00 7.070 0 0.4690 6.4210 78.90 4.9671 2 242.0 17.80 396.90 9.14 21.60 + 0.02729 0.00 7.070 0 0.4690 7.1850 61.10 4.9671 2 242.0 17.80 392.83 4.03 34.70 + 0.03237 0.00 2.180 0 0.4580 6.9980 45.80 6.0622 3 222.0 18.70 394.63 2.94 33.40 + 0.06905 0.00 2.180 0 0.4580 7.1470 54.20 6.0622 3 222.0 18.70 396.90 5.33 36.20 + 0.02985 0.00 2.180 0 0.4580 6.4300 58.70 6.0622 3 222.0 18.70 394.12 5.21 28.70 + 0.08829 12.50 7.870 0 0.5240 6.0120 66.60 5.5605 5 311.0 15.20 395.60 12.43 22.90 + 0.14455 12.50 7.870 0 0.5240 6.1720 96.10 5.9505 5 311.0 15.20 396.90 19.15 27.10 + 0.21124 12.50 7.870 0 0.5240 5.6310 100.00 6.0821 5 311.0 15.20 386.63 29.93 16.50 + 0.17004 12.50 7.870 0 0.5240 6.0040 85.90 6.5921 5 311.0 15.20 386.71 17.10 18.90 + 0.22489 12.50 7.870 0 0.5240 6.3770 94.30 6.3467 5 311.0 15.20 392.52 20.45 15.00 + 0.11747 12.50 7.870 0 0.5240 6.0090 82.90 6.2267 5 311.0 15.20 396.90 13.27 18.90 + 0.09378 12.50 7.870 0 0.5240 5.8890 39.00 5.4509 5 311.0 15.20 390.50 15.71 21.70 + 0.62976 0.00 8.140 0 0.5380 5.9490 61.80 4.7075 4 307.0 21.00 396.90 8.26 20.40 + 0.63796 0.00 8.140 0 0.5380 6.0960 84.50 4.4619 4 307.0 21.00 380.02 10.26 18.20 + 0.62739 0.00 8.140 0 0.5380 5.8340 56.50 4.4986 4 307.0 21.00 395.62 8.47 19.90 + 1.05393 0.00 8.140 0 0.5380 5.9350 29.30 4.4986 4 307.0 21.00 386.85 6.58 23.10 + 0.78420 0.00 8.140 0 0.5380 5.9900 81.70 4.2579 4 307.0 21.00 386.75 14.67 17.50 + 0.80271 0.00 8.140 0 0.5380 5.4560 36.60 3.7965 4 307.0 21.00 288.99 11.69 20.20 + 0.72580 0.00 8.140 0 0.5380 5.7270 69.50 3.7965 4 307.0 21.00 390.95 11.28 18.20 + 1.25179 0.00 8.140 0 0.5380 5.5700 98.10 3.7979 4 307.0 21.00 376.57 21.02 13.60 + 0.85204 0.00 8.140 0 0.5380 5.9650 89.20 4.0123 4 307.0 21.00 392.53 13.83 19.60 + 1.23247 0.00 8.140 0 0.5380 6.1420 91.70 3.9769 4 307.0 21.00 396.90 18.72 15.20 + 0.98843 0.00 8.140 0 0.5380 5.8130 100.00 4.0952 4 307.0 21.00 394.54 19.88 14.50 + 0.75026 0.00 8.140 0 0.5380 5.9240 94.10 4.3996 4 307.0 21.00 394.33 16.30 15.60 + 0.84054 0.00 8.140 0 0.5380 5.5990 85.70 4.4546 4 307.0 21.00 303.42 16.51 13.90 + 0.67191 0.00 8.140 0 0.5380 5.8130 90.30 4.6820 4 307.0 21.00 376.88 14.81 16.60 + 0.95577 0.00 8.140 0 0.5380 6.0470 88.80 4.4534 4 307.0 21.00 306.38 17.28 14.80 + 0.77299 0.00 8.140 0 0.5380 6.4950 94.40 4.4547 4 307.0 21.00 387.94 12.80 18.40 + 1.00245 0.00 8.140 0 0.5380 6.6740 87.30 4.2390 4 307.0 21.00 380.23 11.98 21.00 + 1.13081 0.00 8.140 0 0.5380 5.7130 94.10 4.2330 4 307.0 21.00 360.17 22.60 12.70 + 1.35472 0.00 8.140 0 0.5380 6.0720 100.00 4.1750 4 307.0 21.00 376.73 13.04 14.50 + 1.38799 0.00 8.140 0 0.5380 5.9500 82.00 3.9900 4 307.0 21.00 232.60 27.71 13.20 + 1.15172 0.00 8.140 0 0.5380 5.7010 95.00 3.7872 4 307.0 21.00 358.77 18.35 13.10 + 1.61282 0.00 8.140 0 0.5380 6.0960 96.90 3.7598 4 307.0 21.00 248.31 20.34 13.50 + 0.06417 0.00 5.960 0 0.4990 5.9330 68.20 3.3603 5 279.0 19.20 396.90 9.68 18.90 + 0.09744 0.00 5.960 0 0.4990 5.8410 61.40 3.3779 5 279.0 19.20 377.56 11.41 20.00 + 0.08014 0.00 5.960 0 0.4990 5.8500 41.50 3.9342 5 279.0 19.20 396.90 8.77 21.00 + 0.17505 0.00 5.960 0 0.4990 5.9660 30.20 3.8473 5 279.0 19.20 393.43 10.13 24.70 + 0.02763 75.00 2.950 0 0.4280 6.5950 21.80 5.4011 3 
252.0 18.30 395.63 4.32 30.80 + 0.03359 75.00 2.950 0 0.4280 7.0240 15.80 5.4011 3 252.0 18.30 395.62 1.98 34.90 + 0.12744 0.00 6.910 0 0.4480 6.7700 2.90 5.7209 3 233.0 17.90 385.41 4.84 26.60 + 0.14150 0.00 6.910 0 0.4480 6.1690 6.60 5.7209 3 233.0 17.90 383.37 5.81 25.30 + 0.15936 0.00 6.910 0 0.4480 6.2110 6.50 5.7209 3 233.0 17.90 394.46 7.44 24.70 + 0.12269 0.00 6.910 0 0.4480 6.0690 40.00 5.7209 3 233.0 17.90 389.39 9.55 21.20 + 0.17142 0.00 6.910 0 0.4480 5.6820 33.80 5.1004 3 233.0 17.90 396.90 10.21 19.30 + 0.18836 0.00 6.910 0 0.4480 5.7860 33.30 5.1004 3 233.0 17.90 396.90 14.15 20.00 + 0.22927 0.00 6.910 0 0.4480 6.0300 85.50 5.6894 3 233.0 17.90 392.74 18.80 16.60 + 0.25387 0.00 6.910 0 0.4480 5.3990 95.30 5.8700 3 233.0 17.90 396.90 30.81 14.40 + 0.21977 0.00 6.910 0 0.4480 5.6020 62.00 6.0877 3 233.0 17.90 396.90 16.20 19.40 + 0.08873 21.00 5.640 0 0.4390 5.9630 45.70 6.8147 4 243.0 16.80 395.56 13.45 19.70 + 0.04337 21.00 5.640 0 0.4390 6.1150 63.00 6.8147 4 243.0 16.80 393.97 9.43 20.50 + 0.05360 21.00 5.640 0 0.4390 6.5110 21.10 6.8147 4 243.0 16.80 396.90 5.28 25.00 + 0.04981 21.00 5.640 0 0.4390 5.9980 21.40 6.8147 4 243.0 16.80 396.90 8.43 23.40 + 0.01360 75.00 4.000 0 0.4100 5.8880 47.60 7.3197 3 469.0 21.10 396.90 14.80 18.90 + 0.01311 90.00 1.220 0 0.4030 7.2490 21.90 8.6966 5 226.0 17.90 395.93 4.81 35.40 + 0.02055 85.00 0.740 0 0.4100 6.3830 35.70 9.1876 2 313.0 17.30 396.90 5.77 24.70 + 0.01432 100.00 1.320 0 0.4110 6.8160 40.50 8.3248 5 256.0 15.10 392.90 3.95 31.60 + 0.15445 25.00 5.130 0 0.4530 6.1450 29.20 7.8148 8 284.0 19.70 390.68 6.86 23.30 + 0.10328 25.00 5.130 0 0.4530 5.9270 47.20 6.9320 8 284.0 19.70 396.90 9.22 19.60 + 0.14932 25.00 5.130 0 0.4530 5.7410 66.20 7.2254 8 284.0 19.70 395.11 13.15 18.70 + 0.17171 25.00 5.130 0 0.4530 5.9660 93.40 6.8185 8 284.0 19.70 378.08 14.44 16.00 + 0.11027 25.00 5.130 0 0.4530 6.4560 67.80 7.2255 8 284.0 19.70 396.90 6.73 22.20 + 0.12650 25.00 5.130 0 0.4530 6.7620 43.40 7.9809 8 284.0 19.70 395.58 9.50 25.00 + 0.01951 17.50 1.380 0 0.4161 7.1040 59.50 9.2229 3 216.0 18.60 393.24 8.05 33.00 + 0.03584 80.00 3.370 0 0.3980 6.2900 17.80 6.6115 4 337.0 16.10 396.90 4.67 23.50 + 0.04379 80.00 3.370 0 0.3980 5.7870 31.10 6.6115 4 337.0 16.10 396.90 10.24 19.40 + 0.05789 12.50 6.070 0 0.4090 5.8780 21.40 6.4980 4 345.0 18.90 396.21 8.10 22.00 + 0.13554 12.50 6.070 0 0.4090 5.5940 36.80 6.4980 4 345.0 18.90 396.90 13.09 17.40 + 0.12816 12.50 6.070 0 0.4090 5.8850 33.00 6.4980 4 345.0 18.90 396.90 8.79 20.90 + 0.08826 0.00 10.810 0 0.4130 6.4170 6.60 5.2873 4 305.0 19.20 383.73 6.72 24.20 + 0.15876 0.00 10.810 0 0.4130 5.9610 17.50 5.2873 4 305.0 19.20 376.94 9.88 21.70 + 0.09164 0.00 10.810 0 0.4130 6.0650 7.80 5.2873 4 305.0 19.20 390.91 5.52 22.80 + 0.19539 0.00 10.810 0 0.4130 6.2450 6.20 5.2873 4 305.0 19.20 377.17 7.54 23.40 + 0.07896 0.00 12.830 0 0.4370 6.2730 6.00 4.2515 5 398.0 18.70 394.92 6.78 24.10 + 0.09512 0.00 12.830 0 0.4370 6.2860 45.00 4.5026 5 398.0 18.70 383.23 8.94 21.40 + 0.10153 0.00 12.830 0 0.4370 6.2790 74.50 4.0522 5 398.0 18.70 373.66 11.97 20.00 + 0.08707 0.00 12.830 0 0.4370 6.1400 45.80 4.0905 5 398.0 18.70 386.96 10.27 20.80 + 0.05646 0.00 12.830 0 0.4370 6.2320 53.70 5.0141 5 398.0 18.70 386.40 12.34 21.20 + 0.08387 0.00 12.830 0 0.4370 5.8740 36.60 4.5026 5 398.0 18.70 396.06 9.10 20.30 + 0.04113 25.00 4.860 0 0.4260 6.7270 33.50 5.4007 4 281.0 19.00 396.90 5.29 28.00 + 0.04462 25.00 4.860 0 0.4260 6.6190 70.40 5.4007 4 281.0 19.00 395.63 7.22 23.90 + 0.03659 25.00 4.860 0 0.4260 6.3020 
32.20 5.4007 4 281.0 19.00 396.90 6.72 24.80 + 0.03551 25.00 4.860 0 0.4260 6.1670 46.70 5.4007 4 281.0 19.00 390.64 7.51 22.90 + 0.05059 0.00 4.490 0 0.4490 6.3890 48.00 4.7794 3 247.0 18.50 396.90 9.62 23.90 + 0.05735 0.00 4.490 0 0.4490 6.6300 56.10 4.4377 3 247.0 18.50 392.30 6.53 26.60 + 0.05188 0.00 4.490 0 0.4490 6.0150 45.10 4.4272 3 247.0 18.50 395.99 12.86 22.50 + 0.07151 0.00 4.490 0 0.4490 6.1210 56.80 3.7476 3 247.0 18.50 395.15 8.44 22.20 + 0.05660 0.00 3.410 0 0.4890 7.0070 86.30 3.4217 2 270.0 17.80 396.90 5.50 23.60 + 0.05302 0.00 3.410 0 0.4890 7.0790 63.10 3.4145 2 270.0 17.80 396.06 5.70 28.70 + 0.04684 0.00 3.410 0 0.4890 6.4170 66.10 3.0923 2 270.0 17.80 392.18 8.81 22.60 + 0.03932 0.00 3.410 0 0.4890 6.4050 73.90 3.0921 2 270.0 17.80 393.55 8.20 22.00 + 0.04203 28.00 15.040 0 0.4640 6.4420 53.60 3.6659 4 270.0 18.20 395.01 8.16 22.90 + 0.02875 28.00 15.040 0 0.4640 6.2110 28.90 3.6659 4 270.0 18.20 396.33 6.21 25.00 + 0.04294 28.00 15.040 0 0.4640 6.2490 77.30 3.6150 4 270.0 18.20 396.90 10.59 20.60 + 0.12204 0.00 2.890 0 0.4450 6.6250 57.80 3.4952 2 276.0 18.00 357.98 6.65 28.40 + 0.11504 0.00 2.890 0 0.4450 6.1630 69.60 3.4952 2 276.0 18.00 391.83 11.34 21.40 + 0.12083 0.00 2.890 0 0.4450 8.0690 76.00 3.4952 2 276.0 18.00 396.90 4.21 38.70 + 0.08187 0.00 2.890 0 0.4450 7.8200 36.90 3.4952 2 276.0 18.00 393.53 3.57 43.80 + 0.06860 0.00 2.890 0 0.4450 7.4160 62.50 3.4952 2 276.0 18.00 396.90 6.19 33.20 + 0.14866 0.00 8.560 0 0.5200 6.7270 79.90 2.7778 5 384.0 20.90 394.76 9.42 27.50 + 0.11432 0.00 8.560 0 0.5200 6.7810 71.30 2.8561 5 384.0 20.90 395.58 7.67 26.50 + 0.22876 0.00 8.560 0 0.5200 6.4050 85.40 2.7147 5 384.0 20.90 70.80 10.63 18.60 + 0.21161 0.00 8.560 0 0.5200 6.1370 87.40 2.7147 5 384.0 20.90 394.47 13.44 19.30 + 0.13960 0.00 8.560 0 0.5200 6.1670 90.00 2.4210 5 384.0 20.90 392.69 12.33 20.10 + 0.13262 0.00 8.560 0 0.5200 5.8510 96.70 2.1069 5 384.0 20.90 394.05 16.47 19.50 + 0.17120 0.00 8.560 0 0.5200 5.8360 91.90 2.2110 5 384.0 20.90 395.67 18.66 19.50 + 0.13117 0.00 8.560 0 0.5200 6.1270 85.20 2.1224 5 384.0 20.90 387.69 14.09 20.40 + 0.12802 0.00 8.560 0 0.5200 6.4740 97.10 2.4329 5 384.0 20.90 395.24 12.27 19.80 + 0.26363 0.00 8.560 0 0.5200 6.2290 91.20 2.5451 5 384.0 20.90 391.23 15.55 19.40 + 0.10793 0.00 8.560 0 0.5200 6.1950 54.40 2.7778 5 384.0 20.90 393.49 13.00 21.70 + 0.10084 0.00 10.010 0 0.5470 6.7150 81.60 2.6775 6 432.0 17.80 395.59 10.16 22.80 + 0.12329 0.00 10.010 0 0.5470 5.9130 92.90 2.3534 6 432.0 17.80 394.95 16.21 18.80 + 0.22212 0.00 10.010 0 0.5470 6.0920 95.40 2.5480 6 432.0 17.80 396.90 17.09 18.70 + 0.14231 0.00 10.010 0 0.5470 6.2540 84.20 2.2565 6 432.0 17.80 388.74 10.45 18.50 + 0.17134 0.00 10.010 0 0.5470 5.9280 88.20 2.4631 6 432.0 17.80 344.91 15.76 18.30 + 0.13158 0.00 10.010 0 0.5470 6.1760 72.50 2.7301 6 432.0 17.80 393.30 12.04 21.20 + 0.15098 0.00 10.010 0 0.5470 6.0210 82.60 2.7474 6 432.0 17.80 394.51 10.30 19.20 + 0.13058 0.00 10.010 0 0.5470 5.8720 73.10 2.4775 6 432.0 17.80 338.63 15.37 20.40 + 0.14476 0.00 10.010 0 0.5470 5.7310 65.20 2.7592 6 432.0 17.80 391.50 13.61 19.30 + 0.06899 0.00 25.650 0 0.5810 5.8700 69.70 2.2577 2 188.0 19.10 389.15 14.37 22.00 + 0.07165 0.00 25.650 0 0.5810 6.0040 84.10 2.1974 2 188.0 19.10 377.67 14.27 20.30 + 0.09299 0.00 25.650 0 0.5810 5.9610 92.90 2.0869 2 188.0 19.10 378.09 17.93 20.50 + 0.15038 0.00 25.650 0 0.5810 5.8560 97.00 1.9444 2 188.0 19.10 370.31 25.41 17.30 + 0.09849 0.00 25.650 0 0.5810 5.8790 95.80 2.0063 2 188.0 19.10 379.38 17.58 18.80 + 0.16902 0.00 
25.650 0 0.5810 5.9860 88.40 1.9929 2 188.0 19.10 385.02 14.81 21.40 + 0.38735 0.00 25.650 0 0.5810 5.6130 95.60 1.7572 2 188.0 19.10 359.29 27.26 15.70 + 0.25915 0.00 21.890 0 0.6240 5.6930 96.00 1.7883 4 437.0 21.20 392.11 17.19 16.20 + 0.32543 0.00 21.890 0 0.6240 6.4310 98.80 1.8125 4 437.0 21.20 396.90 15.39 18.00 + 0.88125 0.00 21.890 0 0.6240 5.6370 94.70 1.9799 4 437.0 21.20 396.90 18.34 14.30 + 0.34006 0.00 21.890 0 0.6240 6.4580 98.90 2.1185 4 437.0 21.20 395.04 12.60 19.20 + 1.19294 0.00 21.890 0 0.6240 6.3260 97.70 2.2710 4 437.0 21.20 396.90 12.26 19.60 + 0.59005 0.00 21.890 0 0.6240 6.3720 97.90 2.3274 4 437.0 21.20 385.76 11.12 23.00 + 0.32982 0.00 21.890 0 0.6240 5.8220 95.40 2.4699 4 437.0 21.20 388.69 15.03 18.40 + 0.97617 0.00 21.890 0 0.6240 5.7570 98.40 2.3460 4 437.0 21.20 262.76 17.31 15.60 + 0.55778 0.00 21.890 0 0.6240 6.3350 98.20 2.1107 4 437.0 21.20 394.67 16.96 18.10 + 0.32264 0.00 21.890 0 0.6240 5.9420 93.50 1.9669 4 437.0 21.20 378.25 16.90 17.40 + 0.35233 0.00 21.890 0 0.6240 6.4540 98.40 1.8498 4 437.0 21.20 394.08 14.59 17.10 + 0.24980 0.00 21.890 0 0.6240 5.8570 98.20 1.6686 4 437.0 21.20 392.04 21.32 13.30 + 0.54452 0.00 21.890 0 0.6240 6.1510 97.90 1.6687 4 437.0 21.20 396.90 18.46 17.80 + 0.29090 0.00 21.890 0 0.6240 6.1740 93.60 1.6119 4 437.0 21.20 388.08 24.16 14.00 + 1.62864 0.00 21.890 0 0.6240 5.0190 100.00 1.4394 4 437.0 21.20 396.90 34.41 14.40 + 3.32105 0.00 19.580 1 0.8710 5.4030 100.00 1.3216 5 403.0 14.70 396.90 26.82 13.40 + 4.09740 0.00 19.580 0 0.8710 5.4680 100.00 1.4118 5 403.0 14.70 396.90 26.42 15.60 + 2.77974 0.00 19.580 0 0.8710 4.9030 97.80 1.3459 5 403.0 14.70 396.90 29.29 11.80 + 2.37934 0.00 19.580 0 0.8710 6.1300 100.00 1.4191 5 403.0 14.70 172.91 27.80 13.80 + 2.15505 0.00 19.580 0 0.8710 5.6280 100.00 1.5166 5 403.0 14.70 169.27 16.65 15.60 + 2.36862 0.00 19.580 0 0.8710 4.9260 95.70 1.4608 5 403.0 14.70 391.71 29.53 14.60 + 2.33099 0.00 19.580 0 0.8710 5.1860 93.80 1.5296 5 403.0 14.70 356.99 28.32 17.80 + 2.73397 0.00 19.580 0 0.8710 5.5970 94.90 1.5257 5 403.0 14.70 351.85 21.45 15.40 + 1.65660 0.00 19.580 0 0.8710 6.1220 97.30 1.6180 5 403.0 14.70 372.80 14.10 21.50 + 1.49632 0.00 19.580 0 0.8710 5.4040 100.00 1.5916 5 403.0 14.70 341.60 13.28 19.60 + 1.12658 0.00 19.580 1 0.8710 5.0120 88.00 1.6102 5 403.0 14.70 343.28 12.12 15.30 + 2.14918 0.00 19.580 0 0.8710 5.7090 98.50 1.6232 5 403.0 14.70 261.95 15.79 19.40 + 1.41385 0.00 19.580 1 0.8710 6.1290 96.00 1.7494 5 403.0 14.70 321.02 15.12 17.00 + 3.53501 0.00 19.580 1 0.8710 6.1520 82.60 1.7455 5 403.0 14.70 88.01 15.02 15.60 + 2.44668 0.00 19.580 0 0.8710 5.2720 94.00 1.7364 5 403.0 14.70 88.63 16.14 13.10 + 1.22358 0.00 19.580 0 0.6050 6.9430 97.40 1.8773 5 403.0 14.70 363.43 4.59 41.30 + 1.34284 0.00 19.580 0 0.6050 6.0660 100.00 1.7573 5 403.0 14.70 353.89 6.43 24.30 + 1.42502 0.00 19.580 0 0.8710 6.5100 100.00 1.7659 5 403.0 14.70 364.31 7.39 23.30 + 1.27346 0.00 19.580 1 0.6050 6.2500 92.60 1.7984 5 403.0 14.70 338.92 5.50 27.00 + 1.46336 0.00 19.580 0 0.6050 7.4890 90.80 1.9709 5 403.0 14.70 374.43 1.73 50.00 + 1.83377 0.00 19.580 1 0.6050 7.8020 98.20 2.0407 5 403.0 14.70 389.61 1.92 50.00 + 1.51902 0.00 19.580 1 0.6050 8.3750 93.90 2.1620 5 403.0 14.70 388.45 3.32 50.00 + 2.24236 0.00 19.580 0 0.6050 5.8540 91.80 2.4220 5 403.0 14.70 395.11 11.64 22.70 + 2.92400 0.00 19.580 0 0.6050 6.1010 93.00 2.2834 5 403.0 14.70 240.16 9.81 25.00 + 2.01019 0.00 19.580 0 0.6050 7.9290 96.20 2.0459 5 403.0 14.70 369.30 3.70 50.00 + 1.80028 0.00 19.580 0 0.6050 5.8770 79.20 
2.4259 5 403.0 14.70 227.61 12.14 23.80 + 2.30040 0.00 19.580 0 0.6050 6.3190 96.10 2.1000 5 403.0 14.70 297.09 11.10 23.80 + 2.44953 0.00 19.580 0 0.6050 6.4020 95.20 2.2625 5 403.0 14.70 330.04 11.32 22.30 + 1.20742 0.00 19.580 0 0.6050 5.8750 94.60 2.4259 5 403.0 14.70 292.29 14.43 17.40 + 2.31390 0.00 19.580 0 0.6050 5.8800 97.30 2.3887 5 403.0 14.70 348.13 12.03 19.10 + 0.13914 0.00 4.050 0 0.5100 5.5720 88.50 2.5961 5 296.0 16.60 396.90 14.69 23.10 + 0.09178 0.00 4.050 0 0.5100 6.4160 84.10 2.6463 5 296.0 16.60 395.50 9.04 23.60 + 0.08447 0.00 4.050 0 0.5100 5.8590 68.70 2.7019 5 296.0 16.60 393.23 9.64 22.60 + 0.06664 0.00 4.050 0 0.5100 6.5460 33.10 3.1323 5 296.0 16.60 390.96 5.33 29.40 + 0.07022 0.00 4.050 0 0.5100 6.0200 47.20 3.5549 5 296.0 16.60 393.23 10.11 23.20 + 0.05425 0.00 4.050 0 0.5100 6.3150 73.40 3.3175 5 296.0 16.60 395.60 6.29 24.60 + 0.06642 0.00 4.050 0 0.5100 6.8600 74.40 2.9153 5 296.0 16.60 391.27 6.92 29.90 + 0.05780 0.00 2.460 0 0.4880 6.9800 58.40 2.8290 3 193.0 17.80 396.90 5.04 37.20 + 0.06588 0.00 2.460 0 0.4880 7.7650 83.30 2.7410 3 193.0 17.80 395.56 7.56 39.80 + 0.06888 0.00 2.460 0 0.4880 6.1440 62.20 2.5979 3 193.0 17.80 396.90 9.45 36.20 + 0.09103 0.00 2.460 0 0.4880 7.1550 92.20 2.7006 3 193.0 17.80 394.12 4.82 37.90 + 0.10008 0.00 2.460 0 0.4880 6.5630 95.60 2.8470 3 193.0 17.80 396.90 5.68 32.50 + 0.08308 0.00 2.460 0 0.4880 5.6040 89.80 2.9879 3 193.0 17.80 391.00 13.98 26.40 + 0.06047 0.00 2.460 0 0.4880 6.1530 68.80 3.2797 3 193.0 17.80 387.11 13.15 29.60 + 0.05602 0.00 2.460 0 0.4880 7.8310 53.60 3.1992 3 193.0 17.80 392.63 4.45 50.00 + 0.07875 45.00 3.440 0 0.4370 6.7820 41.10 3.7886 5 398.0 15.20 393.87 6.68 32.00 + 0.12579 45.00 3.440 0 0.4370 6.5560 29.10 4.5667 5 398.0 15.20 382.84 4.56 29.80 + 0.08370 45.00 3.440 0 0.4370 7.1850 38.90 4.5667 5 398.0 15.20 396.90 5.39 34.90 + 0.09068 45.00 3.440 0 0.4370 6.9510 21.50 6.4798 5 398.0 15.20 377.68 5.10 37.00 + 0.06911 45.00 3.440 0 0.4370 6.7390 30.80 6.4798 5 398.0 15.20 389.71 4.69 30.50 + 0.08664 45.00 3.440 0 0.4370 7.1780 26.30 6.4798 5 398.0 15.20 390.49 2.87 36.40 + 0.02187 60.00 2.930 0 0.4010 6.8000 9.90 6.2196 1 265.0 15.60 393.37 5.03 31.10 + 0.01439 60.00 2.930 0 0.4010 6.6040 18.80 6.2196 1 265.0 15.60 376.70 4.38 29.10 + 0.01381 80.00 0.460 0 0.4220 7.8750 32.00 5.6484 4 255.0 14.40 394.23 2.97 50.00 + 0.04011 80.00 1.520 0 0.4040 7.2870 34.10 7.3090 2 329.0 12.60 396.90 4.08 33.30 + 0.04666 80.00 1.520 0 0.4040 7.1070 36.60 7.3090 2 329.0 12.60 354.31 8.61 30.30 + 0.03768 80.00 1.520 0 0.4040 7.2740 38.30 7.3090 2 329.0 12.60 392.20 6.62 34.60 + 0.03150 95.00 1.470 0 0.4030 6.9750 15.30 7.6534 3 402.0 17.00 396.90 4.56 34.90 + 0.01778 95.00 1.470 0 0.4030 7.1350 13.90 7.6534 3 402.0 17.00 384.30 4.45 32.90 + 0.03445 82.50 2.030 0 0.4150 6.1620 38.40 6.2700 2 348.0 14.70 393.77 7.43 24.10 + 0.02177 82.50 2.030 0 0.4150 7.6100 15.70 6.2700 2 348.0 14.70 395.38 3.11 42.30 + 0.03510 95.00 2.680 0 0.4161 7.8530 33.20 5.1180 4 224.0 14.70 392.78 3.81 48.50 + 0.02009 95.00 2.680 0 0.4161 8.0340 31.90 5.1180 4 224.0 14.70 390.55 2.88 50.00 + 0.13642 0.00 10.590 0 0.4890 5.8910 22.30 3.9454 4 277.0 18.60 396.90 10.87 22.60 + 0.22969 0.00 10.590 0 0.4890 6.3260 52.50 4.3549 4 277.0 18.60 394.87 10.97 24.40 + 0.25199 0.00 10.590 0 0.4890 5.7830 72.70 4.3549 4 277.0 18.60 389.43 18.06 22.50 + 0.13587 0.00 10.590 1 0.4890 6.0640 59.10 4.2392 4 277.0 18.60 381.32 14.66 24.40 + 0.43571 0.00 10.590 1 0.4890 5.3440 100.00 3.8750 4 277.0 18.60 396.90 23.09 20.00 + 0.17446 0.00 10.590 1 0.4890 
5.9600 92.10 3.8771 4 277.0 18.60 393.25 17.27 21.70 + 0.37578 0.00 10.590 1 0.4890 5.4040 88.60 3.6650 4 277.0 18.60 395.24 23.98 19.30 + 0.21719 0.00 10.590 1 0.4890 5.8070 53.80 3.6526 4 277.0 18.60 390.94 16.03 22.40 + 0.14052 0.00 10.590 0 0.4890 6.3750 32.30 3.9454 4 277.0 18.60 385.81 9.38 28.10 + 0.28955 0.00 10.590 0 0.4890 5.4120 9.80 3.5875 4 277.0 18.60 348.93 29.55 23.70 + 0.19802 0.00 10.590 0 0.4890 6.1820 42.40 3.9454 4 277.0 18.60 393.63 9.47 25.00 + 0.04560 0.00 13.890 1 0.5500 5.8880 56.00 3.1121 5 276.0 16.40 392.80 13.51 23.30 + 0.07013 0.00 13.890 0 0.5500 6.6420 85.10 3.4211 5 276.0 16.40 392.78 9.69 28.70 + 0.11069 0.00 13.890 1 0.5500 5.9510 93.80 2.8893 5 276.0 16.40 396.90 17.92 21.50 + 0.11425 0.00 13.890 1 0.5500 6.3730 92.40 3.3633 5 276.0 16.40 393.74 10.50 23.00 + 0.35809 0.00 6.200 1 0.5070 6.9510 88.50 2.8617 8 307.0 17.40 391.70 9.71 26.70 + 0.40771 0.00 6.200 1 0.5070 6.1640 91.30 3.0480 8 307.0 17.40 395.24 21.46 21.70 + 0.62356 0.00 6.200 1 0.5070 6.8790 77.70 3.2721 8 307.0 17.40 390.39 9.93 27.50 + 0.61470 0.00 6.200 0 0.5070 6.6180 80.80 3.2721 8 307.0 17.40 396.90 7.60 30.10 + 0.31533 0.00 6.200 0 0.5040 8.2660 78.30 2.8944 8 307.0 17.40 385.05 4.14 44.80 + 0.52693 0.00 6.200 0 0.5040 8.7250 83.00 2.8944 8 307.0 17.40 382.00 4.63 50.00 + 0.38214 0.00 6.200 0 0.5040 8.0400 86.50 3.2157 8 307.0 17.40 387.38 3.13 37.60 + 0.41238 0.00 6.200 0 0.5040 7.1630 79.90 3.2157 8 307.0 17.40 372.08 6.36 31.60 + 0.29819 0.00 6.200 0 0.5040 7.6860 17.00 3.3751 8 307.0 17.40 377.51 3.92 46.70 + 0.44178 0.00 6.200 0 0.5040 6.5520 21.40 3.3751 8 307.0 17.40 380.34 3.76 31.50 + 0.53700 0.00 6.200 0 0.5040 5.9810 68.10 3.6715 8 307.0 17.40 378.35 11.65 24.30 + 0.46296 0.00 6.200 0 0.5040 7.4120 76.90 3.6715 8 307.0 17.40 376.14 5.25 31.70 + 0.57529 0.00 6.200 0 0.5070 8.3370 73.30 3.8384 8 307.0 17.40 385.91 2.47 41.70 + 0.33147 0.00 6.200 0 0.5070 8.2470 70.40 3.6519 8 307.0 17.40 378.95 3.95 48.30 + 0.44791 0.00 6.200 1 0.5070 6.7260 66.50 3.6519 8 307.0 17.40 360.20 8.05 29.00 + 0.33045 0.00 6.200 0 0.5070 6.0860 61.50 3.6519 8 307.0 17.40 376.75 10.88 24.00 + 0.52058 0.00 6.200 1 0.5070 6.6310 76.50 4.1480 8 307.0 17.40 388.45 9.54 25.10 + 0.51183 0.00 6.200 0 0.5070 7.3580 71.60 4.1480 8 307.0 17.40 390.07 4.73 31.50 + 0.08244 30.00 4.930 0 0.4280 6.4810 18.50 6.1899 6 300.0 16.60 379.41 6.36 23.70 + 0.09252 30.00 4.930 0 0.4280 6.6060 42.20 6.1899 6 300.0 16.60 383.78 7.37 23.30 + 0.11329 30.00 4.930 0 0.4280 6.8970 54.30 6.3361 6 300.0 16.60 391.25 11.38 22.00 + 0.10612 30.00 4.930 0 0.4280 6.0950 65.10 6.3361 6 300.0 16.60 394.62 12.40 20.10 + 0.10290 30.00 4.930 0 0.4280 6.3580 52.90 7.0355 6 300.0 16.60 372.75 11.22 22.20 + 0.12757 30.00 4.930 0 0.4280 6.3930 7.80 7.0355 6 300.0 16.60 374.71 5.19 23.70 + 0.20608 22.00 5.860 0 0.4310 5.5930 76.50 7.9549 7 330.0 19.10 372.49 12.50 17.60 + 0.19133 22.00 5.860 0 0.4310 5.6050 70.20 7.9549 7 330.0 19.10 389.13 18.46 18.50 + 0.33983 22.00 5.860 0 0.4310 6.1080 34.90 8.0555 7 330.0 19.10 390.18 9.16 24.30 + 0.19657 22.00 5.860 0 0.4310 6.2260 79.20 8.0555 7 330.0 19.10 376.14 10.15 20.50 + 0.16439 22.00 5.860 0 0.4310 6.4330 49.10 7.8265 7 330.0 19.10 374.71 9.52 24.50 + 0.19073 22.00 5.860 0 0.4310 6.7180 17.50 7.8265 7 330.0 19.10 393.74 6.56 26.20 + 0.14030 22.00 5.860 0 0.4310 6.4870 13.00 7.3967 7 330.0 19.10 396.28 5.90 24.40 + 0.21409 22.00 5.860 0 0.4310 6.4380 8.90 7.3967 7 330.0 19.10 377.07 3.59 24.80 + 0.08221 22.00 5.860 0 0.4310 6.9570 6.80 8.9067 7 330.0 19.10 386.09 3.53 29.60 + 0.36894 22.00 5.860 
0 0.4310 8.2590 8.40 8.9067 7 330.0 19.10 396.90 3.54 42.80 + 0.04819 80.00 3.640 0 0.3920 6.1080 32.00 9.2203 1 315.0 16.40 392.89 6.57 21.90 + 0.03548 80.00 3.640 0 0.3920 5.8760 19.10 9.2203 1 315.0 16.40 395.18 9.25 20.90 + 0.01538 90.00 3.750 0 0.3940 7.4540 34.20 6.3361 3 244.0 15.90 386.34 3.11 44.00 + 0.61154 20.00 3.970 0 0.6470 8.7040 86.90 1.8010 5 264.0 13.00 389.70 5.12 50.00 + 0.66351 20.00 3.970 0 0.6470 7.3330 100.00 1.8946 5 264.0 13.00 383.29 7.79 36.00 + 0.65665 20.00 3.970 0 0.6470 6.8420 100.00 2.0107 5 264.0 13.00 391.93 6.90 30.10 + 0.54011 20.00 3.970 0 0.6470 7.2030 81.80 2.1121 5 264.0 13.00 392.80 9.59 33.80 + 0.53412 20.00 3.970 0 0.6470 7.5200 89.40 2.1398 5 264.0 13.00 388.37 7.26 43.10 + 0.52014 20.00 3.970 0 0.6470 8.3980 91.50 2.2885 5 264.0 13.00 386.86 5.91 48.80 + 0.82526 20.00 3.970 0 0.6470 7.3270 94.50 2.0788 5 264.0 13.00 393.42 11.25 31.00 + 0.55007 20.00 3.970 0 0.6470 7.2060 91.60 1.9301 5 264.0 13.00 387.89 8.10 36.50 + 0.76162 20.00 3.970 0 0.6470 5.5600 62.80 1.9865 5 264.0 13.00 392.40 10.45 22.80 + 0.78570 20.00 3.970 0 0.6470 7.0140 84.60 2.1329 5 264.0 13.00 384.07 14.79 30.70 + 0.57834 20.00 3.970 0 0.5750 8.2970 67.00 2.4216 5 264.0 13.00 384.54 7.44 50.00 + 0.54050 20.00 3.970 0 0.5750 7.4700 52.60 2.8720 5 264.0 13.00 390.30 3.16 43.50 + 0.09065 20.00 6.960 1 0.4640 5.9200 61.50 3.9175 3 223.0 18.60 391.34 13.65 20.70 + 0.29916 20.00 6.960 0 0.4640 5.8560 42.10 4.4290 3 223.0 18.60 388.65 13.00 21.10 + 0.16211 20.00 6.960 0 0.4640 6.2400 16.30 4.4290 3 223.0 18.60 396.90 6.59 25.20 + 0.11460 20.00 6.960 0 0.4640 6.5380 58.70 3.9175 3 223.0 18.60 394.96 7.73 24.40 + 0.22188 20.00 6.960 1 0.4640 7.6910 51.80 4.3665 3 223.0 18.60 390.77 6.58 35.20 + 0.05644 40.00 6.410 1 0.4470 6.7580 32.90 4.0776 4 254.0 17.60 396.90 3.53 32.40 + 0.09604 40.00 6.410 0 0.4470 6.8540 42.80 4.2673 4 254.0 17.60 396.90 2.98 32.00 + 0.10469 40.00 6.410 1 0.4470 7.2670 49.00 4.7872 4 254.0 17.60 389.25 6.05 33.20 + 0.06127 40.00 6.410 1 0.4470 6.8260 27.60 4.8628 4 254.0 17.60 393.45 4.16 33.10 + 0.07978 40.00 6.410 0 0.4470 6.4820 32.10 4.1403 4 254.0 17.60 396.90 7.19 29.10 + 0.21038 20.00 3.330 0 0.4429 6.8120 32.20 4.1007 5 216.0 14.90 396.90 4.85 35.10 + 0.03578 20.00 3.330 0 0.4429 7.8200 64.50 4.6947 5 216.0 14.90 387.31 3.76 45.40 + 0.03705 20.00 3.330 0 0.4429 6.9680 37.20 5.2447 5 216.0 14.90 392.23 4.59 35.40 + 0.06129 20.00 3.330 1 0.4429 7.6450 49.70 5.2119 5 216.0 14.90 377.07 3.01 46.00 + 0.01501 90.00 1.210 1 0.4010 7.9230 24.80 5.8850 1 198.0 13.60 395.52 3.16 50.00 + 0.00906 90.00 2.970 0 0.4000 7.0880 20.80 7.3073 1 285.0 15.30 394.72 7.85 32.20 + 0.01096 55.00 2.250 0 0.3890 6.4530 31.90 7.3073 1 300.0 15.30 394.72 8.23 22.00 + 0.01965 80.00 1.760 0 0.3850 6.2300 31.50 9.0892 1 241.0 18.20 341.60 12.93 20.10 + 0.03871 52.50 5.320 0 0.4050 6.2090 31.30 7.3172 6 293.0 16.60 396.90 7.14 23.20 + 0.04590 52.50 5.320 0 0.4050 6.3150 45.60 7.3172 6 293.0 16.60 396.90 7.60 22.30 + 0.04297 52.50 5.320 0 0.4050 6.5650 22.90 7.3172 6 293.0 16.60 371.72 9.51 24.80 + 0.03502 80.00 4.950 0 0.4110 6.8610 27.90 5.1167 4 245.0 19.20 396.90 3.33 28.50 + 0.07886 80.00 4.950 0 0.4110 7.1480 27.70 5.1167 4 245.0 19.20 396.90 3.56 37.30 + 0.03615 80.00 4.950 0 0.4110 6.6300 23.40 5.1167 4 245.0 19.20 396.90 4.70 27.90 + 0.08265 0.00 13.920 0 0.4370 6.1270 18.40 5.5027 4 289.0 16.00 396.90 8.58 23.90 + 0.08199 0.00 13.920 0 0.4370 6.0090 42.30 5.5027 4 289.0 16.00 396.90 10.40 21.70 + 0.12932 0.00 13.920 0 0.4370 6.6780 31.10 5.9604 4 289.0 16.00 396.90 6.27 28.60 
+ 0.05372 0.00 13.920 0 0.4370 6.5490 51.00 5.9604 4 289.0 16.00 392.85 7.39 27.10 + 0.14103 0.00 13.920 0 0.4370 5.7900 58.00 6.3200 4 289.0 16.00 396.90 15.84 20.30 + 0.06466 70.00 2.240 0 0.4000 6.3450 20.10 7.8278 5 358.0 14.80 368.24 4.97 22.50 + 0.05561 70.00 2.240 0 0.4000 7.0410 10.00 7.8278 5 358.0 14.80 371.58 4.74 29.00 + 0.04417 70.00 2.240 0 0.4000 6.8710 47.40 7.8278 5 358.0 14.80 390.86 6.07 24.80 + 0.03537 34.00 6.090 0 0.4330 6.5900 40.40 5.4917 7 329.0 16.10 395.75 9.50 22.00 + 0.09266 34.00 6.090 0 0.4330 6.4950 18.40 5.4917 7 329.0 16.10 383.61 8.67 26.40 + 0.10000 34.00 6.090 0 0.4330 6.9820 17.70 5.4917 7 329.0 16.10 390.43 4.86 33.10 + 0.05515 33.00 2.180 0 0.4720 7.2360 41.10 4.0220 7 222.0 18.40 393.68 6.93 36.10 + 0.05479 33.00 2.180 0 0.4720 6.6160 58.10 3.3700 7 222.0 18.40 393.36 8.93 28.40 + 0.07503 33.00 2.180 0 0.4720 7.4200 71.90 3.0992 7 222.0 18.40 396.90 6.47 33.40 + 0.04932 33.00 2.180 0 0.4720 6.8490 70.30 3.1827 7 222.0 18.40 396.90 7.53 28.20 + 0.49298 0.00 9.900 0 0.5440 6.6350 82.50 3.3175 4 304.0 18.40 396.90 4.54 22.80 + 0.34940 0.00 9.900 0 0.5440 5.9720 76.70 3.1025 4 304.0 18.40 396.24 9.97 20.30 + 2.63548 0.00 9.900 0 0.5440 4.9730 37.80 2.5194 4 304.0 18.40 350.45 12.64 16.10 + 0.79041 0.00 9.900 0 0.5440 6.1220 52.80 2.6403 4 304.0 18.40 396.90 5.98 22.10 + 0.26169 0.00 9.900 0 0.5440 6.0230 90.40 2.8340 4 304.0 18.40 396.30 11.72 19.40 + 0.26938 0.00 9.900 0 0.5440 6.2660 82.80 3.2628 4 304.0 18.40 393.39 7.90 21.60 + 0.36920 0.00 9.900 0 0.5440 6.5670 87.30 3.6023 4 304.0 18.40 395.69 9.28 23.80 + 0.25356 0.00 9.900 0 0.5440 5.7050 77.70 3.9450 4 304.0 18.40 396.42 11.50 16.20 + 0.31827 0.00 9.900 0 0.5440 5.9140 83.20 3.9986 4 304.0 18.40 390.70 18.33 17.80 + 0.24522 0.00 9.900 0 0.5440 5.7820 71.70 4.0317 4 304.0 18.40 396.90 15.94 19.80 + 0.40202 0.00 9.900 0 0.5440 6.3820 67.20 3.5325 4 304.0 18.40 395.21 10.36 23.10 + 0.47547 0.00 9.900 0 0.5440 6.1130 58.80 4.0019 4 304.0 18.40 396.23 12.73 21.00 + 0.16760 0.00 7.380 0 0.4930 6.4260 52.30 4.5404 5 287.0 19.60 396.90 7.20 23.80 + 0.18159 0.00 7.380 0 0.4930 6.3760 54.30 4.5404 5 287.0 19.60 396.90 6.87 23.10 + 0.35114 0.00 7.380 0 0.4930 6.0410 49.90 4.7211 5 287.0 19.60 396.90 7.70 20.40 + 0.28392 0.00 7.380 0 0.4930 5.7080 74.30 4.7211 5 287.0 19.60 391.13 11.74 18.50 + 0.34109 0.00 7.380 0 0.4930 6.4150 40.10 4.7211 5 287.0 19.60 396.90 6.12 25.00 + 0.19186 0.00 7.380 0 0.4930 6.4310 14.70 5.4159 5 287.0 19.60 393.68 5.08 24.60 + 0.30347 0.00 7.380 0 0.4930 6.3120 28.90 5.4159 5 287.0 19.60 396.90 6.15 23.00 + 0.24103 0.00 7.380 0 0.4930 6.0830 43.70 5.4159 5 287.0 19.60 396.90 12.79 22.20 + 0.06617 0.00 3.240 0 0.4600 5.8680 25.80 5.2146 4 430.0 16.90 382.44 9.97 19.30 + 0.06724 0.00 3.240 0 0.4600 6.3330 17.20 5.2146 4 430.0 16.90 375.21 7.34 22.60 + 0.04544 0.00 3.240 0 0.4600 6.1440 32.20 5.8736 4 430.0 16.90 368.57 9.09 19.80 + 0.05023 35.00 6.060 0 0.4379 5.7060 28.40 6.6407 1 304.0 16.90 394.02 12.43 17.10 + 0.03466 35.00 6.060 0 0.4379 6.0310 23.30 6.6407 1 304.0 16.90 362.25 7.83 19.40 + 0.05083 0.00 5.190 0 0.5150 6.3160 38.10 6.4584 5 224.0 20.20 389.71 5.68 22.20 + 0.03738 0.00 5.190 0 0.5150 6.3100 38.50 6.4584 5 224.0 20.20 389.40 6.75 20.70 + 0.03961 0.00 5.190 0 0.5150 6.0370 34.50 5.9853 5 224.0 20.20 396.90 8.01 21.10 + 0.03427 0.00 5.190 0 0.5150 5.8690 46.30 5.2311 5 224.0 20.20 396.90 9.80 19.50 + 0.03041 0.00 5.190 0 0.5150 5.8950 59.60 5.6150 5 224.0 20.20 394.81 10.56 18.50 + 0.03306 0.00 5.190 0 0.5150 6.0590 37.30 4.8122 5 224.0 20.20 396.14 8.51 20.60 + 
0.05497 0.00 5.190 0 0.5150 5.9850 45.40 4.8122 5 224.0 20.20 396.90 9.74 19.00 + 0.06151 0.00 5.190 0 0.5150 5.9680 58.50 4.8122 5 224.0 20.20 396.90 9.29 18.70 + 0.01301 35.00 1.520 0 0.4420 7.2410 49.30 7.0379 1 284.0 15.50 394.74 5.49 32.70 + 0.02498 0.00 1.890 0 0.5180 6.5400 59.70 6.2669 1 422.0 15.90 389.96 8.65 16.50 + 0.02543 55.00 3.780 0 0.4840 6.6960 56.40 5.7321 5 370.0 17.60 396.90 7.18 23.90 + 0.03049 55.00 3.780 0 0.4840 6.8740 28.10 6.4654 5 370.0 17.60 387.97 4.61 31.20 + 0.03113 0.00 4.390 0 0.4420 6.0140 48.50 8.0136 3 352.0 18.80 385.64 10.53 17.50 + 0.06162 0.00 4.390 0 0.4420 5.8980 52.30 8.0136 3 352.0 18.80 364.61 12.67 17.20 + 0.01870 85.00 4.150 0 0.4290 6.5160 27.70 8.5353 4 351.0 17.90 392.43 6.36 23.10 + 0.01501 80.00 2.010 0 0.4350 6.6350 29.70 8.3440 4 280.0 17.00 390.94 5.99 24.50 + 0.02899 40.00 1.250 0 0.4290 6.9390 34.50 8.7921 1 335.0 19.70 389.85 5.89 26.60 + 0.06211 40.00 1.250 0 0.4290 6.4900 44.40 8.7921 1 335.0 19.70 396.90 5.98 22.90 + 0.07950 60.00 1.690 0 0.4110 6.5790 35.90 10.7103 4 411.0 18.30 370.78 5.49 24.10 + 0.07244 60.00 1.690 0 0.4110 5.8840 18.50 10.7103 4 411.0 18.30 392.33 7.79 18.60 + 0.01709 90.00 2.020 0 0.4100 6.7280 36.10 12.1265 5 187.0 17.00 384.46 4.50 30.10 + 0.04301 80.00 1.910 0 0.4130 5.6630 21.90 10.5857 4 334.0 22.00 382.80 8.05 18.20 + 0.10659 80.00 1.910 0 0.4130 5.9360 19.50 10.5857 4 334.0 22.00 376.04 5.57 20.60 + 8.98296 0.00 18.100 1 0.7700 6.2120 97.40 2.1222 24 666.0 20.20 377.73 17.60 17.80 + 3.84970 0.00 18.100 1 0.7700 6.3950 91.00 2.5052 24 666.0 20.20 391.34 13.27 21.70 + 5.20177 0.00 18.100 1 0.7700 6.1270 83.40 2.7227 24 666.0 20.20 395.43 11.48 22.70 + 4.26131 0.00 18.100 0 0.7700 6.1120 81.30 2.5091 24 666.0 20.20 390.74 12.67 22.60 + 4.54192 0.00 18.100 0 0.7700 6.3980 88.00 2.5182 24 666.0 20.20 374.56 7.79 25.00 + 3.83684 0.00 18.100 0 0.7700 6.2510 91.10 2.2955 24 666.0 20.20 350.65 14.19 19.90 + 3.67822 0.00 18.100 0 0.7700 5.3620 96.20 2.1036 24 666.0 20.20 380.79 10.19 20.80 + 4.22239 0.00 18.100 1 0.7700 5.8030 89.00 1.9047 24 666.0 20.20 353.04 14.64 16.80 + 3.47428 0.00 18.100 1 0.7180 8.7800 82.90 1.9047 24 666.0 20.20 354.55 5.29 21.90 + 4.55587 0.00 18.100 0 0.7180 3.5610 87.90 1.6132 24 666.0 20.20 354.70 7.12 27.50 + 3.69695 0.00 18.100 0 0.7180 4.9630 91.40 1.7523 24 666.0 20.20 316.03 14.00 21.90 +13.52220 0.00 18.100 0 0.6310 3.8630 100.00 1.5106 24 666.0 20.20 131.42 13.33 23.10 + 4.89822 0.00 18.100 0 0.6310 4.9700 100.00 1.3325 24 666.0 20.20 375.52 3.26 50.00 + 5.66998 0.00 18.100 1 0.6310 6.6830 96.80 1.3567 24 666.0 20.20 375.33 3.73 50.00 + 6.53876 0.00 18.100 1 0.6310 7.0160 97.50 1.2024 24 666.0 20.20 392.05 2.96 50.00 + 9.23230 0.00 18.100 0 0.6310 6.2160 100.00 1.1691 24 666.0 20.20 366.15 9.53 50.00 + 8.26725 0.00 18.100 1 0.6680 5.8750 89.60 1.1296 24 666.0 20.20 347.88 8.88 50.00 +11.10810 0.00 18.100 0 0.6680 4.9060 100.00 1.1742 24 666.0 20.20 396.90 34.77 13.80 +18.49820 0.00 18.100 0 0.6680 4.1380 100.00 1.1370 24 666.0 20.20 396.90 37.97 13.80 +19.60910 0.00 18.100 0 0.6710 7.3130 97.90 1.3163 24 666.0 20.20 396.90 13.44 15.00 +15.28800 0.00 18.100 0 0.6710 6.6490 93.30 1.3449 24 666.0 20.20 363.02 23.24 13.90 + 9.82349 0.00 18.100 0 0.6710 6.7940 98.80 1.3580 24 666.0 20.20 396.90 21.24 13.30 +23.64820 0.00 18.100 0 0.6710 6.3800 96.20 1.3861 24 666.0 20.20 396.90 23.69 13.10 +17.86670 0.00 18.100 0 0.6710 6.2230 100.00 1.3861 24 666.0 20.20 393.74 21.78 10.20 +88.97620 0.00 18.100 0 0.6710 6.9680 91.90 1.4165 24 666.0 20.20 396.90 17.21 10.40 +15.87440 0.00 
18.100 0 0.6710 6.5450 99.10 1.5192 24 666.0 20.20 396.90 21.08 10.90 + 9.18702 0.00 18.100 0 0.7000 5.5360 100.00 1.5804 24 666.0 20.20 396.90 23.60 11.30 + 7.99248 0.00 18.100 0 0.7000 5.5200 100.00 1.5331 24 666.0 20.20 396.90 24.56 12.30 +20.08490 0.00 18.100 0 0.7000 4.3680 91.20 1.4395 24 666.0 20.20 285.83 30.63 8.80 +16.81180 0.00 18.100 0 0.7000 5.2770 98.10 1.4261 24 666.0 20.20 396.90 30.81 7.20 +24.39380 0.00 18.100 0 0.7000 4.6520 100.00 1.4672 24 666.0 20.20 396.90 28.28 10.50 +22.59710 0.00 18.100 0 0.7000 5.0000 89.50 1.5184 24 666.0 20.20 396.90 31.99 7.40 +14.33370 0.00 18.100 0 0.7000 4.8800 100.00 1.5895 24 666.0 20.20 372.92 30.62 10.20 + 8.15174 0.00 18.100 0 0.7000 5.3900 98.90 1.7281 24 666.0 20.20 396.90 20.85 11.50 + 6.96215 0.00 18.100 0 0.7000 5.7130 97.00 1.9265 24 666.0 20.20 394.43 17.11 15.10 + 5.29305 0.00 18.100 0 0.7000 6.0510 82.50 2.1678 24 666.0 20.20 378.38 18.76 23.20 +11.57790 0.00 18.100 0 0.7000 5.0360 97.00 1.7700 24 666.0 20.20 396.90 25.68 9.70 + 8.64476 0.00 18.100 0 0.6930 6.1930 92.60 1.7912 24 666.0 20.20 396.90 15.17 13.80 +13.35980 0.00 18.100 0 0.6930 5.8870 94.70 1.7821 24 666.0 20.20 396.90 16.35 12.70 + 8.71675 0.00 18.100 0 0.6930 6.4710 98.80 1.7257 24 666.0 20.20 391.98 17.12 13.10 + 5.87205 0.00 18.100 0 0.6930 6.4050 96.00 1.6768 24 666.0 20.20 396.90 19.37 12.50 + 7.67202 0.00 18.100 0 0.6930 5.7470 98.90 1.6334 24 666.0 20.20 393.10 19.92 8.50 +38.35180 0.00 18.100 0 0.6930 5.4530 100.00 1.4896 24 666.0 20.20 396.90 30.59 5.00 + 9.91655 0.00 18.100 0 0.6930 5.8520 77.80 1.5004 24 666.0 20.20 338.16 29.97 6.30 +25.04610 0.00 18.100 0 0.6930 5.9870 100.00 1.5888 24 666.0 20.20 396.90 26.77 5.60 +14.23620 0.00 18.100 0 0.6930 6.3430 100.00 1.5741 24 666.0 20.20 396.90 20.32 7.20 + 9.59571 0.00 18.100 0 0.6930 6.4040 100.00 1.6390 24 666.0 20.20 376.11 20.31 12.10 +24.80170 0.00 18.100 0 0.6930 5.3490 96.00 1.7028 24 666.0 20.20 396.90 19.77 8.30 +41.52920 0.00 18.100 0 0.6930 5.5310 85.40 1.6074 24 666.0 20.20 329.46 27.38 8.50 +67.92080 0.00 18.100 0 0.6930 5.6830 100.00 1.4254 24 666.0 20.20 384.97 22.98 5.00 +20.71620 0.00 18.100 0 0.6590 4.1380 100.00 1.1781 24 666.0 20.20 370.22 23.34 11.90 +11.95110 0.00 18.100 0 0.6590 5.6080 100.00 1.2852 24 666.0 20.20 332.09 12.13 27.90 + 7.40389 0.00 18.100 0 0.5970 5.6170 97.90 1.4547 24 666.0 20.20 314.64 26.40 17.20 +14.43830 0.00 18.100 0 0.5970 6.8520 100.00 1.4655 24 666.0 20.20 179.36 19.78 27.50 +51.13580 0.00 18.100 0 0.5970 5.7570 100.00 1.4130 24 666.0 20.20 2.60 10.11 15.00 +14.05070 0.00 18.100 0 0.5970 6.6570 100.00 1.5275 24 666.0 20.20 35.05 21.22 17.20 +18.81100 0.00 18.100 0 0.5970 4.6280 100.00 1.5539 24 666.0 20.20 28.79 34.37 17.90 +28.65580 0.00 18.100 0 0.5970 5.1550 100.00 1.5894 24 666.0 20.20 210.97 20.08 16.30 +45.74610 0.00 18.100 0 0.6930 4.5190 100.00 1.6582 24 666.0 20.20 88.27 36.98 7.00 +18.08460 0.00 18.100 0 0.6790 6.4340 100.00 1.8347 24 666.0 20.20 27.25 29.05 7.20 +10.83420 0.00 18.100 0 0.6790 6.7820 90.80 1.8195 24 666.0 20.20 21.57 25.79 7.50 +25.94060 0.00 18.100 0 0.6790 5.3040 89.10 1.6475 24 666.0 20.20 127.36 26.64 10.40 +73.53410 0.00 18.100 0 0.6790 5.9570 100.00 1.8026 24 666.0 20.20 16.45 20.62 8.80 +11.81230 0.00 18.100 0 0.7180 6.8240 76.50 1.7940 24 666.0 20.20 48.45 22.74 8.40 +11.08740 0.00 18.100 0 0.7180 6.4110 100.00 1.8589 24 666.0 20.20 318.75 15.02 16.70 + 7.02259 0.00 18.100 0 0.7180 6.0060 95.30 1.8746 24 666.0 20.20 319.98 15.70 14.20 +12.04820 0.00 18.100 0 0.6140 5.6480 87.60 1.9512 24 666.0 20.20 291.55 14.10 20.80 + 
7.05042 0.00 18.100 0 0.6140 6.1030 85.10 2.0218 24 666.0 20.20 2.52 23.29 13.40 + 8.79212 0.00 18.100 0 0.5840 5.5650 70.60 2.0635 24 666.0 20.20 3.65 17.16 11.70 +15.86030 0.00 18.100 0 0.6790 5.8960 95.40 1.9096 24 666.0 20.20 7.68 24.39 8.30 +12.24720 0.00 18.100 0 0.5840 5.8370 59.70 1.9976 24 666.0 20.20 24.65 15.69 10.20 +37.66190 0.00 18.100 0 0.6790 6.2020 78.70 1.8629 24 666.0 20.20 18.82 14.52 10.90 + 7.36711 0.00 18.100 0 0.6790 6.1930 78.10 1.9356 24 666.0 20.20 96.73 21.52 11.00 + 9.33889 0.00 18.100 0 0.6790 6.3800 95.60 1.9682 24 666.0 20.20 60.72 24.08 9.50 + 8.49213 0.00 18.100 0 0.5840 6.3480 86.10 2.0527 24 666.0 20.20 83.45 17.64 14.50 +10.06230 0.00 18.100 0 0.5840 6.8330 94.30 2.0882 24 666.0 20.20 81.33 19.69 14.10 + 6.44405 0.00 18.100 0 0.5840 6.4250 74.80 2.2004 24 666.0 20.20 97.95 12.03 16.10 + 5.58107 0.00 18.100 0 0.7130 6.4360 87.90 2.3158 24 666.0 20.20 100.19 16.22 14.30 +13.91340 0.00 18.100 0 0.7130 6.2080 95.00 2.2222 24 666.0 20.20 100.63 15.17 11.70 +11.16040 0.00 18.100 0 0.7400 6.6290 94.60 2.1247 24 666.0 20.20 109.85 23.27 13.40 +14.42080 0.00 18.100 0 0.7400 6.4610 93.30 2.0026 24 666.0 20.20 27.49 18.05 9.60 +15.17720 0.00 18.100 0 0.7400 6.1520 100.00 1.9142 24 666.0 20.20 9.32 26.45 8.70 +13.67810 0.00 18.100 0 0.7400 5.9350 87.90 1.8206 24 666.0 20.20 68.95 34.02 8.40 + 9.39063 0.00 18.100 0 0.7400 5.6270 93.90 1.8172 24 666.0 20.20 396.90 22.88 12.80 +22.05110 0.00 18.100 0 0.7400 5.8180 92.40 1.8662 24 666.0 20.20 391.45 22.11 10.50 + 9.72418 0.00 18.100 0 0.7400 6.4060 97.20 2.0651 24 666.0 20.20 385.96 19.52 17.10 + 5.66637 0.00 18.100 0 0.7400 6.2190 100.00 2.0048 24 666.0 20.20 395.69 16.59 18.40 + 9.96654 0.00 18.100 0 0.7400 6.4850 100.00 1.9784 24 666.0 20.20 386.73 18.85 15.40 +12.80230 0.00 18.100 0 0.7400 5.8540 96.60 1.8956 24 666.0 20.20 240.52 23.79 10.80 +10.67180 0.00 18.100 0 0.7400 6.4590 94.80 1.9879 24 666.0 20.20 43.06 23.98 11.80 + 6.28807 0.00 18.100 0 0.7400 6.3410 96.40 2.0720 24 666.0 20.20 318.01 17.79 14.90 + 9.92485 0.00 18.100 0 0.7400 6.2510 96.60 2.1980 24 666.0 20.20 388.52 16.44 12.60 + 9.32909 0.00 18.100 0 0.7130 6.1850 98.70 2.2616 24 666.0 20.20 396.90 18.13 14.10 + 7.52601 0.00 18.100 0 0.7130 6.4170 98.30 2.1850 24 666.0 20.20 304.21 19.31 13.00 + 6.71772 0.00 18.100 0 0.7130 6.7490 92.60 2.3236 24 666.0 20.20 0.32 17.44 13.40 + 5.44114 0.00 18.100 0 0.7130 6.6550 98.20 2.3552 24 666.0 20.20 355.29 17.73 15.20 + 5.09017 0.00 18.100 0 0.7130 6.2970 91.80 2.3682 24 666.0 20.20 385.09 17.27 16.10 + 8.24809 0.00 18.100 0 0.7130 7.3930 99.30 2.4527 24 666.0 20.20 375.87 16.74 17.80 + 9.51363 0.00 18.100 0 0.7130 6.7280 94.10 2.4961 24 666.0 20.20 6.68 18.71 14.90 + 4.75237 0.00 18.100 0 0.7130 6.5250 86.50 2.4358 24 666.0 20.20 50.92 18.13 14.10 + 4.66883 0.00 18.100 0 0.7130 5.9760 87.90 2.5806 24 666.0 20.20 10.48 19.01 12.70 + 8.20058 0.00 18.100 0 0.7130 5.9360 80.30 2.7792 24 666.0 20.20 3.50 16.94 13.50 + 7.75223 0.00 18.100 0 0.7130 6.3010 83.70 2.7831 24 666.0 20.20 272.21 16.23 14.90 + 6.80117 0.00 18.100 0 0.7130 6.0810 84.40 2.7175 24 666.0 20.20 396.90 14.70 20.00 + 4.81213 0.00 18.100 0 0.7130 6.7010 90.00 2.5975 24 666.0 20.20 255.23 16.42 16.40 + 3.69311 0.00 18.100 0 0.7130 6.3760 88.40 2.5671 24 666.0 20.20 391.43 14.65 17.70 + 6.65492 0.00 18.100 0 0.7130 6.3170 83.00 2.7344 24 666.0 20.20 396.90 13.99 19.50 + 5.82115 0.00 18.100 0 0.7130 6.5130 89.90 2.8016 24 666.0 20.20 393.82 10.29 20.20 + 7.83932 0.00 18.100 0 0.6550 6.2090 65.40 2.9634 24 666.0 20.20 396.90 13.22 21.40 + 3.16360 0.00 
18.100 0 0.6550 5.7590 48.20 3.0665 24 666.0 20.20 334.40 14.13 19.90 + 3.77498 0.00 18.100 0 0.6550 5.9520 84.70 2.8715 24 666.0 20.20 22.01 17.15 19.00 + 4.42228 0.00 18.100 0 0.5840 6.0030 94.50 2.5403 24 666.0 20.20 331.29 21.32 19.10 +15.57570 0.00 18.100 0 0.5800 5.9260 71.00 2.9084 24 666.0 20.20 368.74 18.13 19.10 +13.07510 0.00 18.100 0 0.5800 5.7130 56.70 2.8237 24 666.0 20.20 396.90 14.76 20.10 + 4.34879 0.00 18.100 0 0.5800 6.1670 84.00 3.0334 24 666.0 20.20 396.90 16.29 19.90 + 4.03841 0.00 18.100 0 0.5320 6.2290 90.70 3.0993 24 666.0 20.20 395.33 12.87 19.60 + 3.56868 0.00 18.100 0 0.5800 6.4370 75.00 2.8965 24 666.0 20.20 393.37 14.36 23.20 + 4.64689 0.00 18.100 0 0.6140 6.9800 67.60 2.5329 24 666.0 20.20 374.68 11.66 29.80 + 8.05579 0.00 18.100 0 0.5840 5.4270 95.40 2.4298 24 666.0 20.20 352.58 18.14 13.80 + 6.39312 0.00 18.100 0 0.5840 6.1620 97.40 2.2060 24 666.0 20.20 302.76 24.10 13.30 + 4.87141 0.00 18.100 0 0.6140 6.4840 93.60 2.3053 24 666.0 20.20 396.21 18.68 16.70 +15.02340 0.00 18.100 0 0.6140 5.3040 97.30 2.1007 24 666.0 20.20 349.48 24.91 12.00 +10.23300 0.00 18.100 0 0.6140 6.1850 96.70 2.1705 24 666.0 20.20 379.70 18.03 14.60 +14.33370 0.00 18.100 0 0.6140 6.2290 88.00 1.9512 24 666.0 20.20 383.32 13.11 21.40 + 5.82401 0.00 18.100 0 0.5320 6.2420 64.70 3.4242 24 666.0 20.20 396.90 10.74 23.00 + 5.70818 0.00 18.100 0 0.5320 6.7500 74.90 3.3317 24 666.0 20.20 393.07 7.74 23.70 + 5.73116 0.00 18.100 0 0.5320 7.0610 77.00 3.4106 24 666.0 20.20 395.28 7.01 25.00 + 2.81838 0.00 18.100 0 0.5320 5.7620 40.30 4.0983 24 666.0 20.20 392.92 10.42 21.80 + 2.37857 0.00 18.100 0 0.5830 5.8710 41.90 3.7240 24 666.0 20.20 370.73 13.34 20.60 + 3.67367 0.00 18.100 0 0.5830 6.3120 51.90 3.9917 24 666.0 20.20 388.62 10.58 21.20 + 5.69175 0.00 18.100 0 0.5830 6.1140 79.80 3.5459 24 666.0 20.20 392.68 14.98 19.10 + 4.83567 0.00 18.100 0 0.5830 5.9050 53.20 3.1523 24 666.0 20.20 388.22 11.45 20.60 + 0.15086 0.00 27.740 0 0.6090 5.4540 92.70 1.8209 4 711.0 20.10 395.09 18.06 15.20 + 0.18337 0.00 27.740 0 0.6090 5.4140 98.30 1.7554 4 711.0 20.10 344.05 23.97 7.00 + 0.20746 0.00 27.740 0 0.6090 5.0930 98.00 1.8226 4 711.0 20.10 318.43 29.68 8.10 + 0.10574 0.00 27.740 0 0.6090 5.9830 98.80 1.8681 4 711.0 20.10 390.11 18.07 13.60 + 0.11132 0.00 27.740 0 0.6090 5.9830 83.50 2.1099 4 711.0 20.10 396.90 13.35 20.10 + 0.17331 0.00 9.690 0 0.5850 5.7070 54.00 2.3817 6 391.0 19.20 396.90 12.01 21.80 + 0.27957 0.00 9.690 0 0.5850 5.9260 42.60 2.3817 6 391.0 19.20 396.90 13.59 24.50 + 0.17899 0.00 9.690 0 0.5850 5.6700 28.80 2.7986 6 391.0 19.20 393.29 17.60 23.10 + 0.28960 0.00 9.690 0 0.5850 5.3900 72.90 2.7986 6 391.0 19.20 396.90 21.14 19.70 + 0.26838 0.00 9.690 0 0.5850 5.7940 70.60 2.8927 6 391.0 19.20 396.90 14.10 18.30 + 0.23912 0.00 9.690 0 0.5850 6.0190 65.30 2.4091 6 391.0 19.20 396.90 12.92 21.20 + 0.17783 0.00 9.690 0 0.5850 5.5690 73.50 2.3999 6 391.0 19.20 395.77 15.10 17.50 + 0.22438 0.00 9.690 0 0.5850 6.0270 79.70 2.4982 6 391.0 19.20 396.90 14.33 16.80 + 0.06263 0.00 11.930 0 0.5730 6.5930 69.10 2.4786 1 273.0 21.00 391.99 9.67 22.40 + 0.04527 0.00 11.930 0 0.5730 6.1200 76.70 2.2875 1 273.0 21.00 396.90 9.08 20.60 + 0.06076 0.00 11.930 0 0.5730 6.9760 91.00 2.1675 1 273.0 21.00 396.90 5.64 23.90 + 0.10959 0.00 11.930 0 0.5730 6.7940 89.30 2.3889 1 273.0 21.00 393.45 6.48 22.00 + 0.04741 0.00 11.930 0 0.5730 6.0300 80.80 2.5050 1 273.0 21.00 396.90 7.88 11.90 From 1a253ff82a9b244866a0e20fe06444dda6c0bcd4 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 27 Mar 2015 15:14:52 
+0000 Subject: [PATCH 133/166] Added safe_exp and tests --- GPy/likelihoods/likelihood.py | 2 +- GPy/testing/misc_tests.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 GPy/testing/misc_tests.py diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 022670a5..2e55ddb9 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -5,7 +5,7 @@ import numpy as np from scipy import stats,special import scipy as sp import link_functions -from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian +from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian, safe_exp from scipy.integrate import quad import warnings from ..core.parameterization import Parameterized diff --git a/GPy/testing/misc_tests.py b/GPy/testing/misc_tests.py new file mode 100644 index 00000000..e620fa7e --- /dev/null +++ b/GPy/testing/misc_tests.py @@ -0,0 +1,18 @@ +import numpy as np +import scipy as sp +import GPy + +class MiscTests(np.testing.TestCase): + """ + Testing some utilities of misc + """ + def setUp(self): + self._lim_val = np.finfo(np.float64).max + self._lim_val_exp = np.log(self._lim_val) + + def test_safe_exp_upper(self): + assert np.exp(self._lim_val_exp + 1) == np.inf + assert GPy.util.misc.safe_exp(self._lim_val_exp + 1) < np.inf + + def test_safe_exp_lower(self): + assert GPy.util.misc.safe_exp(1e-10) < np.inf From 582aa4f40618048abf597b25058f345684a98299 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 27 Mar 2015 15:30:40 +0000 Subject: [PATCH 134/166] More samples for predictive quantile --- GPy/likelihoods/likelihood.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 2e55ddb9..1295245c 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -529,7 +529,7 @@ class Likelihood(Parameterized): def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None): #compute the quantiles by sampling!!! - N_samp = 50 + N_samp = 500 s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu #ss_f = s.flatten() #ss_y = self.samples(ss_f, Y_metadata) From 2c7582516de6675925ce88db7991c1f97e91daec Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 30 Mar 2015 13:41:25 +0100 Subject: [PATCH 135/166] adding a comment to clarify predictive_gradeints (Thanks AT) --- GPy/core/gp.py | 4 +++- GPy/kern/_src/stationary.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 38a7bb3d..52385c5a 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -241,12 +241,14 @@ class GP(Model): def predictive_gradients(self, Xnew): """ - Compute the derivatives of the latent function with respect to X* + Compute the derivatives of the predicted latent function with respect to X* Given a set of points at which to predict X* (size [N*,Q]), compute the derivatives of the mean and variance. Resulting arrays are sized: dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one). + Note that this is not the same as computing the mean and variance of the derivative of the function! 
+ dv_dX* -- [N*, Q], (since all outputs have the same variance) :param X: The points at which to get the predictive gradients :type X: np.ndarray (Xnew x self.input_dim) diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index 06671b23..5fa846d5 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -296,6 +296,8 @@ class Exponential(Stationary): return -0.5*self.K_of_r(r) + + class OU(Stationary): """ OU kernel: From 4f0894b6b703aa21b50cde74c4847d4e917f3dd8 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 30 Mar 2015 15:25:59 +0100 Subject: [PATCH 136/166] change the name of kernel DiffGenomeKern to DEtime --- GPy/kern/__init__.py | 3 ++- GPy/kern/_src/splitKern.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 718be74f..0e1f8a0d 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -16,5 +16,6 @@ from _src.poly import Poly from _src.eq_ode2 import EQ_ODE2 from _src.trunclinear import TruncLinear,TruncLinear_inf -from _src.splitKern import SplitKern,DiffGenomeKern +from _src.splitKern import SplitKern,DEtime +from _src.splitKern import DEtime as DiffGenomeKern diff --git a/GPy/kern/_src/splitKern.py b/GPy/kern/_src/splitKern.py index 27e4f76b..3b2e5716 100644 --- a/GPy/kern/_src/splitKern.py +++ b/GPy/kern/_src/splitKern.py @@ -7,7 +7,7 @@ from kern import Kern,CombinationKernel from .independent_outputs import index_to_slices import itertools -class DiffGenomeKern(Kern): +class DEtime(Kern): def __init__(self, kernel, idx_p, Xp, index_dim=-1, name='DiffGenomeKern'): self.idx_p = idx_p From edbb576bfcfd0755319961412d8f72a10c819ece Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 30 Mar 2015 21:49:02 +0100 Subject: [PATCH 137/166] fallback the implementation of spike and slab prior --- GPy/core/parameterization/variational.py | 32 ++++--------------- .../var_dtc_parallel.py | 6 ++-- GPy/kern/_src/psi_comp/ssrbf_psi_comp.py | 20 +++++++----- GPy/models/ss_gplvm.py | 5 ++- 4 files changed, 27 insertions(+), 36 deletions(-) diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index 7cc5c99a..43e8d096 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -50,31 +50,29 @@ class SpikeAndSlabPrior(VariationalPrior): def KL_divergence(self, variational_posterior): mu = variational_posterior.mean S = variational_posterior.variance - gamma,gamma1 = variational_posterior.gamma_probabilities() - log_gamma,log_gamma1 = variational_posterior.gamma_log_prob() + gamma = variational_posterior.gamma.values if len(self.pi.shape)==2: - idx = np.unique(gamma._raveled_index()/gamma.shape[-1]) + idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1]) pi = self.pi[idx] else: pi = self.pi var_mean = np.square(mu)/self.variance var_S = (S/self.variance - np.log(S)) - var_gamma = (gamma*(log_gamma-np.log(pi))).sum()+(gamma1*(log_gamma1-np.log(1-pi))).sum() + var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum() return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2. 
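The var_gamma expression above is exactly the KL divergence between the posterior switch distribution q(b_nq) = Bernoulli(gamma_nq) and its prior Bernoulli(pi_q), while the returned second term is the Gaussian slab KL, gated on by gamma; the probabilities are now stored directly (the Logistic(0,1) constraint on binary_prob further down keeps them in range) rather than on the logit scale. A minimal standalone sketch of the Bernoulli part, cross-checked against scipy (the helper name is illustrative, not GPy API):

    import numpy as np
    from scipy import stats

    def bernoulli_kl(gamma, pi):
        # KL( Bernoulli(gamma) || Bernoulli(pi) ), summed over all switches.
        # Clipping keeps the logs finite as gamma -> 0 or 1, mirroring the
        # 1e-9 clipping applied when gamma is initialised in ss_gplvm.py.
        g = np.clip(gamma, 1e-9, 1. - 1e-9)
        return (g*np.log(g/pi) + (1.-g)*np.log((1.-g)/(1.-pi))).sum()

    g, p = 0.7, 0.25
    assert np.isclose(bernoulli_kl(np.array([g]), p),
                      stats.entropy([g, 1.-g], [p, 1.-p]))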
def update_gradients_KL(self, variational_posterior): mu = variational_posterior.mean S = variational_posterior.variance - gamma,gamma1 = variational_posterior.gamma_probabilities() - log_gamma,log_gamma1 = variational_posterior.gamma_log_prob() + gamma = variational_posterior.gamma.values if len(self.pi.shape)==2: - idx = np.unique(gamma._raveled_index()/gamma.shape[-1]) + idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1]) pi = self.pi[idx] else: pi = self.pi - variational_posterior.binary_prob.gradient -= (np.log((1-pi)/pi)+log_gamma-log_gamma1+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.)*gamma*gamma1 + variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2. mu.gradient -= gamma*mu/self.variance S.gradient -= (1./self.variance - 1./S) * gamma /2. if self.learnPi: @@ -162,24 +160,8 @@ class SpikeAndSlabPosterior(VariationalPosterior): binary_prob : the probability of the distribution on the slab part. """ super(SpikeAndSlabPosterior, self).__init__(means, variances, name) - self.gamma = Param("binary_prob",binary_prob) + self.gamma = Param("binary_prob",binary_prob,Logistic(0.,1.)) self.link_parameter(self.gamma) - - @Cache_this(limit=5) - def gamma_probabilities(self): - prob = np.zeros_like(param_to_array(self.gamma)) - prob[self.gamma>-710] = 1./(1.+np.exp(-self.gamma[self.gamma>-710])) - prob1 = -np.zeros_like(param_to_array(self.gamma)) - prob1[self.gamma<710] = 1./(1.+np.exp(self.gamma[self.gamma<710])) - return prob, prob1 - - @Cache_this(limit=5) - def gamma_log_prob(self): - loggamma = param_to_array(self.gamma).copy() - loggamma[loggamma>-40] = -np.log1p(np.exp(-loggamma[loggamma>-40])) - loggamma1 = -param_to_array(self.gamma).copy() - loggamma1[loggamma1>-40] = -np.log1p(np.exp(-loggamma1[loggamma1>-40])) - return loggamma,loggamma1 def set_gradients(self, grad): self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index cac69872..2e633e16 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -169,11 +169,13 @@ class VarDTC_minibatch(LatentFunctionInference): Kmm = kern.K(Z).copy() diag.add(Kmm, self.const_jitter) - Lm = jitchol(Kmm, maxtries=100) + if not np.isfinite(Kmm).all(): + print Kmm + Lm = jitchol(Kmm) LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT - LL = jitchol(Lambda, maxtries=100) + LL = jitchol(Lambda) logdet_L = 2.*np.sum(np.log(np.diag(LL))) b = dtrtrs(LL,dtrtrs(Lm,psi1Y_full.T)[0])[0] bbt = np.square(b).sum() diff --git a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py index 18a4d751..f6a24c86 100644 --- a/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py +++ b/GPy/kern/_src/psi_comp/ssrbf_psi_comp.py @@ -22,12 +22,14 @@ try: # _psi1 NxM mu = variational_posterior.mean S = variational_posterior.variance + gamma = variational_posterior.binary_prob N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1] l2 = np.square(lengthscale) log_denom1 = np.log(S/l2+1) log_denom2 = np.log(2*S/l2+1) - log_gamma,log_gamma1 = variational_posterior.gamma_log_prob() + log_gamma = np.log(gamma) + log_gamma1 = np.log(1.-gamma) variance = float(variance) psi0 = np.empty(N) psi0[:] = variance @@ -37,6 +39,7 @@ try: 
from ....util.misc import param_to_array S = param_to_array(S) mu = param_to_array(mu) + gamma = param_to_array(gamma) Z = param_to_array(Z) support_code = """ @@ -79,7 +82,7 @@ try: } } """ - weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz) + weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz) psi2 = psi2n.sum(axis=0) return psi0,psi1,psi2,psi2n @@ -94,12 +97,13 @@ try: mu = variational_posterior.mean S = variational_posterior.variance + gamma = variational_posterior.binary_prob N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1] l2 = np.square(lengthscale) log_denom1 = np.log(S/l2+1) log_denom2 = np.log(2*S/l2+1) - log_gamma,log_gamma1 = variational_posterior.gamma_log_prob() - gamma, gamma1 = variational_posterior.gamma_probabilities() + log_gamma = np.log(gamma) + log_gamma1 = np.log(1.-gamma) variance = float(variance) dvar = np.zeros(1) @@ -113,6 +117,7 @@ try: from ....util.misc import param_to_array S = param_to_array(S) mu = param_to_array(mu) + gamma = param_to_array(gamma) Z = param_to_array(Z) support_code = """ @@ -130,7 +135,6 @@ try: double Zm1q = Z(m1,q); double Zm2q = Z(m2,q); double gnq = gamma(n,q); - double g1nq = gamma1(n,q); double mu_nq = mu(n,q); if(m2==0) { @@ -156,7 +160,7 @@ try: dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum); dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.; - dgamma(n,q) += lpsi1*(d_exp1*g1nq-d_exp2*gnq)/exp_sum; + dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum; dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum); dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum; } @@ -184,7 +188,7 @@ try: dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum; dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum; - dgamma(n,q) += lpsi2*(d_exp1*g1nq-d_exp2*gnq)/exp_sum; + dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum; dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum; dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum; } @@ -192,7 +196,7 @@ try: } } """ - weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','gamma1','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz) + weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz) dl *= 2.*lengthscale if not ARD: diff --git a/GPy/models/ss_gplvm.py b/GPy/models/ss_gplvm.py index a61ad2a0..04006d84 100644 --- a/GPy/models/ss_gplvm.py +++ b/GPy/models/ss_gplvm.py @@ -39,7 +39,10 @@ class SSGPLVM(SparseGP_MPI): X_variance = np.random.uniform(0,.1,X.shape) if Gamma is None: - gamma = np.random.randn(X.shape[0], input_dim) + gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation + gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim) + gamma[gamma>1.-1e-9] = 1.-1e-9 + gamma[gamma<1e-9] = 1e-9 else: gamma = 
Gamma.copy() From 7fa0c19a88c102516904ad007164f0276a095309 Mon Sep 17 00:00:00 2001 From: Zhenwen Dai Date: Mon, 30 Mar 2015 22:24:48 +0100 Subject: [PATCH 138/166] optimize sslinear kernel --- GPy/kern/_src/psi_comp/sslinear_psi_comp.py | 44 +++++++++------------ 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/GPy/kern/_src/psi_comp/sslinear_psi_comp.py b/GPy/kern/_src/psi_comp/sslinear_psi_comp.py index 5f261785..d431cd61 100644 --- a/GPy/kern/_src/psi_comp/sslinear_psi_comp.py +++ b/GPy/kern/_src/psi_comp/sslinear_psi_comp.py @@ -37,11 +37,11 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variati # Compute for psi0 and psi1 mu2S = np.square(mu)+S - dL_dvar += np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu) - dL_dgamma += np.einsum('n,q,nq->nq',dL_dpsi0,variance,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,variance,Z,mu) - dL_dmu += np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*variance,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,variance,Z) - dL_dS += np.einsum('n,nq,q->nq',dL_dpsi0,gamma,variance) - dL_dZ += np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, variance,mu) + dL_dvar += (dL_dpsi0[:,None]*gamma*mu2S).sum(axis=0) + (dL_dpsi1.T.dot(gamma*mu)*Z).sum(axis=0) + dL_dgamma += dL_dpsi0[:,None]*variance*mu2S+ dL_dpsi1.dot(Z)*mu*variance + dL_dmu += dL_dpsi0[:,None]*2.*variance*gamma*mu + dL_dpsi1.dot(Z)*gamma*variance + dL_dS += dL_dpsi0[:,None]*variance*gamma + dL_dZ += dL_dpsi1.T.dot(gamma*mu)*variance return dL_dvar, dL_dZ, dL_dmu, dL_dS, dL_dgamma @@ -64,29 +64,23 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma): gamma2 = np.square(gamma) variance2 = np.square(variance) mu2S = mu2+S # NxQ - gvm = np.einsum('nq,nq,q->nq',gamma,mu,variance) - common_sum = np.einsum('nq,mq->nm',gvm,Z) -# common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM - Z_expect = np.einsum('mo,mq,oq->q',dL_dpsi2,Z,Z) + gvm = gamma*mu*variance + common_sum = gvm.dot(Z.T) + Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0) + Z_expect_var2 = Z_expect*variance2 dL_dpsi2T = dL_dpsi2+dL_dpsi2.T - tmp = np.einsum('mo,oq->mq',dL_dpsi2T,Z) - common_expect = np.einsum('mq,nm->nq',tmp,common_sum) -# common_expect = np.einsum('mo,mq,no->nq',dL_dpsi2+dL_dpsi2.T,Z,common_sum) - Z2_expect = np.einsum('om,nm->no',dL_dpsi2T,common_sum) - Z1_expect = np.einsum('om,mq->oq',dL_dpsi2T,Z) + common_expect = common_sum.dot(dL_dpsi2T).dot(Z) + Z2_expect = common_sum.dot(dL_dpsi2T) + Z1_expect = dL_dpsi2T.dot(Z) - dL_dvar = np.einsum('nq,q,q->q',2.*(gamma*mu2S-gamma2*mu2),variance,Z_expect)+\ - np.einsum('nq,nq,nq->q',common_expect,gamma,mu) + dL_dvar = variance*Z_expect*2.*(gamma*mu2S-gamma2*mu2).sum(axis=0)+(common_expect*gamma*mu).sum(axis=0) - dL_dgamma = np.einsum('q,q,nq->nq',Z_expect,variance2,(mu2S-2.*gamma*mu2))+\ - np.einsum('nq,q,nq->nq',common_expect,variance,mu) + dL_dgamma = Z_expect_var2*(mu2S-2.*gamma*mu2)+common_expect*mu*variance + + dL_dmu = Z_expect_var2*mu*2.*(gamma-gamma2) + common_expect*gamma*variance + + dL_dS = gamma*Z_expect_var2 - dL_dmu = np.einsum('q,q,nq,nq->nq',Z_expect,variance2,mu,2.*(gamma-gamma2))+\ - np.einsum('nq,nq,q->nq',common_expect,gamma,variance) - - dL_dS = np.einsum('q,nq,q->nq',Z_expect,gamma,variance2) - -# dL_dZ = 2.*(np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma,variance2,Z,(mu2S-gamma*mu2))+np.einsum('om,nq,q,nq,nm->oq',dL_dpsi2,gamma,variance,mu,common_sum)) - dL_dZ = 
Z1_expect*np.einsum('nq,q,nq->q',gamma,variance2,(mu2S-gamma*mu2))+np.einsum('nq,q,nq,nm->mq',gamma,variance,mu,Z2_expect) + dL_dZ = (gamma*(mu2S-gamma*mu2)).sum(axis=0)*variance2*Z1_expect+ Z2_expect.T.dot(gamma*mu)*variance return dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ From 09c8d5a56769c974e9d6c417d0332237a23c9182 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 1 Apr 2015 09:14:03 +0100 Subject: [PATCH 139/166] whitespace --- GPy/mappings/mlp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/mappings/mlp.py b/GPy/mappings/mlp.py index f0fe21e5..4afc2fa1 100644 --- a/GPy/mappings/mlp.py +++ b/GPy/mappings/mlp.py @@ -48,7 +48,7 @@ class MLP(Mapping): # Backpropagation to hidden layer. dL_dact = np.dot(dL_dF, self.W2.T) dL_dlayer1 = dL_dact / np.square(np.cosh(layer1)) - + return np.dot(dL_dlayer1, self.W1.T) From 592414ce64bff32f567d04a262ab715411c5dcfc Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 1 Apr 2015 09:37:10 +0100 Subject: [PATCH 140/166] tests to probe the mean-function functionality --- GPy/testing/meanfunc_tests.py | 56 +++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 GPy/testing/meanfunc_tests.py diff --git a/GPy/testing/meanfunc_tests.py b/GPy/testing/meanfunc_tests.py new file mode 100644 index 00000000..1d875377 --- /dev/null +++ b/GPy/testing/meanfunc_tests.py @@ -0,0 +1,56 @@ +# Copyright (c) 2015, James Hensman +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import unittest +import numpy as np +import GPy + +class MFtests(unittest.TestCase): + def simple_mean_function(): + """ + The simplest possible mean function. No parameters, just a simple Sinusoid. + """ + #create simple mean function + mf = GPy.core.Mapping(1,1) + mf.f = np.sin + mf.update_gradients = lambda a,b: None + + X = np.linspace(0,10,50).reshape(-1,1) + Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + + k =GPy.kern.RBF(1) + lik = GPy.likelihoods.Gaussian() + m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf) + self.assertTrue(m.checkgrad()) + + def test_parametric_mean_function(self): + """ + A linear mean function with parameters that we'll learn alongside the kernel + """ + + X = np.linspace(0,10,50).reshape(-1,1) + Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X + + mf = GPy.mappings.Linear(1,1) + + k =GPy.kern.RBF(1) + lik = GPy.likelihoods.Gaussian() + m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf) + self.assertTrue(m.checkgrad()) + + def test_svgp_mean_function(self): + + # an instance of the SVIGOP with a men function + X = np.linspace(0,10,500).reshape(-1,1) + Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + Y = np.where(Y>0, 1,0) # make aclassificatino problem + + mf = GPy.mappings.Linear(1,1) + Z = np.linspace(0,10,50).reshape(-1,1) + lik = GPy.likelihoods.Bernoulli() + k =GPy.kern.RBF(1) + GPy.kern.White(1, 1e-4) + m = GPy.core.SVGP(X, Y,Z=Z, kernel=k, likelihood=lik, mean_function=mf) + self.assertTrue(m.checkgrad()) + + + From 27c65003d25bcf79825c8847fe173254e225ed44 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Wed, 1 Apr 2015 13:23:06 +0100 Subject: [PATCH 141/166] Working in Py2 but broken in Py3 --- GPy/util/choleskies.py | 104 ++++++++++++++++++++--------------------- GPy/util/misc.py | 2 +- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/GPy/util/choleskies.py b/GPy/util/choleskies.py index 7e068933..b64beae1 100644 --- a/GPy/util/choleskies.py +++ b/GPy/util/choleskies.py @@ -2,7 
+2,7 @@ # Licensed under the GNU GPL version 3.0 import numpy as np -#from scipy import weave +from scipy import weave from . import linalg def safe_root(N): @@ -12,58 +12,58 @@ def safe_root(N): raise ValueError("N is not square!") return j -#def flat_to_triang(flat): -# """take a matrix N x D and return a M X M x D array where -# -# N = M(M+1)/2 -# -# the lower triangluar portion of the d'th slice of the result is filled by the d'th column of flat. -# """ -# N, D = flat.shape -# M = (-1 + safe_root(8*N+1))/2 -# ret = np.zeros((M, M, D)) -# flat = np.ascontiguousarray(flat) -# -# code = """ -# int count = 0; -# for(int m=0; m 1 and df_dg.shape[-1] > 1: - import ipdb; ipdb.set_trace() # XXX BREAKPOINT + #import ipdb; ipdb.set_trace() # XXX BREAKPOINT raise NotImplementedError('Not implemented for matricies yet') return df_dg * dg_dx From 985b2ea70c2fb7358e7101309c4472733f269834 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Wed, 1 Apr 2015 15:42:49 +0100 Subject: [PATCH 142/166] Added (SLOW) Pure Python implementations of flat_to_triang and triang_to_flat --- .../var_dtc_parallel.py | 2 +- GPy/testing/mapping_tests.py | 7 +-- GPy/util/choleskies.py | 47 +++++++++++++++++-- GPy/util/linalg.py | 4 +- 4 files changed, 48 insertions(+), 12 deletions(-) diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index c546a4a1..4b884d4c 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -170,7 +170,7 @@ class VarDTC_minibatch(LatentFunctionInference): Kmm = kern.K(Z).copy() diag.add(Kmm, self.const_jitter) if not np.isfinite(Kmm).all(): - print Kmm + print(Kmm) Lm = jitchol(Kmm) LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') diff --git a/GPy/testing/mapping_tests.py b/GPy/testing/mapping_tests.py index 2e32dad3..2ff0e2d8 100644 --- a/GPy/testing/mapping_tests.py +++ b/GPy/testing/mapping_tests.py @@ -26,11 +26,6 @@ class MappingGradChecker(GPy.core.Model): self.mapping.update_gradients(self.dL_dY, self.X) - - - - - class MappingTests(unittest.TestCase): def test_kernelmapping(self): @@ -68,5 +63,5 @@ class MappingTests(unittest.TestCase): if __name__ == "__main__": - print "Running unit tests, please be (very) patient..." + print("Running unit tests, please be (very) patient...") unittest.main() diff --git a/GPy/util/choleskies.py b/GPy/util/choleskies.py index b64beae1..37ac7211 100644 --- a/GPy/util/choleskies.py +++ b/GPy/util/choleskies.py @@ -2,8 +2,13 @@ # Licensed under the GNU GPL version 3.0 import numpy as np -from scipy import weave from . import linalg +from .config import config + +try: + from scipy import weave +except ImportError: + config.set('weave', 'working', 'False') def safe_root(N): i = np.sqrt(N) @@ -12,12 +17,13 @@ def safe_root(N): raise ValueError("N is not square!") return j -def flat_to_triang(flat): +def _flat_to_triang_weave(flat): """take a matrix N x D and return a M X M x D array where N = M(M+1)/2 the lower triangluar portion of the d'th slice of the result is filled by the d'th column of flat. 
+ This is the weave implementation """ N, D = flat.shape M = (-1 + safe_root(8*N+1))/2 @@ -41,7 +47,24 @@ def flat_to_triang(flat): weave.inline(code, ['flat', 'ret', 'D', 'M']) return ret -def triang_to_flat(L): +def _flat_to_triang_pure(flat_mat): + N, D = flat_mat.shape + M = (-1 + safe_root(8*N+1))//2 + ret = np.zeros((M, M, D)) + count = 0 + for m in range(M): + for mm in range(m+1): + for d in range(D): + ret.flat[d + m*D*M + mm*D] = flat_mat.flat[count]; + count = count+1 + return ret + +if config.getboolean('weave', 'working'): + flat_to_triang = _flat_to_triang_weave +else: + flat_to_triang = _flat_to_triang_pure + +def _triang_to_flat_weave(L): M, _, D = L.shape L = np.ascontiguousarray(L) # should do nothing if L was created by flat_to_triang @@ -65,6 +88,24 @@ def triang_to_flat(L): weave.inline(code, ['flat', 'L', 'D', 'M']) return flat +def _triang_to_flat_pure(L): + M, _, D = L.shape + + N = M*(M+1)//2 + flat = np.empty((N, D)) + count = 0; + for m in range(M): + for mm in range(m+1): + for d in range(D): + flat.flat[count] = L.flat[d + m*D*M + mm*D]; + count = count +1 + return flat + +if config.getboolean('weave', 'working'): + triang_to_flat = _triang_to_flat_weave +else: + triang_to_flat = _triang_to_flat_pure + def triang_to_cov(L): return np.dstack([np.dot(L[:,:,i], L[:,:,i].T) for i in range(L.shape[-1])]) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index ec83810f..8ac5418f 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -102,14 +102,14 @@ def jitchol(A, maxtries=5): num_tries = 1 while num_tries <= maxtries and np.isfinite(jitter): try: - print jitter + print(jitter) L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True) return L except: jitter *= 10 finally: num_tries += 1 - raise linalg.LinAlgError, "not positive definite, even with jitter." + raise linalg.LinAlgError("not positive definite, even with jitter.") import traceback try: raise except: From 620a7842b3f908cdab26391529e0e32591978f99 Mon Sep 17 00:00:00 2001 From: Mike Croucher Date: Thu, 2 Apr 2015 07:35:24 +0100 Subject: [PATCH 143/166] Fix printing error --- GPy/core/verbose_optimization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/core/verbose_optimization.py b/GPy/core/verbose_optimization.py index a2c1598e..a5fb019e 100644 --- a/GPy/core/verbose_optimization.py +++ b/GPy/core/verbose_optimization.py @@ -151,7 +151,7 @@ class VerboseOptimization(object): if not self.ipython_notebook: print() print('Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start)) - print('Optimization status: {0:.5g}'.format(self.status)) + print('Optimization status: {0}'.format(self.status)) print() elif self.clear: self.hor_align.close() From a3679b9f6135a5103e2a67514be68dff8866aa93 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 2 Apr 2015 22:37:32 +0100 Subject: [PATCH 144/166] small bugfix in white kernel --- GPy/kern/_src/static.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py index f4223bf4..7f59f5df 100644 --- a/GPy/kern/_src/static.py +++ b/GPy/kern/_src/static.py @@ -60,7 +60,10 @@ class White(Static): return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64) def update_gradients_full(self, dL_dK, X, X2=None): - self.variance.gradient = np.trace(dL_dK) + if X2 is None: + self.variance.gradient = np.trace(dL_dK) + else: + self.variance.gradient = 0. 
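The X2 branch above encodes the defining property of white noise: K(X, X2) is identically zero for two distinct input sets, so the cross-covariance carries no gradient with respect to the variance, and tracing a (possibly rectangular) dL_dK there produced a spurious nonzero gradient. A minimal standalone version of the same logic, for illustration only (not the GPy class):

    import numpy as np

    def white_K(variance, X, X2=None):
        # white noise: variance on the diagonal of K(X, X), zero elsewhere
        if X2 is None:
            return variance*np.eye(X.shape[0])
        return np.zeros((X.shape[0], X2.shape[0]))

    def white_dL_dvariance(dL_dK, X, X2=None):
        # dK/dvariance is the identity when X2 is None and zero otherwise,
        # so the chain rule collapses to a trace -- or to nothing at all
        return np.trace(dL_dK) if X2 is None else 0.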
def update_gradients_diag(self, dL_dKdiag, X): self.variance.gradient = dL_dKdiag.sum() From 1efa842130882a1889957d863f96bf673b816657 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Wed, 8 Apr 2015 08:24:00 +0200 Subject: [PATCH 145/166] [variational] plot needed kwargs --- GPy/core/parameterization/variational.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index 7cc5c99a..36079f2e 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -141,7 +141,7 @@ class NormalPosterior(VariationalPosterior): holds the means and variances for a factorizing multivariate normal distribution ''' - def plot(self, *args): + def plot(self, *args, **kwargs): """ Plot latent space X in 1D: @@ -150,8 +150,7 @@ class NormalPosterior(VariationalPosterior): import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ...plotting.matplot_dep import variational_plots - import matplotlib - return variational_plots.plot(self,*args) + return variational_plots.plot(self, *args, **kwargs) class SpikeAndSlabPosterior(VariationalPosterior): ''' From e35999b24ba3e070dc245f775c82d5adddebc116 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Wed, 8 Apr 2015 08:24:55 +0200 Subject: [PATCH 146/166] [var plots] wrong return values --- GPy/plotting/matplot_dep/variational_plots.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/GPy/plotting/matplot_dep/variational_plots.py b/GPy/plotting/matplot_dep/variational_plots.py index 5cced10d..55128ec7 100644 --- a/GPy/plotting/matplot_dep/variational_plots.py +++ b/GPy/plotting/matplot_dep/variational_plots.py @@ -1,6 +1,6 @@ import pylab as pb, numpy as np -def plot(parameterized, fignum=None, ax=None, colors=None): +def plot(parameterized, fignum=None, ax=None, colors=None, figsize=(12, 6)): """ Plot latent space X in 1D: @@ -13,13 +13,15 @@ def plot(parameterized, fignum=None, ax=None, colors=None): """ if ax is None: - fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1])))) + fig = pb.figure(num=fignum, figsize=figsize) if colors is None: colors = pb.gca()._get_lines.color_cycle pb.clf() else: colors = iter(colors) - plots = [] + lines = [] + fills = [] + bg_lines = [] means, variances = parameterized.mean, parameterized.variance x = np.arange(means.shape[0]) for i in range(means.shape[1]): @@ -29,20 +31,20 @@ def plot(parameterized, fignum=None, ax=None, colors=None): a = ax[i] else: raise ValueError("Need one ax per latent dimension input_dim") - a.plot(means, c='k', alpha=.3) - plots.extend(a.plot(x, means.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i))) - a.fill_between(x, + bg_lines.append(a.plot(means, c='k', alpha=.3)) + lines.extend(a.plot(x, means.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i))) + fills.append(a.fill_between(x, means.T[i] - 2 * np.sqrt(variances.T[i]), means.T[i] + 2 * np.sqrt(variances.T[i]), - facecolor=plots[-1].get_color(), - alpha=.3) + facecolor=lines[-1].get_color(), + alpha=.3)) a.legend(borderaxespad=0.) 
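# Collecting every artist while drawing (bg_lines, lines, fills) is what
# makes the corrected return value below useful: callers get the matplotlib
# handles back and can restyle them after the fact. A hedged usage sketch,
# assuming q is a NormalPosterior (names are illustrative):
#     artists = variational_plots.plot(q)
#     artists['fills'][0].set_alpha(.1)      # fade the first credible band
#     artists['lines'][0].set_linewidth(3.)  # emphasise the first latent mean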
a.set_xlim(x.min(), x.max()) if i < means.shape[1] - 1: a.set_xticklabels('') pb.draw() fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95)) - return fig + return dict(lines=lines, fills=fills, bg_lines=bg_lines) def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_side=True): """ From e658637c18acdb59ee5f4ceacce106e2b38cb6b4 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Wed, 8 Apr 2015 10:57:20 +0100 Subject: [PATCH 147/166] Added Y_metadata to log_predictive_density --- GPy/likelihoods/likelihood.py | 41 +++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 1295245c..4f3f2e37 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -70,7 +70,7 @@ class Likelihood(Parameterized): """ raise NotImplementedError - def log_predictive_density(self, y_test, mu_star, var_star): + def log_predictive_density(self, y_test, mu_star, var_star, Y_metadata=None): """ Calculation of the log predictive density @@ -87,13 +87,46 @@ class Likelihood(Parameterized): assert y_test.shape==mu_star.shape assert y_test.shape==var_star.shape assert y_test.shape[1] == 1 - def integral_generator(y, m, v): + + flat_y_test = y_test.flatten() + flat_mu_star = mu_star.flatten() + flat_var_star = var_star.flatten() + + if Y_metadata is not None: + #Need to zip individual elements of Y_metadata aswell + Y_metadata_flat = {} + if Y_metadata is not None: + for key, val in Y_metadata.items(): + Y_metadata_flat[key] = np.atleast_1d(val).reshape(-1,1) + + zipped_values = [] + + for i in range(y_test.shape[0]): + y_m = {} + for key, val in Y_metadata_flat.items(): + if np.isscalar(val) or val.shape[0] == 1: + y_m[key] = val + else: + #Won't broadcast yet + y_m[key] = val[i] + zipped_values.append((flat_y_test[i], flat_mu_star[i], flat_var_star[i], y_m)) + else: + #Otherwise just pass along None's + zipped_values = zip(flat_y_test, flat_mu_star, flat_var_star, [None]*y_test.shape[0]) + + def integral_generator(y, m, v, y_m): """Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*""" def f(f_star): - return self.pdf(f_star, y)*np.exp(-(1./(2*v))*np.square(m-f_star)) + #exponent = np.exp(-(1./(2*v))*np.square(m-f_star)) + #from GPy.util.misc import safe_exp + #exponent = safe_exp(exponent) + #return self.pdf(f_star, y, y_m)*exponent + + #More stable in the log space + return np.exp(self.logpdf(f_star, y, y_m) -(1./(2*v))*np.square(m-f_star)) return f - scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v), -np.inf, np.inf) for y, m, v in zip(y_test.flatten(), mu_star.flatten(), var_star.flatten())]) + scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v, y_m), -np.inf, np.inf) for y, m, v, y_m in zipped_values]) scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1) p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star) return np.log(p_ystar) From 1e30ffd73038168e6e793c4315aefc74c129ada3 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 9 Apr 2015 15:42:02 +0100 Subject: [PATCH 148/166] speed ups for normal cdf --- GPy/inference/optimization/optimization.py | 4 ++ GPy/likelihoods/bernoulli.py | 15 +++-- GPy/likelihoods/likelihood.py | 12 +++- GPy/likelihoods/link_functions.py | 25 +++----- GPy/testing/model_tests.py | 2 +- GPy/util/misc.py | 4 +- GPy/util/univariate_Gaussian.py | 72 ++-------------------- 7 files changed, 38 insertions(+), 96 deletions(-) diff --git a/GPy/inference/optimization/optimization.py 
b/GPy/inference/optimization/optimization.py index aa9be793..5aa2ed03 100644 --- a/GPy/inference/optimization/optimization.py +++ b/GPy/inference/optimization/optimization.py @@ -140,6 +140,10 @@ class opt_lbfgsb(Optimizer): self.funct_eval = opt_result[2]['funcalls'] self.status = rcstrings[opt_result[2]['warnflag']] + #a more helpful error message is available in opt_result in the Error case + if opt_result[2]['warnflag']==2: + self.status = 'Error' + opt_result[2]['task'] + class opt_simplex(Optimizer): def __init__(self, *args, **kwargs): Optimizer.__init__(self, *args, **kwargs) diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index f5690aa4..2febda96 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -2,10 +2,10 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf +from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf + import link_functions from likelihood import Likelihood -from scipy import stats class Bernoulli(Likelihood): """ @@ -81,19 +81,18 @@ class Bernoulli(Likelihood): if isinstance(self.gp_link, link_functions.Probit): if gh_points is None: - gh_x, gh_w = np.polynomial.hermite.hermgauss(20) + gh_x, gh_w = self._gh_points() else: gh_x, gh_w = gh_points - from scipy import stats shape = m.shape m,v,Y = m.flatten(), v.flatten(), Y.flatten() Ysign = np.where(Y==1,1,-1) X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + (m*Ysign)[:,None] - p = stats.norm.cdf(X) + p = std_norm_cdf(X) p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability - N = stats.norm.pdf(X) + N = std_norm_pdf(X) F = np.log(p).dot(gh_w) NoverP = N/p dF_dm = (NoverP*Ysign[:,None]).dot(gh_w) @@ -106,10 +105,10 @@ class Bernoulli(Likelihood): def predictive_mean(self, mu, variance, Y_metadata=None): if isinstance(self.gp_link, link_functions.Probit): - return stats.norm.cdf(mu/np.sqrt(1+variance)) + return std_norm_cdf(mu/np.sqrt(1+variance)) elif isinstance(self.gp_link, link_functions.Heaviside): - return stats.norm.cdf(mu/np.sqrt(variance)) + return std_norm_cdf(mu/np.sqrt(variance)) else: raise NotImplementedError diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 4f3f2e37..9f2f3e7a 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt) +# Copyright (c) 2012-2015 The GPy authors (see AUTHORS.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np @@ -165,6 +165,13 @@ class Likelihood(Parameterized): return z, mean, variance + #only compute gh points if required + __gh_points = None + def _gh_points(self): + if self.__gh_points is None: + self.__gh_points = np.polynomial.hermite.hermgauss(20) + return self.__gh_points + def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None): """ Use Gauss-Hermite Quadrature to compute @@ -177,10 +184,9 @@ class Likelihood(Parameterized): if no gh_points are passed, we construct them using defualt options """ - #May be broken if gh_points is None: - gh_x, gh_w = np.polynomial.hermite.hermgauss(20) + gh_x, gh_w = self._gh_points() else: gh_x, gh_w = gh_points diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py index a4ddc760..6b297f92 100644 --- a/GPy/likelihoods/link_functions.py +++ b/GPy/likelihoods/link_functions.py @@ -1,10 +1,9 @@ -# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt) +# Copyright (c) 
2012-2015 The GPy authors (see AUTHORS.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from scipy import stats +from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf import scipy as sp -from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf _exp_lim_val = np.finfo(np.float64).max _lim_val = np.log(_exp_lim_val) @@ -64,13 +63,12 @@ class Identity(GPTransformation): def d3transf_df3(self,f): return np.zeros_like(f) - class Probit(GPTransformation): """ .. math:: g(f) = \\Phi^{-1} (mu) - + """ def transf(self,f): return std_norm_cdf(f) @@ -79,13 +77,10 @@ class Probit(GPTransformation): return std_norm_pdf(f) def d2transf_df2(self,f): - #FIXME return -f * std_norm_pdf(f) def d3transf_df3(self,f): - #FIXME - f2 = f**2 - return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2) + return (np.square(f)-1.)*std_norm_pdf(f) class Cloglog(GPTransformation): @@ -98,7 +93,7 @@ class Cloglog(GPTransformation): or f = \log (-\log(1-p)) - + """ def transf(self,f): return 1-np.exp(-np.exp(f)) @@ -123,16 +118,16 @@ class Log(GPTransformation): """ def transf(self,f): - return np.exp(np.clip(f, -_lim_val, _lim_val)) + return np.exp(np.clip(f, -np.inf, _lim_val)) def dtransf_df(self,f): - return np.exp(np.clip(f, -_lim_val, _lim_val)) + return np.exp(np.clip(f, -np.inf, _lim_val)) def d2transf_df2(self,f): - return np.exp(np.clip(f, -_lim_val, _lim_val)) + return np.exp(np.clip(f, -np.inf, _lim_val)) def d3transf_df3(self,f): - return np.exp(np.clip(f, -_lim_val, _lim_val)) + return np.exp(np.clip(f, -np.inf, _lim_val)) class Log_ex_1(GPTransformation): """ @@ -174,7 +169,7 @@ class Heaviside(GPTransformation): .. math:: - g(f) = I_{x \\in A} + g(f) = I_{x \\geq 0} """ def transf(self,f): diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 559014f7..5950de08 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -476,7 +476,7 @@ class GradientTests(np.testing.TestCase): likelihood = GPy.likelihoods.MixedNoise(likelihoods_list=likelihoods_list) m = GPy.core.SparseGP(X, Y, X[np.random.choice(num_obs, 10)], kern, likelihood, - GPy.inference.latent_function_inference.VarDTC(), + inference_method=GPy.inference.latent_function_inference.VarDTC(), Y_metadata=Y_metadata) self.assertTrue(m.checkgrad()) diff --git a/GPy/util/misc.py b/GPy/util/misc.py index 99bd62b3..84bf4dc1 100644 --- a/GPy/util/misc.py +++ b/GPy/util/misc.py @@ -23,7 +23,7 @@ def chain_1(df_dg, dg_dx): """ if np.all(dg_dx==1.): return df_dg - if len(df_dg) > 1 and df_dg.shape[-1] > 1: + if len(df_dg) > 1 and len(df_dg.shape)>1 and df_dg.shape[-1] > 1: import ipdb; ipdb.set_trace() # XXX BREAKPOINT raise NotImplementedError('Not implemented for matricies yet') return df_dg * dg_dx @@ -37,7 +37,7 @@ def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2): """ if np.all(dg_dx==1.) 
and np.all(d2g_dx2 == 0): return d2f_dg2 - if len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1: + if len(d2f_dg2) > 1 and len(d2f_dg2.shape)>1 and d2f_dg2.shape[-1] > 1: raise NotImplementedError('Not implemented for matricies yet') #dg_dx_2 = np.clip(dg_dx, 1e-12, _lim_val_square)**2 dg_dx_2 = dg_dx**2 diff --git a/GPy/util/univariate_Gaussian.py b/GPy/util/univariate_Gaussian.py index 09b2e99c..79864f86 100644 --- a/GPy/util/univariate_Gaussian.py +++ b/GPy/util/univariate_Gaussian.py @@ -1,77 +1,15 @@ # Copyright (c) 2012, 2013 Ricardo Andrade +# Copyright (c) 2015 James Hensman # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np from scipy import weave +from scipy.special import ndtr as std_norm_cdf +#define a standard normal pdf +_sqrt_2pi = np.sqrt(2*np.pi) def std_norm_pdf(x): - """Standard Gaussian density function""" - return 1./np.sqrt(2.*np.pi)*np.exp(-.5*x**2) - -def std_norm_cdf(x): - """ - Cumulative standard Gaussian distribution - Based on Abramowitz, M. and Stegun, I. (1970) - """ - x_shape = np.asarray(x).shape - - if len(x_shape) == 0 or x_shape[0] == 1: - sign = np.sign(x) - x *= sign - x /= np.sqrt(2.) - t = 1.0/(1.0 + 0.3275911*x) - erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429))))) - cdf_x = 0.5*(1.0 + sign*erf) - return cdf_x - else: - x = np.atleast_1d(x).copy() - cdf_x = np.zeros_like(x) - sign = np.ones_like(x) - neg_x_ind = x<0 - sign[neg_x_ind] = -1.0 - x[neg_x_ind] = -x[neg_x_ind] - x /= np.sqrt(2.) - t = 1.0/(1.0 + 0.3275911*x) - erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429))))) - cdf_x = 0.5*(1.0 + sign*erf) - cdf_x = cdf_x.reshape(x_shape) - return cdf_x - -def std_norm_cdf_weave(x): - """ - Cumulative standard Gaussian distribution - Based on Abramowitz, M. and Stegun, I. (1970) - - A weave implementation of std_norm_cdf, which is faster. this is unused, - because of the difficulties of a weave dependency. 
(see github issue #94) - - """ - #Generalize for many x - x = np.asarray(x).copy() - cdf_x = np.zeros_like(x) - N = x.size - support_code = "#include " - code = """ - - double sign, t, erf; - for (int i=0; i Date: Fri, 10 Apr 2015 09:58:11 +0100 Subject: [PATCH 149/166] Added numerical clipping --- GPy/util/misc.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/GPy/util/misc.py b/GPy/util/misc.py index 66b7b3b9..37e19b9f 100644 --- a/GPy/util/misc.py +++ b/GPy/util/misc.py @@ -2,13 +2,14 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np +from scipy.special import cbrt from .config import * _lim_val = np.finfo(np.float64).max _lim_val_exp = np.log(_lim_val) _lim_val_square = np.sqrt(_lim_val) -_lim_val_cube = np.power(_lim_val, -3) +_lim_val_cube = cbrt(_lim_val) def safe_exp(f): clip_f = np.clip(f, -np.inf, _lim_val_exp) @@ -39,8 +40,8 @@ def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2): return d2f_dg2 if len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1: raise NotImplementedError('Not implemented for matricies yet') - #dg_dx_2 = np.clip(dg_dx, 1e-12, _lim_val_square)**2 - dg_dx_2 = dg_dx**2 + dg_dx_2 = np.clip(dg_dx, -np.inf, _lim_val_square)**2 + #dg_dx_2 = dg_dx**2 return d2f_dg2*(dg_dx_2) + df_dg*d2g_dx2 def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3): @@ -55,8 +56,8 @@ def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3): if ( (len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1) or (len(d3f_dg3) > 1 and d3f_dg3.shape[-1] > 1)): raise NotImplementedError('Not implemented for matricies yet') - #dg_dx_3 = np.clip(dg_dx, 1e-12, _lim_val_cube)**3 - dg_dx_3 = dg_dx**3 + dg_dx_3 = np.clip(dg_dx, -np.inf, _lim_val_cube)**3 + #dg_dx_3 = dg_dx**3 return d3f_dg3*(dg_dx_3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3 def opt_wrapper(m, **kwargs): From ef2b11c799070fec46e20aa8d63aa323a6be6d8a Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 10 Apr 2015 10:40:18 +0100 Subject: [PATCH 150/166] Minor commenting changes --- GPy/core/gp.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index bbd3939b..1d2c9e2d 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -296,7 +296,7 @@ class GP(Model): :type size: int. :param full_cov: whether to return the full covariance matrix, or just the diagonal. :type full_cov: bool. - :returns: Ysim: set of simulations + :returns: fsim: set of simulations :rtype: np.ndarray (N x samples) """ m, v = self._raw_predict(X, full_cov=full_cov) @@ -304,11 +304,11 @@ class GP(Model): m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v) v = v.reshape(m.size,-1) if len(v.shape)==3 else v if not full_cov: - Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T + fsim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T else: - Ysim = np.random.multivariate_normal(m.flatten(), v, size).T + fsim = np.random.multivariate_normal(m.flatten(), v, size).T - return Ysim + return fsim def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None): """ @@ -324,7 +324,7 @@ class GP(Model): :type noise_model: integer. :returns: Ysim: set of simulations, a Numpy array (N x samples). 
""" - Ysim = self.posterior_samples_f(X, size, full_cov=full_cov) + fsim = self.posterior_samples_f(X, size, full_cov=full_cov) Ysim = self.likelihood.samples(Ysim, Y_metadata) return Ysim From 034d141d6353ab22d5d07ece83dd69cab0de93ae Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 10 Apr 2015 10:43:00 +0100 Subject: [PATCH 151/166] Fix typo --- GPy/core/gp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 1d2c9e2d..dc1519e1 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -325,7 +325,7 @@ class GP(Model): :returns: Ysim: set of simulations, a Numpy array (N x samples). """ fsim = self.posterior_samples_f(X, size, full_cov=full_cov) - Ysim = self.likelihood.samples(Ysim, Y_metadata) + Ysim = self.likelihood.samples(fsim, Y_metadata) return Ysim From 5c9587404d27af60e5be0df5f630d1d4e02fd064 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 10 Apr 2015 14:58:02 +0100 Subject: [PATCH 152/166] Added some numerical stability to link functions with tests for link functions --- GPy/likelihoods/link_functions.py | 59 +++++++----- GPy/testing/link_function_tests.py | 143 +++++++++++++++++++++++++++++ GPy/util/misc.py | 22 ++++- 3 files changed, 197 insertions(+), 27 deletions(-) create mode 100644 GPy/testing/link_function_tests.py diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py index 60e260e7..03495c7e 100644 --- a/GPy/likelihoods/link_functions.py +++ b/GPy/likelihoods/link_functions.py @@ -5,9 +5,8 @@ import numpy as np from scipy import stats import scipy as sp from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf - -_exp_lim_val = np.finfo(np.float64).max -_lim_val = np.log(_exp_lim_val) +from scipy.special import cbrt +from ..util.misc import safe_exp, safe_square, safe_cube, safe_quad, safe_three_times class GPTransformation(object): """ @@ -70,7 +69,7 @@ class Probit(GPTransformation): .. math:: g(f) = \\Phi^{-1} (mu) - + """ def transf(self,f): return std_norm_cdf(f) @@ -84,7 +83,7 @@ class Probit(GPTransformation): def d3transf_df3(self,f): #FIXME - f2 = f**2 + f2 = safe_square(f) return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2) @@ -98,22 +97,26 @@ class Cloglog(GPTransformation): or f = \log (-\log(1-p)) - + """ def transf(self,f): - return 1-np.exp(-np.exp(f)) + ef = safe_exp(f) + return 1-np.exp(-ef) def dtransf_df(self,f): - return np.exp(f-np.exp(f)) + ef = safe_exp(f) + return np.exp(f-ef) def d2transf_df2(self,f): - ef = np.exp(f) + ef = safe_exp(f) return -np.exp(f-ef)*(ef-1.) 
def d3transf_df3(self,f): - ef = np.exp(f) - return np.exp(f-ef)*(1.-3*ef + ef**2) - + ef = safe_exp(f) + ef2 = safe_square(ef) + three_times_ef = safe_three_times(ef) + r_val = np.exp(f-ef)*(1.-three_times_ef + ef2) + return r_val class Log(GPTransformation): """ @@ -123,16 +126,16 @@ class Log(GPTransformation): """ def transf(self,f): - return np.exp(np.clip(f, -_lim_val, _lim_val)) + return safe_exp(f) def dtransf_df(self,f): - return np.exp(np.clip(f, -_lim_val, _lim_val)) + return safe_exp(f) def d2transf_df2(self,f): - return np.exp(np.clip(f, -_lim_val, _lim_val)) + return safe_exp(f) def d3transf_df3(self,f): - return np.exp(np.clip(f, -_lim_val, _lim_val)) + return safe_exp(f) class Log_ex_1(GPTransformation): """ @@ -142,17 +145,20 @@ class Log_ex_1(GPTransformation): """ def transf(self,f): - return np.log(1.+np.exp(f)) + return np.log1p(safe_exp(f)) def dtransf_df(self,f): - return np.exp(f)/(1.+np.exp(f)) + ef = safe_exp(f) + return ef/(1.+ef) def d2transf_df2(self,f): - aux = np.exp(f)/(1.+np.exp(f)) + ef = safe_exp(f) + aux = ef/(1.+ef) return aux*(1.-aux) def d3transf_df3(self,f): - aux = np.exp(f)/(1.+np.exp(f)) + ef = safe_exp(f) + aux = ef/(1.+ef) daux_df = aux*(1.-aux) return daux_df - (2.*aux*daux_df) @@ -160,14 +166,17 @@ class Reciprocal(GPTransformation): def transf(self,f): return 1./f - def dtransf_df(self,f): - return -1./(f**2) + def dtransf_df(self, f): + f2 = safe_square(f) + return -1./f2 - def d2transf_df2(self,f): - return 2./(f**3) + def d2transf_df2(self, f): + f3 = safe_cube(f) + return 2./f3 def d3transf_df3(self,f): - return -6./(f**4) + f4 = safe_quad(f) + return -6./f4 class Heaviside(GPTransformation): """ diff --git a/GPy/testing/link_function_tests.py b/GPy/testing/link_function_tests.py new file mode 100644 index 00000000..fb8fba99 --- /dev/null +++ b/GPy/testing/link_function_tests.py @@ -0,0 +1,143 @@ +import numpy as np +import scipy as sp +from scipy.special import cbrt +from GPy.models import GradientChecker +_lim_val = np.finfo(np.float64).max +_lim_val_exp = np.log(_lim_val) +_lim_val_square = np.sqrt(_lim_val) +_lim_val_cube = cbrt(_lim_val) +from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside + +class LinkFunctionTests(np.testing.TestCase): + def setUp(self): + self.small_f = np.array([[-1e-4]]) + self.zero_f = np.array([[1e-4]]) + self.mid_f = np.array([[5.0]]) + self.large_f = np.array([[1e4]]) + self.f_lower_lim = np.array(-np.inf) + self.f_upper_lim = np.array(np.inf) + + def check_gradient(self, link_func, lim_of_inf, test_lim=False): + grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=self.mid_f) + self.assertTrue(grad.checkgrad(verbose=True)) + grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=self.mid_f) + self.assertTrue(grad2.checkgrad(verbose=True)) + grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=self.mid_f) + self.assertTrue(grad3.checkgrad(verbose=True)) + + grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=self.small_f) + self.assertTrue(grad.checkgrad(verbose=True)) + grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=self.small_f) + self.assertTrue(grad2.checkgrad(verbose=True)) + grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=self.small_f) + self.assertTrue(grad3.checkgrad(verbose=True)) + + grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=self.zero_f) + self.assertTrue(grad.checkgrad(verbose=True)) + grad2 = 
GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=self.zero_f) + self.assertTrue(grad2.checkgrad(verbose=True)) + grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=self.zero_f) + self.assertTrue(grad3.checkgrad(verbose=True)) + + #Do a limit test if the large f value is too large + large_f = np.clip(self.large_f, -np.inf, lim_of_inf-1e-3) + grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=large_f) + self.assertTrue(grad.checkgrad(verbose=True)) + grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=large_f) + self.assertTrue(grad2.checkgrad(verbose=True)) + grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=large_f) + self.assertTrue(grad3.checkgrad(verbose=True)) + + if test_lim: + print "Testing limits" + #Remove some otherwise we are too close to the limit for gradcheck to work effectively + lim_of_inf = lim_of_inf - 1e-4 + grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=lim_of_inf) + self.assertTrue(grad.checkgrad(verbose=True)) + grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=lim_of_inf) + self.assertTrue(grad2.checkgrad(verbose=True)) + grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=lim_of_inf) + self.assertTrue(grad3.checkgrad(verbose=True)) + + def check_overflow(self, link_func, lim_of_inf): + #Check that it does something sensible beyond this limit, + #note this is not checking the value is correct, just that it isn't nan + beyond_lim_of_inf = lim_of_inf + 100.0 + self.assertFalse(np.isinf(link_func.transf(beyond_lim_of_inf))) + self.assertFalse(np.isinf(link_func.dtransf_df(beyond_lim_of_inf))) + self.assertFalse(np.isinf(link_func.d2transf_df2(beyond_lim_of_inf))) + + self.assertFalse(np.isnan(link_func.transf(beyond_lim_of_inf))) + self.assertFalse(np.isnan(link_func.dtransf_df(beyond_lim_of_inf))) + self.assertFalse(np.isnan(link_func.d2transf_df2(beyond_lim_of_inf))) + + def test_log_overflow(self): + link = Log() + lim_of_inf = _lim_val_exp + + np.testing.assert_almost_equal(np.exp(self.mid_f), link.transf(self.mid_f)) + assert np.isinf(np.exp(np.log(self.f_upper_lim))) + #Check the clipping works + np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5) + #Need to look at most significant figures here rather than the decimals + np.testing.assert_approx_equal(link.transf(self.f_upper_lim), _lim_val, significant=5) + self.check_overflow(link, lim_of_inf) + + #Check that it would otherwise fail + beyond_lim_of_inf = lim_of_inf + 10.0 + old_err_state = np.seterr(over='ignore') + self.assertTrue(np.isinf(np.exp(beyond_lim_of_inf))) + np.seterr(**old_err_state) + + def test_log_ex_1_overflow(self): + link = Log_ex_1() + lim_of_inf = _lim_val_exp + + np.testing.assert_almost_equal(np.log1p(np.exp(self.mid_f)), link.transf(self.mid_f)) + assert np.isinf(np.log1p(np.exp(np.log(self.f_upper_lim)))) + #Check the clipping works + np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5) + #Need to look at most significant figures here rather than the decimals + np.testing.assert_approx_equal(link.transf(self.f_upper_lim), np.log1p(_lim_val), significant=5) + self.check_overflow(link, lim_of_inf) + + #Check that it would otherwise fail + beyond_lim_of_inf = lim_of_inf + 10.0 + old_err_state = np.seterr(over='ignore') + self.assertTrue(np.isinf(np.log1p(np.exp(beyond_lim_of_inf)))) + np.seterr(**old_err_state) + + + def test_log_gradients(self): + # transf dtransf_df d2transf_df2 
d3transf_df3 + link = Log() + lim_of_inf = _lim_val_exp + self.check_gradient(link, lim_of_inf, test_lim=True) + + def test_identity_gradients(self): + link = Identity() + lim_of_inf = _lim_val + #FIXME: Should be able to think of a way to test the limits of this + self.check_gradient(link, lim_of_inf, test_lim=False) + + def test_probit_gradients(self): + link = Probit() + lim_of_inf = _lim_val + self.check_gradient(link, lim_of_inf, test_lim=True) + + def test_Cloglog_gradients(self): + link = Cloglog() + lim_of_inf = _lim_val_exp + self.check_gradient(link, lim_of_inf, test_lim=True) + + def test_Log_ex_1_gradients(self): + link = Log_ex_1() + lim_of_inf = _lim_val_exp + self.check_gradient(link, lim_of_inf, test_lim=True) + self.check_overflow(link, lim_of_inf) + + def test_reciprocal_gradients(self): + link = Reciprocal() + lim_of_inf = _lim_val + #Does not work with much smaller values, and values closer to zero than 1e-5 + self.check_gradient(link, lim_of_inf, test_lim=True) diff --git a/GPy/util/misc.py b/GPy/util/misc.py index 37e19b9f..3b88da48 100644 --- a/GPy/util/misc.py +++ b/GPy/util/misc.py @@ -6,15 +6,33 @@ from scipy.special import cbrt from .config import * _lim_val = np.finfo(np.float64).max - _lim_val_exp = np.log(_lim_val) _lim_val_square = np.sqrt(_lim_val) -_lim_val_cube = cbrt(_lim_val) +#_lim_val_cube = cbrt(_lim_val) +_lim_val_cube = np.nextafter(_lim_val**(1/3.0), -np.inf) +_lim_val_quad = np.nextafter(_lim_val**(1/4.0), -np.inf) +_lim_val_three_times = np.nextafter(_lim_val/3.0, -np.inf) def safe_exp(f): clip_f = np.clip(f, -np.inf, _lim_val_exp) return np.exp(clip_f) +def safe_square(f): + f = np.clip(f, -np.inf, _lim_val_square) + return f**2 + +def safe_cube(f): + f = np.clip(f, -np.inf, _lim_val_cube) + return f**3 + +def safe_quad(f): + f = np.clip(f, -np.inf, _lim_val_quad) + return f**4 + +def safe_three_times(f): + f = np.clip(f, -np.inf, _lim_val_three_times) + return 3*f + def chain_1(df_dg, dg_dx): """ Generic chaining function for first derivative From 8f34bed6d76f47c79324a22ffb3b4f59aa20508e Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 10 Apr 2015 15:03:07 +0100 Subject: [PATCH 153/166] Fix for model gradients --- GPy/core/model.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 4108e72c..937d30e5 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -256,7 +256,7 @@ class Model(Parameterized): else: optimizer = optimization.get_optimizer(optimizer) opt = optimizer(start, model=self, max_iters=max_iters, **kwargs) - + with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook) as vo: opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads) vo.finish(opt) @@ -371,7 +371,12 @@ class Model(Parameterized): f1 = self._objective(xx) xx[xind] -= 2.*step f2 = self._objective(xx) - df_ratio = np.abs((f1 - f2) / min(f1, f2)) + #Avoid divide by zero, if any of the values are above 1e-15, otherwise both values are essentiall + #the same + if f1 > 1e-15 or f1 < -1e-15 or f2 > 1e-15 or f2 < -1e-15: + df_ratio = np.abs((f1 - f2) / min(f1, f2)) + else: + df_ratio = 1.0 df_unstable = df_ratio < df_tolerance numerical_gradient = (f1 - f2) / (2 * step) if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind] From dff9ca8e6b7084fd030872000b4670b45bdc4b62 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 10 Apr 2015 15:24:28 +0100 Subject: [PATCH 154/166] Added hessian and skew gradient checkers, some block 
functions --- GPy/kern/_src/independent_outputs.py | 12 +- GPy/models/bayesian_gplvm.py | 5 +- GPy/models/gradient_checker.py | 260 +++++++++++++++++++++++++++ GPy/util/block_matrices.py | 64 +++++-- 4 files changed, 323 insertions(+), 18 deletions(-) diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py index aa9dca80..6f8b7be1 100644 --- a/GPy/kern/_src/independent_outputs.py +++ b/GPy/kern/_src/independent_outputs.py @@ -8,7 +8,7 @@ import itertools def index_to_slices(index): """ - take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index. + take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index. e.g. >>> index = np.asarray([0,0,0,1,1,1,2,2,2]) @@ -79,10 +79,10 @@ class IndependentOutputs(CombinationKernel): def update_gradients_full(self,dL_dK,X,X2=None): slices = index_to_slices(X[:,self.index_dim]) - if self.single_kern: + if self.single_kern: target = np.zeros(self.kern.size) kerns = itertools.repeat(self.kern) - else: + else: kerns = self.kern target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)] def collate_grads(kern, i, dL, X, X2): @@ -94,7 +94,7 @@ class IndependentOutputs(CombinationKernel): else: slices2 = index_to_slices(X2[:,self.index_dim]) [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))] - if self.single_kern: + if self.single_kern: self.kern.gradient = target else: [kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))] @@ -104,12 +104,14 @@ class IndependentOutputs(CombinationKernel): kerns = itertools.repeat(self.kern) if self.single_kern else self.kern if X2 is None: # TODO: make use of index_to_slices + # FIXME: Broken as X is already sliced out + print "Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!" 
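            # For intuition: index_to_slices (defined at the top of this file)
            # turns a sorted index column into contiguous per-output slices, e.g.
            #     index_to_slices(np.asarray([0,0,0,1,1,1,2,2,2]))
            #     # -> [[slice(0, 3, None)], [slice(3, 6, None)], [slice(6, 9, None)]]
            # The np.unique/boolean-mask lookup below is the non-contiguous
            # fallback used while the slice-based path is under repair.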
values = np.unique(X[:,self.index_dim]) slices = [X[:,self.index_dim]==i for i in values] [target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None)) for kern, s in zip(kerns, slices)] #slices = index_to_slices(X[:,self.index_dim]) - #[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s]) + #[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s]) # for s in slices_i] for kern, slices_i in zip(kerns, slices)] #import ipdb;ipdb.set_trace() #[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]), diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 7cbd69eb..e0f6c0bc 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -24,7 +24,7 @@ class BayesianGPLVM(SparseGP_MPI): def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', mpi_comm=None, normalizer=None, - missing_data=False, stochastic=False, batchsize=1): + missing_data=False, stochastic=False, batchsize=1, Y_metadata=None): self.logger = logging.getLogger(self.__class__.__name__) if X is None: @@ -69,6 +69,7 @@ class BayesianGPLVM(SparseGP_MPI): name=name, inference_method=inference_method, normalizer=normalizer, mpi_comm=mpi_comm, variational_prior=self.variational_prior, + Y_metadata=None ) self.link_parameter(self.X, index=0) @@ -83,7 +84,7 @@ class BayesianGPLVM(SparseGP_MPI): def parameters_changed(self): super(BayesianGPLVM,self).parameters_changed() if isinstance(self.inference_method, VarDTC_minibatch): - return + return kl_fctr = 1. self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X) diff --git a/GPy/models/gradient_checker.py b/GPy/models/gradient_checker.py index 74026f8e..c2cde834 100644 --- a/GPy/models/gradient_checker.py +++ b/GPy/models/gradient_checker.py @@ -5,6 +5,8 @@ from ..core.model import Model import itertools import numpy from ..core.parameterization import Param +np = numpy +from ..util.block_matrices import get_blocks, get_block_shapes, unblock, get_blocks_3d, get_block_shapes_3d def get_shape(x): if isinstance(x, numpy.ndarray): @@ -111,3 +113,261 @@ class GradientChecker(Model): #for name, shape in zip(self.names, self.shapes): #_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape)))))) #return _param_names + + +class HessianChecker(GradientChecker): + + def __init__(self, f, df, ddf, x0, names=None, *args, **kwargs): + """ + :param f: Function (only used for numerical hessian gradient) + :param df: Gradient of function to check + :param ddf: Analytical gradient function + :param x0: + Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names). + Can be a list of arrays, if takes a list of arrays. This list will be passed + to f and df in the same order as given here. + If only one argument, make sure not to pass a list!!! + + :type x0: [array-like] | array-like | float | int + :param names: + Names to print, when performing gradcheck. If a list was passed to x0 + a list of names with the same length is expected. 
+ :param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs) + + """ + super(HessianChecker, self).__init__(df, ddf, x0, names=names, *args, **kwargs) + self._f = f + self._df = df + self._ddf = ddf + + def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False): + """ + Overwrite checkgrad method to check whole block instead of looping through + + Shows diagnostics using matshow instead + + :param verbose: If True, print a "full" checking of each parameter + :type verbose: bool + :param step: The size of the step around which to linearise the objective + :type step: float (default 1e-6) + :param tolerance: the tolerance allowed (see note) + :type tolerance: float (default 1e-3) + + Note:- + The gradient is considered correct if the ratio of the analytical + and numerical gradients is within of unity. + """ + try: + import numdifftools as nd + except: + raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests") + + if target_param: + raise NotImplementedError('Only basic functionality is provided with this gradchecker') + + #Repeat for each parameter, not the nicest but shouldn't be many cases where there are many + #variables + current_index = 0 + for name, shape in zip(self.names, self.shapes): + current_size = numpy.prod(shape) + x = self.optimizer_array.copy() + #x = self._get_params_transformed().copy() + x = x[current_index:current_index + current_size].reshape(shape) + + # Check gradients + analytic_hess = self._ddf(x) + if analytic_hess.shape[1] == 1: + analytic_hess = numpy.diagflat(analytic_hess) + + #From the docs: + #x0 : vector location + #at which to differentiate fun + #If x0 is an N x M array, then fun is assumed to be a function + #of N*M variables., thus we must have it flat, not (N,1), but just (N,) + #numeric_hess_partial = nd.Hessian(self._f, vectorized=False) + numeric_hess_partial = nd.Jacobian(self._df, vectorized=False) + #numeric_hess_partial = nd.Derivative(self._df, vectorized=True) + numeric_hess = numeric_hess_partial(x) + + check_passed = self.checkgrad_block(analytic_hess, numeric_hess, verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=plot) + current_index += current_size + return check_passed + + def checkgrad_block(self, analytic_hess, numeric_hess, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False): + """ + Checkgrad a block matrix + """ + if analytic_hess.dtype is np.dtype('object'): + #Make numeric hessian also into a block matrix + real_size = get_block_shapes(analytic_hess) + num_elements = np.sum(real_size) + if (num_elements, num_elements) == numeric_hess.shape: + #If the sizes are the same we assume they are the same + #(we have not fixed any values so the numeric is the whole hessian) + numeric_hess = get_blocks(numeric_hess, real_size) + else: + #Make a fake empty matrix and fill out the correct block + tmp_numeric_hess = get_blocks(np.zeros((num_elements, num_elements)), real_size) + tmp_numeric_hess[block_indices] = numeric_hess.copy() + numeric_hess = tmp_numeric_hess + + if block_indices is not None: + #Extract the right block + analytic_hess = analytic_hess[block_indices] + numeric_hess = numeric_hess[block_indices] + else: + #Unblock them if they are in blocks and you aren't checking a single block (checking whole hessian) + if analytic_hess.dtype is np.dtype('object'): + analytic_hess = unblock(analytic_hess) + numeric_hess = 
unblock(numeric_hess) + + ratio = numeric_hess / (numpy.where(analytic_hess==0, 1e-10, analytic_hess)) + difference = numpy.abs(analytic_hess - numeric_hess) + + check_passed = numpy.all((numpy.abs(1 - ratio)) < tolerance) or numpy.allclose(numeric_hess, analytic_hess, atol = tolerance) + + if verbose: + if block_indices: + print "\nBlock {}".format(block_indices) + else: + print "\nAll blocks" + + header = ['Checked', 'Max-Ratio', 'Min-Ratio', 'Min-Difference', 'Max-Difference'] + header_string = map(lambda x: ' | '.join(header), [header]) + separator = '-' * len(header_string[0]) + print '\n'.join([header_string[0], separator]) + min_r = '%.6f' % float(numpy.min(ratio)) + max_r = '%.6f' % float(numpy.max(ratio)) + max_d = '%.6f' % float(numpy.max(difference)) + min_d = '%.6f' % float(numpy.min(difference)) + cols = [max_r, min_r, min_d, max_d] + + if check_passed: + checked = "\033[92m True \033[0m" + else: + checked = "\033[91m False \033[0m" + + grad_string = "{} | {} | {} | {} | {} ".format(checked, cols[0], cols[1], cols[2], cols[3]) + print grad_string + + if plot: + import pylab as pb + fig, axes = pb.subplots(2, 2) + max_lim = numpy.max(numpy.vstack((analytic_hess, numeric_hess))) + min_lim = numpy.min(numpy.vstack((analytic_hess, numeric_hess))) + msa = axes[0,0].matshow(analytic_hess, vmin=min_lim, vmax=max_lim) + axes[0,0].set_title('Analytic hessian') + axes[0,0].xaxis.set_ticklabels([None]) + axes[0,0].yaxis.set_ticklabels([None]) + axes[0,0].xaxis.set_ticks([None]) + axes[0,0].yaxis.set_ticks([None]) + msn = axes[0,1].matshow(numeric_hess, vmin=min_lim, vmax=max_lim) + pb.colorbar(msn, ax=axes[0,1]) + axes[0,1].set_title('Numeric hessian') + axes[0,1].xaxis.set_ticklabels([None]) + axes[0,1].yaxis.set_ticklabels([None]) + axes[0,1].xaxis.set_ticks([None]) + axes[0,1].yaxis.set_ticks([None]) + msr = axes[1,0].matshow(ratio) + pb.colorbar(msr, ax=axes[1,0]) + axes[1,0].set_title('Ratio') + axes[1,0].xaxis.set_ticklabels([None]) + axes[1,0].yaxis.set_ticklabels([None]) + axes[1,0].xaxis.set_ticks([None]) + axes[1,0].yaxis.set_ticks([None]) + msd = axes[1,1].matshow(difference) + pb.colorbar(msd, ax=axes[1,1]) + axes[1,1].set_title('difference') + axes[1,1].xaxis.set_ticklabels([None]) + axes[1,1].yaxis.set_ticklabels([None]) + axes[1,1].xaxis.set_ticks([None]) + axes[1,1].yaxis.set_ticks([None]) + if block_indices: + fig.suptitle("Block: {}".format(block_indices)) + pb.show() + + return check_passed + +class SkewChecker(HessianChecker): + + def __init__(self, df, ddf, dddf, x0, names=None, *args, **kwargs): + """ + :param df: gradient of function + :param ddf: Gradient of function to check (hessian) + :param dddf: Analytical gradient function (third derivative) + :param x0: + Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names). + Can be a list of arrays, if takes a list of arrays. This list will be passed + to f and df in the same order as given here. + If only one argument, make sure not to pass a list!!! + + :type x0: [array-like] | array-like | float | int + :param names: + Names to print, when performing gradcheck. If a list was passed to x0 + a list of names with the same length is expected. 
+ :param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs) + + """ + super(SkewChecker, self).__init__(df, ddf, dddf, x0, names=names, *args, **kwargs) + + def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False, super_plot=False): + """ + Gradient checker that just checks each hessian individually + + super_plot will plot the hessian wrt every parameter, plot will just do the first one + """ + try: + import numdifftools as nd + except: + raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests") + + if target_param: + raise NotImplementedError('Only basic functionality is provided with this gradchecker') + + #Repeat for each parameter, not the nicest but shouldn't be many cases where there are many + #variables + current_index = 0 + for name, n_shape in zip(self.names, self.shapes): + current_size = numpy.prod(n_shape) + x = self.optimizer_array.copy() + #x = self._get_params_transformed().copy() + x = x[current_index:current_index + current_size].reshape(n_shape) + + # Check gradients + #Actually the third derivative + analytic_hess = self._ddf(x) + + #Can only calculate jacobian for one variable at a time + #From the docs: + #x0 : vector location + #at which to differentiate fun + #If x0 is an N x M array, then fun is assumed to be a function + #of N*M variables., thus we must have it flat, not (N,1), but just (N,) + #numeric_hess_partial = nd.Hessian(self._f, vectorized=False) + #Actually _df is already the hessian + numeric_hess_partial = nd.Jacobian(self._df, vectorized=True) + numeric_hess = numeric_hess_partial(x) + + print "Done making numerical hessian" + if analytic_hess.dtype is np.dtype('object'): + #Blockify numeric_hess aswell + blocksizes, pagesizes = get_block_shapes_3d(analytic_hess) + #HACK + real_block_size = np.sum(blocksizes) + numeric_hess = numeric_hess.reshape(real_block_size, real_block_size, pagesizes) + #numeric_hess = get_blocks_3d(numeric_hess, blocksizes)#, pagesizes) + else: + numeric_hess = numeric_hess.reshape(*analytic_hess.shape) + + #Check every block individually (for ease) + check_passed = [False]*numeric_hess.shape[2] + for block_ind in xrange(numeric_hess.shape[2]): + #Unless super_plot is set, just plot the first one + p = True if (plot and block_ind == numeric_hess.shape[2]-1) or super_plot else False + if verbose: + print "Checking derivative of hessian wrt parameter number {}".format(block_ind) + check_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p) + + current_index += current_size + return np.all(check_passed) + diff --git a/GPy/util/block_matrices.py b/GPy/util/block_matrices.py index a047abc6..e1e04aaa 100644 --- a/GPy/util/block_matrices.py +++ b/GPy/util/block_matrices.py @@ -1,9 +1,37 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
+# Copyright (c) 2014-2015, Alan Saul # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np +def get_blocks_3d(A, blocksizes, pagesizes=None): + """ + Given a 3d matrix, make a block matrix, where the first and second dimensions are blocked according + to blocksizes, and the pages are blocked using pagesizes + """ + assert (A.shape[0]==A.shape[1]) and len(A.shape)==3, "can't blockify this non-square matrix, may need to use 2d version" + N = np.sum(blocksizes) + assert A.shape[0] == N, "bad blocksizes" + num_blocks = len(blocksizes) + if pagesizes == None: + #Assume each page of A should be its own dimension + pagesizes = range(A.shape[2])#[0]*A.shape[2] + num_pages = len(pagesizes) + B = np.empty(shape=(num_blocks, num_blocks, num_pages), dtype=np.object) + count_k = 0 + #for Bk, k in enumerate(pagesizes): + for Bk in pagesizes: + count_i = 0 + for Bi, i in enumerate(blocksizes): + count_j = 0 + for Bj, j in enumerate(blocksizes): + #We want to have it count_k:count_k + k but its annoying as it makes a NxNx1 array is page sizes are set to 1 + B[Bi, Bj, Bk] = A[count_i:count_i + i, count_j:count_j + j, Bk] + count_j += j + count_i += i + #count_k += k + return B + def get_blocks(A, blocksizes): - assert (A.shape[0]==A.shape[1]) and len(A.shape)==2, "can;t blockify this non-square matrix" + assert (A.shape[0]==A.shape[1]) and len(A.shape)==2, "can't blockify this non-square matrix" N = np.sum(blocksizes) assert A.shape[0] == N, "bad blocksizes" num_blocks = len(blocksizes) @@ -17,6 +45,11 @@ def get_blocks(A, blocksizes): count_i += i return B +def get_block_shapes_3d(B): + assert B.dtype is np.dtype('object'), "Must be a block matrix" + #FIXME: This isn't general AT ALL... + return get_block_shapes(B[:,:,0]), B.shape[2] + def get_block_shapes(B): assert B.dtype is np.dtype('object'), "Must be a block matrix" return [B[b,b].shape[0] for b in range(0, B.shape[0])] @@ -35,7 +68,7 @@ def unblock(B): count_i += i return A -def block_dot(A, B): +def block_dot(A, B, diagonal=False): """ Element wise dot product on block matricies @@ -48,21 +81,30 @@ def block_dot(A, B): +-------------+ +------+------+ +-------+-------+ ..Note + If any block of either (A or B) are stored as 1d vectors then we assume + that it denotes a diagonal matrix efficient dot product using numpy + broadcasting will be used, i.e. A11*B11 + If either (A or B) of the diagonal matrices are stored as vectors then a more efficient dot product using numpy broadcasting will be used, i.e. 
A11*B11 """ #Must have same number of blocks and be a block matrix assert A.dtype is np.dtype('object'), "Must be a block matrix" assert B.dtype is np.dtype('object'), "Must be a block matrix" - Ashape = A.shape - Bshape = B.shape - assert Ashape == Bshape - def f(A,B): - if Ashape[0] == Ashape[1] or Bshape[0] == Bshape[1]: - #FIXME: Careful if one is transpose of other, would make a matrix - return A*B + assert A.shape == B.shape + def f(C,D): + """ + C is an element of A, D is the associated element of B + """ + Cshape = C.shape + Dshape = D.shape + if diagonal and (len(Cshape) == 1 or len(Dshape) == 1\ + or C.shape[0] != C.shape[1] or D.shape[0] != D.shape[1]): + print "Broadcasting, C: {} D:{}".format(C.shape, D.shape) + return C*D else: - return np.dot(A,B) + print "Dotting, C: {} C:{}".format(C.shape, D.shape) + return np.dot(C,D) dot = np.vectorize(f, otypes = [np.object]) return dot(A,B) From f4cf052bce227730625a8a61e251dc57706adea2 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 10 Apr 2015 15:44:15 +0100 Subject: [PATCH 155/166] Added option to plot the transformed link function (posterior once the link function has been applied) --- GPy/core/gp.py | 19 +++--- GPy/plotting/matplot_dep/models_plots.py | 75 ++++++++++++++++++++---- 2 files changed, 77 insertions(+), 17 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index dc1519e1..75e5d49a 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -6,14 +6,13 @@ import sys from .. import kern from .model import Model from .parameterization import ObsAr -from .model import Model from .mapping import Mapping -from .parameterization import ObsAr from .. import likelihoods from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation from .parameterization.variational import VariationalPosterior import logging +import warnings from GPy.util.normalizer import MeanNorm logger = logging.getLogger("GP") @@ -65,10 +64,14 @@ class GP(Model): self.Y = ObsAr(Y) self.Y_normalized = self.Y - assert Y.shape[0] == self.num_data + if Y.shape[0] != self.num_data: + #There can be cases where we want inputs than outputs, for example if we have multiple latent + #function values + warnings.warn("There are more rows in your input data X, \ + than in your output data Y, be VERY sure this is what you want") _, self.output_dim = self.Y.shape - #TODO: check the type of this is okay? + assert ((Y_metadata is None) or isinstance(Y_metadata, dict)) self.Y_metadata = Y_metadata assert isinstance(kernel, kern.Kern) @@ -326,14 +329,14 @@ class GP(Model): """ fsim = self.posterior_samples_f(X, size, full_cov=full_cov) Ysim = self.likelihood.samples(fsim, Y_metadata) - return Ysim def plot_f(self, plot_limits=None, which_data_rows='all', which_data_ycols='all', fixed_inputs=[], levels=20, samples=0, fignum=None, ax=None, resolution=None, plot_raw=True, - linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'): + linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx', + apply_link=False): """ Plot the GP's view of the world, where the data is normalized and before applying a likelihood. This is a call to plot with plot_raw=True. @@ -370,6 +373,8 @@ class GP(Model): :type Y_metadata: dict :param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx') :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib. 
+ :param apply_link: if there is a link function of the likelihood, plot the link(f*) rather than f* + :type apply_link: boolean """ assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ..plotting.matplot_dep import models_plots @@ -382,7 +387,7 @@ class GP(Model): which_data_ycols, fixed_inputs, levels, samples, fignum, ax, resolution, plot_raw=plot_raw, Y_metadata=Y_metadata, - data_symbol=data_symbol, **kw) + data_symbol=data_symbol, apply_link=apply_link, **kw) def plot(self, plot_limits=None, which_data_rows='all', which_data_ycols='all', fixed_inputs=[], diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 5cdf69fc..0cda12f1 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Copyright (c) 2012-2015, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) try: @@ -16,7 +16,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', which_data_ycols='all', fixed_inputs=[], levels=20, samples=0, fignum=None, ax=None, resolution=None, plot_raw=False, - linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx'): + linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx', + apply_link=False, samples_f=0, plot_uncertain_inputs=True): """ Plot the posterior of the GP. - In one dimension, the function is plotted with a shaded region identifying two standard deviations. @@ -38,7 +39,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', :type resolution: int :param levels: number of levels to plot in a contour plot. :type levels: int - :param samples: the number of a posteriori samples to plot + :param samples: the number of a posteriori samples to plot p(y*|y) :type samples: int :param fignum: figure to plot on. :type fignum: figure number @@ -49,6 +50,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', :type linecol: :param fillcol: color of fill :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure + :param apply_link: apply the link function if plotting f (default false) + :type apply_link: boolean + :param samples_f: the number of posteriori f samples to plot p(f*|y) + :type samples_f: int """ #deal with optional arguments if which_data_rows == 'all': @@ -88,8 +93,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #make a prediction on the frame and plot it if plot_raw: m, v = model._raw_predict(Xgrid) - lower = m - 2*np.sqrt(v) - upper = m + 2*np.sqrt(v) + if apply_link: + lower = model.likelihood.gp_link.transf(m - 2*np.sqrt(v)) + upper = model.likelihood.gp_link.transf(m + 2*np.sqrt(v)) + #Once transformed this is now the median of the function + m = model.likelihood.gp_link.transf(m) + else: + lower = m - 2*np.sqrt(v) + upper = m + 2*np.sqrt(v) else: if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression): meta = {'output_index': Xgrid[:,-1:].astype(np.int)} @@ -110,13 +121,31 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs. 
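        # Illustrative distinction between the two sampling paths, for a
        # hypothetical fitted model m (names assumed):
        #
        #     fsim = m.posterior_samples_f(Xgrid, size=3)  # latent draws, p(f*|y)
        #     ysim = m.posterior_samples(Xgrid, size=3)    # mapped through the
        #                                                  # likelihood, p(y*|y)
        #
        # `samples` above plots the latter; `samples_f` below plots the former.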
+ if samples_f: #NOTE not tested with fixed_inputs + Fsim = model.posterior_samples_f(Xgrid, samples_f) + for fi in Fsim.T: + plots['posterior_samples_f'] = ax.plot(Xnew, fi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) + #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs. + #add error bars for uncertain (if input uncertainty is being modelled) - if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs(): - plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(), - xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()), - ecolor='k', fmt=None, elinewidth=.5, alpha=.5) - + if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs() and plot_uncertain_inputs: + if plot_raw: + #add error bars for uncertain (if input uncertainty is being modelled), for plot_f + #Hack to plot error bars on latent function, rather than on the data + vs = model.X.mean.values.copy() + for i,v in fixed_inputs: + vs[:,i] = v + m_X, _ = model._raw_predict(vs) + if apply_link: + m_X = model.likelihood.gp_link.transf(m_X) + plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), m_X[which_data_rows, which_data_ycols].flatten(), + xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()), + ecolor='k', fmt=None, elinewidth=.5, alpha=.5) + else: + plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(), + xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()), + ecolor='k', fmt=None, elinewidth=.5, alpha=.5) #set the limits of the plot to some sensible values ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper)) @@ -186,3 +215,29 @@ def plot_fit_f(model, *args, **kwargs): """ kwargs['plot_raw'] = True plot_fit(model,*args, **kwargs) + +def fixed_inputs(model, non_fixed_inputs, fix_routine='median'): + """ + Convenience function for returning back fixed_inputs where the other inputs + are fixed using fix_routine + :param model: model + :type model: Model + :param non_fixed_inputs: dimensions of non fixed inputs + :type non_fixed_inputs: list + :param fix_routine: fixing routine to use, 'mean', 'median', 'zero' + :type fix_routine: string + """ + f_inputs = [] + if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): + X = model.X.mean.values.copy() + else: + X = model.X.values.copy() + for i in range(X.shape[1]): + if i not in non_fixed_inputs: + if fix_routine == 'mean': + f_inputs.append( (i, np.mean(X[:,i])) ) + if fix_routine == 'median': + f_inputs.append( (i, np.median(X[:,i])) ) + elif fix_routine == 'zero': + f_inputs.append( (i, 0) ) + return f_inputs From c76b7bbb9347729e216127f894284fc33a4bc0ae Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Fri, 10 Apr 2015 17:58:51 +0100 Subject: [PATCH 156/166] Added to init --- GPy/models/__init__.py | 2 +- GPy/models/bayesian_gplvm.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index 8f8fd838..0d18eb47 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -11,7 +11,7 @@ from .sparse_gplvm import SparseGPLVM from .warped_gp import WarpedGP from .bayesian_gplvm import BayesianGPLVM from .mrd import MRD -from .gradient_checker import GradientChecker +from .gradient_checker import 
GradientChecker, HessianChecker, SkewChecker from .ss_gplvm import SSGPLVM from .gp_coregionalized_regression import GPCoregionalizedRegression from .sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index e0f6c0bc..3ac703fe 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -69,7 +69,7 @@ class BayesianGPLVM(SparseGP_MPI): name=name, inference_method=inference_method, normalizer=normalizer, mpi_comm=mpi_comm, variational_prior=self.variational_prior, - Y_metadata=None + Y_metadata=Y_metadata ) self.link_parameter(self.X, index=0) From b6761c21d7c87c9eeabd01410d7e2588612490a3 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Tue, 14 Apr 2015 14:17:08 +0100 Subject: [PATCH 157/166] Added LOO for laplace and exact inference on training data, Gaussian logpdf appeared to be wrong, now fixed --- .../exact_gaussian_inference.py | 14 +++++ .../latent_function_inference/laplace.py | 62 +++++++++++++++++++ GPy/likelihoods/gaussian.py | 5 +- 3 files changed, 77 insertions(+), 4 deletions(-) diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index 2a0a2592..76b10f08 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -64,3 +64,17 @@ class ExactGaussianInference(LatentFunctionInference): dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata) return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha} + + def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None): + """ + Leave one out error as found in + "Bayesian leave-one-out cross-validation approximations for Gaussian latent variable models" + Vehtari et al. 2014. + """ + g = posterior.woodbury_vector + c = posterior.woodbury_inv + c_diag = np.diag(c)[:, None] + neg_log_marginal_LOO = 0.5*np.log(2*np.pi) - 0.5*np.log(c_diag) + 0.5*(g**2)/c_diag + #believe from Predictive Approaches for Choosing Hyperparameters in Gaussian Processes + #this is the negative marginal LOO + return -neg_log_marginal_LOO diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index c6921f57..19d53505 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -19,6 +19,7 @@ def warning_on_one_line(message, category, filename, lineno, file=None, line=Non warnings.formatwarning = warning_on_one_line from scipy import optimize from . import LatentFunctionInference +from scipy.integrate import quad class Laplace(LatentFunctionInference): @@ -39,6 +40,67 @@ class Laplace(LatentFunctionInference): self.first_run = True self._previous_Ki_fhat = None + def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None): + """ + Leave one out log predictive density as found in + "Bayesian leave-one-out cross-validation approximations for Gaussian latent variable models" + Vehtari et al. 2014. 
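        In sketch form (Gaussian site approximations assumed): each
        leave-one-out predictive is taken against a cavity distribution,
        formed by removing point i's own site precision from its marginal,

            1/v_cav_i = 1/v_marg_i - 1/v_site_i

        and then p(y_i | y_-i) = int p(y_i | f_i) N(f_i; m_cav_i, v_cav_i) df_i,
        which the code below evaluates by quadrature.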
+ """ + Ki_f_init = np.zeros_like(Y) + + if K is None: + K = kern.K(X) + + f_hat, _ = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata) + W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata) + logpdf_dfhat = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata) + + K_Wi_i, _, _, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave) + + #Eq 37 + posterior_cav_var = 1./(1./np.diag(Ki_W_i) - 1./np.diag(W))[:, None] + posterior_cav_mean = f_hat - posterior_cav_var*logpdf_dfhat + + flat_y = Y.flatten() + flat_mu = posterior_cav_mean.flatten() + flat_var = posterior_cav_var.flatten() + + if Y_metadata is not None: + #Need to zip individual elements of Y_metadata aswell + Y_metadata_flat = {} + if Y_metadata is not None: + for key, val in Y_metadata.items(): + Y_metadata_flat[key] = np.atleast_1d(val).reshape(-1, 1) + + zipped_values = [] + + for i in range(Y.shape[0]): + y_m = {} + for key, val in Y_metadata_flat.items(): + if np.isscalar(val) or val.shape[0] == 1: + y_m[key] = val + else: + #Won't broadcast yet + y_m[key] = val[i] + zipped_values.append((flat_y[i], flat_mu[i], flat_var[i], y_m)) + else: + #Otherwise just pass along None's + zipped_values = zip(flat_y, flat_mu, flat_var, [None]*Y.shape[0]) + + def integral_generator(yi, mi, vi, yi_m): + def f(fi_star): + #More stable in the log space + return np.exp(likelihood.logpdf(fi_star, yi, yi_m) + - 0.5*np.log(2*np.pi*vi) + - 0.5*np.square(mi-fi_star)/vi) + return f + + #Eq 25 + p_ystar, _ = zip(*[quad(integral_generator(y, m, v, yi_m), -np.inf, np.inf) + for y, m, v, yi_m in zipped_values]) + p_ystar = np.array(p_ystar).reshape(-1, 1) + return np.log(p_ystar) + def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None): """ Returns a Posterior class containing essential quantities of the posterior diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 9ecf7dbf..9abb8cde 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -132,10 +132,8 @@ class Gaussian(Likelihood): :returns: log likelihood evaluated for this point :rtype: float """ - N = y.shape[0] ln_det_cov = np.log(self.variance) - - return -0.5*((y-link_f)**2/self.variance + ln_det_cov + np.log(2.*np.pi)) + return -(1.0/(2*self.variance))*((y-link_f)**2) - 0.5*ln_det_cov - 0.5*np.log(2.*np.pi) def dlogpdf_dlink(self, link_f, y, Y_metadata=None): """ @@ -220,7 +218,6 @@ class Gaussian(Likelihood): """ e = y - link_f s_4 = 1.0/(self.variance**2) - N = y.shape[0] dlik_dsigma = -0.5/self.variance + 0.5*s_4*np.square(e) return dlik_dsigma From ab5f3591035c333079499a19a6d35357a9ce7dd0 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Tue, 14 Apr 2015 16:27:58 +0100 Subject: [PATCH 158/166] Changed LOO implementation for Eq 30 instead of 37 --- .../latent_function_inference/laplace.py | 22 ++++++++++++++----- GPy/likelihoods/likelihood.py | 6 ++--- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 19d53505..ed21f094 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -57,9 +57,20 @@ class Laplace(LatentFunctionInference): K_Wi_i, _, _, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave) - #Eq 37 - posterior_cav_var = 1./(1./np.diag(Ki_W_i) - 1./np.diag(W))[:, None] - posterior_cav_mean = f_hat - posterior_cav_var*logpdf_dfhat + W = np.diagflat(W) + + #Eq 14, and 16 + var_site = 
1./np.diag(W)[:, None]
+        mu_site = f_hat + var_site*logpdf_dfhat
+        prec_site = 1./var_site
+        #Eq 19
+        marginal_cov = Ki_W_i
+        marginal_mu = marginal_cov.dot(np.diagflat(prec_site)).dot(mu_site)
+        marginal_var = np.diag(marginal_cov)[:, None]
+        #Eq 30 with using site parameters instead of Gaussian site parameters
+        #(var_site instead of sigma^{2} )
+        posterior_cav_var = 1./(1./marginal_var - 1./var_site)
+        posterior_cav_mean = posterior_cav_var*((1./marginal_var)*marginal_mu - (1./var_site)*Y)

         flat_y = Y.flatten()
         flat_mu = posterior_cav_mean.flatten()
         flat_var = posterior_cav_var.flatten()
@@ -90,12 +101,13 @@
         def integral_generator(yi, mi, vi, yi_m):
             def f(fi_star):
                 #More stable in the log space
-                return np.exp(likelihood.logpdf(fi_star, yi, yi_m)
+                p_fi = np.exp(likelihood.logpdf(fi_star, yi, yi_m)
                               - 0.5*np.log(2*np.pi*vi)
                               - 0.5*np.square(mi-fi_star)/vi)
+                return p_fi
             return f

-        #Eq 25
+        #Eq 30
         p_ystar, _ = zip(*[quad(integral_generator(y, m, v, yi_m), -np.inf, np.inf)
                            for y, m, v, yi_m in zipped_values])
         p_ystar = np.array(p_ystar).reshape(-1, 1)
         return np.log(p_ystar)
diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
index f4b31091..470f5059 100644
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@@ -539,9 +539,9 @@ class Likelihood(Parameterized):
         #Parameters are stacked vertically. Must be listed in same order as 'get_param_names'

         # ensure we have gradients for every parameter we want to optimize
-        assert dlogpdf_dtheta.shape[0] == self.size #f, d x num_param array
-        assert dlogpdf_df_dtheta.shape[0] == self.size #f x d x num_param matrix or just f x num_param
-        assert d2logpdf_df2_dtheta.shape[0] == self.size #f x num_param matrix or f x d x num_param matrix, f x f x num_param or f x f x d x num_param
+        assert dlogpdf_dtheta.shape[0] == self.size #num_param array x f, d
+        assert dlogpdf_df_dtheta.shape[0] == self.size #num_param x f x d x matrix or just num_param x f
+        assert d2logpdf_df2_dtheta.shape[0] == self.size #num_param x f matrix or num_param x f x d x matrix, num_param x f x f or num_param x f x f x d

         return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta

From fe0a4285ca45ab6c9584147da396c2ca0f3c14d0 Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Tue, 14 Apr 2015 17:14:05 +0100
Subject: [PATCH 159/166] Removed jitter printing

---
 GPy/util/linalg.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 8ac5418f..26c4b774 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -20,7 +20,7 @@ try:
     from scipy import weave
 except ImportError:
     config.set('weave', 'working', 'False')
-    
+
 _scipyversion = np.float64((scipy.__version__).split('.')[:2])

 _fix_dpotri_scipy_bug = True
@@ -102,7 +102,6 @@ def jitchol(A, maxtries=5):
         num_tries = 1
         while num_tries <= maxtries and np.isfinite(jitter):
             try:
-                print(jitter)
                 L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
                 return L
             except:
@@ -115,7 +114,6 @@
     except:
         logging.warning('\n'.join(['Added jitter of {:.10e}'.format(jitter),
             '  in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
-        import ipdb;ipdb.set_trace()
     return L

# def dtrtri(L, lower=1):

From 361f0a527489f12a3949adc008a650221a455e09 Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Thu, 16 Apr 2015 09:25:18 +0100
Subject: [PATCH 160/166] Fixed log predictive density, added option for LOO to provide some intermediate variables

---
 .../latent_function_inference/laplace.py | 18 ++++++++++++------
GPy/likelihoods/likelihood.py | 17 ++++++++++------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index ed21f094..aefc82ac 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -40,7 +40,7 @@ class Laplace(LatentFunctionInference): self.first_run = True self._previous_Ki_fhat = None - def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None): + def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None, f_hat=None, W=None, Ki_W_i=None): """ Leave one out log predictive density as found in "Bayesian leave-one-out cross-validation approximations for Gaussian latent variable models" @@ -51,13 +51,19 @@ class Laplace(LatentFunctionInference): if K is None: K = kern.K(X) - f_hat, _ = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata) - W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata) + if f_hat is None: + f_hat, _ = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata) + + if W is None: + W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata) + + if Ki_W_i is None: + _, _, _, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave) + logpdf_dfhat = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata) - K_Wi_i, _, _, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave) - - W = np.diagflat(W) + if W.shape[1] == 1: + W = np.diagflat(W) #Eq 14, and 16 var_site = 1./np.diag(W)[:, None] diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 470f5059..34798a35 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -114,21 +114,24 @@ class Likelihood(Parameterized): #Otherwise just pass along None's zipped_values = zip(flat_y_test, flat_mu_star, flat_var_star, [None]*y_test.shape[0]) - def integral_generator(y, m, v, y_m): - """Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*""" - def f(f_star): + def integral_generator(yi, mi, vi, yi_m): + """Generate a function which can be integrated + to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*""" + def f(fi_star): #exponent = np.exp(-(1./(2*v))*np.square(m-f_star)) #from GPy.util.misc import safe_exp #exponent = safe_exp(exponent) #return self.pdf(f_star, y, y_m)*exponent #More stable in the log space - return np.exp(self.logpdf(f_star, y, y_m) -(1./(2*v))*np.square(m-f_star)) + return np.exp(self.logpdf(fi_star, yi, yi_m) + - 0.5*np.log(2*np.pi*vi) + - 0.5*np.square(mi-fi_star)/vi) return f - scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v, y_m), -np.inf, np.inf) for y, m, v, y_m in zipped_values]) - scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1) - p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star) + p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf) + for yi, mi, vi, yi_m in zipped_values]) + p_ystar = np.array(p_ystar).reshape(-1, 1) return np.log(p_ystar) def _moments_match_ep(self,obs,tau,v): From ce4c14dd5a8c2729f955aa472704acba16219492 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Fri, 17 Apr 2015 12:17:21 +0200 Subject: [PATCH 161/166] [basis func kernels] added support for simple basis function kernels, can be easily extended by implementing phi function in BasisFuncKern --- GPy/kern/_src/basis_funcs.py | 101 +++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 
GPy/kern/_src/basis_funcs.py diff --git a/GPy/kern/_src/basis_funcs.py b/GPy/kern/_src/basis_funcs.py new file mode 100644 index 00000000..b6a95354 --- /dev/null +++ b/GPy/kern/_src/basis_funcs.py @@ -0,0 +1,101 @@ +# #Copyright (c) 2012, Max Zwiessele (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) +from .kern import Kern +from ...core.parameterization.param import Param +from ...core.parameterization.transformations import Logexp +import numpy as np +from ...util.caching import Cache_this +from ...util.linalg import tdot + +class BasisFuncKernel(Kern): + def __init__(self, input_dim, variance=1., active_dims=None, name='basis func kernel'): + """ + Abstract superclass for kernels with explicit basis functions for use in GPy. + + This class does NOT automatically add an offset to the design matrix phi! + """ + super(BasisFuncKernel, self).__init__(input_dim, active_dims, name) + self.variance = Param('variance', variance, Logexp()) + self.link_parameter(self.variance) + + def phi(self, X): + raise NotImplementedError('Overwrite this phi function, which maps the input X into the higher dimensional space and forms the design matrix Phi') + + def K(self, X, X2=None): + return self.variance * self._K(X, X2) + + def Kdiag(self, X, X2=None): + return self.variance * np.diag(self._K(X, X2)) + + def update_gradients_full(self, dL_dK, X, X2=None): + self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) + + def update_gradients_diag(self, dL_dKdiag, X): + self.variance.gradient = np.einsum('i,i', dL_dKdiag, self._K(X)) + + def concatenate_offset(self, X): + return np.c_[np.ones((X.shape[0], 1)), X] + + def posterior_inf(self, X=None, posterior=None): + """ + Do the posterior inference on the parameters given this kernels functions + and the model posterior, which has to be a GPy posterior, usually found at m.posterior, if m is a GPy model. + If not given we search for the the highest parent to be a model, containing the posterior, and for X accordingly. 
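        A hypothetical usage sketch (model and kernel names assumed): for a GPy
        regression model m whose kernel k is a BasisFuncKernel subclass, e.g.
        k = ChangePointBasisFuncKernel(1, changepoint=5.),

            mu, var = k.posterior_inf()

        returns the posterior mean and covariance of the weights on the columns
        of phi(X) (here: the offset and the change-point step).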
+ """ + if X is None: + try: + X = self._highest_parent_.X + except NameError: + raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference") + if posterior is None: + try: + posterior = self._highest_parent_.posterior + except NameError: + raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference") + phi = self.phi(X) + return self.variance * phi.T.dot(posterior.woodbury_vector), self.variance * (1 - self.variance * phi.T.dot(posterior.woodbury_inv.dot(phi))) + + @Cache_this(limit=3, ignore_args=()) + def _K(self, X, X2): + if X2 is None or X is X2: + phi = self.phi(X) + if phi.ndim != 2: + phi = phi[:, None] + return tdot(phi) + else: + phi1 = self.phi(X) + phi2 = self.phi(X2) + if phi1.ndim != 2: + phi1 = phi1[:, None] + phi2 = phi2[:, None] + return phi1.dot(phi2.T) + + +class LinearSlopeBasisFuncKernel(BasisFuncKernel): + def __init__(self, input_dim, start, stop, variance=1., active_dims=None, name='linear_segment'): + super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name) + self.start = np.array(start) + self.stop = np.array(stop) + + @Cache_this(limit=3, ignore_args=()) + def phi(self, X): + phi = np.where(X < self.start, self.start, X) + phi = np.where(phi > self.stop, self.stop, phi) + return ((phi-self.start)/(self.stop-self.start))-.5 + return self.concatenate_offset(phi) # ((phi-self.start)/(self.stop-self.start))-.5 + +class ChangePointBasisFuncKernel(BasisFuncKernel): + def __init__(self, input_dim, changepoint, variance=1., active_dims=None, name='changepoint'): + super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name) + self.changepoint = changepoint + + @Cache_this(limit=3, ignore_args=()) + def phi(self, X): + return self.concatenate_offset(np.where((X < self.changepoint), -1, 1)) + +class DomainKernel(LinearSlopeBasisFuncKernel): + @Cache_this(limit=3, ignore_args=()) + def phi(self, X): + phi = np.where((X>self.start)*(X Date: Fri, 17 Apr 2015 12:17:30 +0200 Subject: [PATCH 162/166] [minor edits] --- GPy/core/gp.py | 2 ++ GPy/kern/__init__.py | 2 ++ GPy/kern/_src/static.py | 2 +- GPy/util/linalg.py | 1 - 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 05ce282c..9125c296 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -98,6 +98,8 @@ class GP(Model): logger.info("adding kernel and likelihood as parameters") self.link_parameter(self.kern) self.link_parameter(self.likelihood) + self.posterior = None + def set_XY(self, X=None, Y=None, trigger_update=True): """ diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 0e1f8a0d..370ee9e2 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -19,3 +19,5 @@ from _src.trunclinear import TruncLinear,TruncLinear_inf from _src.splitKern import SplitKern,DEtime from _src.splitKern import DEtime as DiffGenomeKern +from _src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel + diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py index 7f59f5df..41798ded 100644 --- a/GPy/kern/_src/static.py +++ b/GPy/kern/_src/static.py @@ -109,7 +109,7 @@ class Fixed(Static): return self.variance * self.fixed_K def Kdiag(self, X): - return self.variance * self.fixed_K.diag() + return self.variance * self.fixed_K.diagonal() def update_gradients_full(self, dL_dK, X, X2=None): self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K) diff --git a/GPy/util/linalg.py 
index 1089b557..7cba4831 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -96,7 +96,6 @@ def jitchol(A, maxtries=5):
     num_tries = 1
     while num_tries <= maxtries and np.isfinite(jitter):
         try:
-            print jitter
             L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
             return L
         except:

From c3f80ece5d330636069e41a2e9ecef5f0acb84b1 Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Mon, 20 Apr 2015 10:07:36 +0100
Subject: [PATCH 163/166] Updated svgp kernel gradients

---
 GPy/core/svgp.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/GPy/core/svgp.py b/GPy/core/svgp.py
index fd48a7ab..284386b3 100644
--- a/GPy/core/svgp.py
+++ b/GPy/core/svgp.py
@@ -54,12 +54,14 @@ class SVGP(SparseGP):
         self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z)
         grad = self.kern.gradient.copy()
         self.kern.update_gradients_full(self.grad_dict['dL_dKmn'], self.Z, self.X)
-        grad += self.kern.gradient
+        grad += self.kern.gradient.copy()
         self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
-        self.kern.gradient += grad
+        grad += self.kern.gradient.copy()
+        self.kern.gradient = grad

         if not self.Z.is_fixed:# only compute these expensive gradients if we need them
             self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) + self.kern.gradients_X(self.grad_dict['dL_dKmn'], self.Z, self.X)
+
         self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])

         #update the variational parameter gradients:
         self.m.gradient = self.grad_dict['dL_dm']

From 2d39afd0a5e11fb7809285de7d8bcfe96df2e1b1 Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Mon, 20 Apr 2015 10:08:37 +0100
Subject: [PATCH 164/166] Reverted back

---
 GPy/core/svgp.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/GPy/core/svgp.py b/GPy/core/svgp.py
index 284386b3..06a9749c 100644
--- a/GPy/core/svgp.py
+++ b/GPy/core/svgp.py
@@ -56,8 +56,7 @@ class SVGP(SparseGP):
         self.kern.update_gradients_full(self.grad_dict['dL_dKmn'], self.Z, self.X)
         grad += self.kern.gradient.copy()
         self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
-        grad += self.kern.gradient.copy()
-        self.kern.gradient = grad
+        self.kern.gradient += grad

         if not self.Z.is_fixed:# only compute these expensive gradients if we need them
             self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) + self.kern.gradients_X(self.grad_dict['dL_dKmn'], self.Z, self.X)

From e7650c8a90de31e3d4877fcbfbc217a9b1202ebf Mon Sep 17 00:00:00 2001
From: mzwiessele
Date: Mon, 20 Apr 2015 16:02:59 +0200
Subject: [PATCH 165/166] [sparse gp] memory overflow with big data, iterating
 over dimensions now

---
 GPy/core/sparse_gp.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 35644bfe..624a8f9c 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -10,6 +10,7 @@ from .parameterization.variational import VariationalPosterior, NormalPosterior
 from ..util.linalg import mdot

 import logging
+import itertools
 logger = logging.getLogger("sparse gp")

 class SparseGP(GP):
@@ -135,7 +136,13 @@ class SparseGP(GP):
                 var = var
             else:
                 Kxx = kern.Kdiag(Xnew)
-                var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
+                if self.posterior.woodbury_inv.ndim == 2:
+                    var = Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0)
+                elif self.posterior.woodbury_inv.ndim == 3:
+                    var = np.empty((Kxx.shape[0], self.posterior.woodbury_inv.shape[2]))
+                    for i in range(var.shape[1]):
+                        var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
+                var = var
             #add in the mean function
             if self.mean_function is not None:
                 mu += self.mean_function.f(Xnew)

From 440d7b64786f6beed299adde5812d66aec662124 Mon Sep 17 00:00:00 2001
From: mzwiessele
Date: Mon, 20 Apr 2015 16:03:39 +0200
Subject: [PATCH 166/166] [basis funcs] linear slope identifiability higher,
 symmetry plus true linear effect

---
 GPy/kern/_src/basis_funcs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/kern/_src/basis_funcs.py b/GPy/kern/_src/basis_funcs.py
index b6a95354..1b300661 100644
--- a/GPy/kern/_src/basis_funcs.py
+++ b/GPy/kern/_src/basis_funcs.py
@@ -81,7 +81,7 @@ class LinearSlopeBasisFuncKernel(BasisFuncKernel):
     def phi(self, X):
         phi = np.where(X < self.start, self.start, X)
         phi = np.where(phi > self.stop, self.stop, phi)
-        return ((phi-self.start)/(self.stop-self.start))-.5
+        return ((phi-(self.stop+self.start)/2.))  #/(.5*(self.stop-self.start)))-1.
         # unreachable alternative with an explicit offset column:
         # return self.concatenate_offset(phi)
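
The basis function kernels introduced in PATCH 161 (and exported in PATCH 162) plug into GPy models like any other kernel. Below is a minimal usage sketch, not part of the patch series: the data, the changepoint location, and the variable names are made up, and it assumes the GPRegression API of this release.

    # Hypothetical usage sketch for the basis function kernels of PATCH 161.
    import numpy as np
    import GPy

    # made-up toy data: a smooth signal with a unit step at x = 5
    X = np.random.uniform(0, 10, (200, 1))
    Y = np.sin(X) + np.where(X > 5., 1., 0.) + 0.1 * np.random.randn(200, 1)

    # smooth component plus an explicit changepoint basis at x = 5
    cp = GPy.kern.ChangePointBasisFuncKernel(1, changepoint=5.)
    k = GPy.kern.RBF(1) + cp
    m = GPy.models.GPRegression(X, Y, k)
    m.optimize()

    # project the GP posterior onto the (offset, step) weights of the basis;
    # posterior_inf searches the parent model for X and m.posterior
    w_mean, w_cov = cp.posterior_inf()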
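The likelihood.py change in PATCH 160 folds the Gaussian normaliser 0.5*log(2*pi*v) into the exponential instead of dividing by sqrt(2*pi*v) after integration, which keeps the whole integrand in log space. A standalone check (not GPy code; the Gaussian `logpdf` stands in for `self.logpdf`) that both routes give the same p(y*|y):

    import numpy as np
    from scipy.integrate import quad

    def logpdf(f, y, noise=0.1):
        # Gaussian likelihood log p(y|f), a stand-in for self.logpdf
        return -0.5*np.log(2*np.pi*noise) - 0.5*(y - f)**2/noise

    y, m, v = 0.3, 0.1, 0.5   # test point, posterior mean and variance of f*

    # old route: integrate, then rescale by the Gaussian normaliser
    old, _ = quad(lambda f: np.exp(logpdf(f, y) - 0.5*(m - f)**2/v),
                  -np.inf, np.inf)
    old /= np.sqrt(2*np.pi*v)

    # new route: carry the normaliser inside the exponent
    new, _ = quad(lambda f: np.exp(logpdf(f, y) - 0.5*np.log(2*np.pi*v)
                                   - 0.5*(m - f)**2/v), -np.inf, np.inf)
    assert np.isclose(old, new)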
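PATCHES 163 and 164 revolve around copying `self.kern.gradient` before accumulating: the gradient property is a view into a buffer that each `update_gradients_*` call overwrites in place, so holding a bare reference aliases the next update. A plain numpy sketch of the pitfall (buffer name made up):

    import numpy as np

    buf = np.zeros(3)           # stands in for the kernel's gradient buffer
    buf[:] = [1., 2., 3.]       # first update_gradients_full(...)
    grad = buf                  # no copy: grad aliases buf
    buf[:] = [10., 20., 30.]    # second update overwrites the buffer...
    print(grad)                 # ...and grad now reads [10. 20. 30.]

    grad = buf.copy()           # with .copy() the accumulator is safe
    buf[:] = [0., 0., 0.]
    print(grad)                 # still [10. 20. 30.]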
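PATCH 165 trades one big broadcast for a loop over output dimensions: the old expression materialises a (num_inducing, N, D) array at once, while the loop only ever holds a (num_inducing, N) slice. A self-contained numpy sketch with made-up shapes, checking that both give the same (N, D) predictive variance:

    import numpy as np

    M, N, D = 5, 7, 3
    Wi = np.random.randn(M, M, D)   # stands in for posterior.woodbury_inv, ndim == 3
    Kx = np.random.randn(M, N)      # stands in for kern.K(Z, Xnew)
    Kxx = np.random.randn(N)        # stands in for kern.Kdiag(Xnew)

    # old: one broadcast over all D output dimensions at once, peak memory O(M*N*D)
    var_old = (Kxx - np.sum(np.dot(np.atleast_3d(Wi).T, Kx) * Kx[None, :, :], 1)).T

    # new: iterate over output dimensions, peak memory O(M*N)
    var_new = np.empty((N, D))
    for i in range(D):
        var_new[:, i] = Kxx - np.sum(np.dot(Wi[:, :, i].T, Kx) * Kx, 0)

    assert np.allclose(var_old, var_new)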