From 65c0c7236111350937c587b145c298190f97548a Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Wed, 21 Aug 2013 09:53:49 +0100
Subject: [PATCH] fixed target slicing bug in prod kernel

---
 GPy/core/gp_base.py        |  2 +-
 GPy/core/sparse_gp.py      | 50 ++++++++++++++++++++------------------
 GPy/core/svigp.py          |  4 +--
 GPy/examples/stochastic.py |  1 +
 GPy/kern/parts/prod.py     |  8 +++---
 5 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py
index 750788e9..98fcaef1 100644
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@@ -164,7 +164,7 @@ class GPBase(Model):
         m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
         m = m.reshape(resolution, resolution).T
         ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
-        Yf = self.likelihood.Y.flatten()
+        Yf = self.likelihood.data.flatten()
         ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
         ax.set_xlim(xmin[0], xmax[0])
         ax.set_ylim(xmin[1], xmax[1])
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 62c9c4fd..aaa5dada 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -49,7 +49,7 @@ class SparseGP(GPBase):
             # normalize X uncertainty also
             if self.has_uncertain_inputs:
                 self.X_variance /= np.square(self._Xscale)
-
+        self._const_jitter = None
 
     def getstate(self):
@@ -82,13 +82,14 @@ class SparseGP(GPBase):
         self.psi2 = None
 
     def _computations(self):
-        # factor Kmm
         if self._const_jitter is None or not(self._const_jitter.shape[0] == self.num_inducing):
             self._const_jitter = np.eye(self.num_inducing) * 1e-7
-        self.Lm = jitchol(self.Kmm + self._const_jitter)
+
+        # factor Kmm
+        self._Lm = jitchol(self.Kmm + self._const_jitter)
 
         # TODO: no white kernel needed anymore, all noise in likelihood --------
-        # The rather complex computations of self.A
+        # The rather complex computations of self._A
         if self.has_uncertain_inputs:
             if self.likelihood.is_heteroscedastic:
                 psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.num_data, 1, 1))).sum(0)
@@ -105,23 +106,23 @@ class SparseGP(GPBase):
                 tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(self.num_data, 1)))
             else:
                 tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
-            tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1)
-            self.A = tdot(tmp)
+            tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1)
+            self._A = tdot(tmp)
 
         # factor B
-        self.B = np.eye(self.num_inducing) + self.A
+        self.B = np.eye(self.num_inducing) + self._A
         self.LB = jitchol(self.B)
 
         # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
         self.psi1Vf = np.dot(self.psi1.T, self.likelihood.VVT_factor)
 
         # back substutue C into psi1Vf
-        tmp, info1 = dtrtrs(self.Lm, np.asfortranarray(self.psi1Vf), lower=1, trans=0)
+        tmp, info1 = dtrtrs(self._Lm, np.asfortranarray(self.psi1Vf), lower=1, trans=0)
         self._LBi_Lmi_psi1Vf, _ = dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
         # tmp, info2 = dpotrs(self.LB, tmp, lower=1)
         tmp, info2 = dtrtrs(self.LB, self._LBi_Lmi_psi1Vf, lower=1, trans=1)
-        self.Cpsi1Vf, info3 = dtrtrs(self.Lm, tmp, lower=1, trans=1)
+        self.Cpsi1Vf, info3 = dtrtrs(self._Lm, tmp, lower=1, trans=1)
 
         # Compute dL_dKmm
         tmp = tdot(self._LBi_Lmi_psi1Vf)
@@ -130,12 +131,12 @@ class SparseGP(GPBase):
         tmp = -0.5 * self.DBi_plus_BiPBi
         tmp += -0.5 * self.B * self.output_dim
         tmp += self.output_dim * np.eye(self.num_inducing)
-        self.dL_dKmm = backsub_both_sides(self.Lm, tmp)
+        self.dL_dKmm = backsub_both_sides(self._Lm, tmp)
 
         # Compute dL_dpsi
         # FIXME: this is untested for the heterscedastic + uncertain inputs case
         self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten()
         self.dL_dpsi1 = np.dot(self.likelihood.VVT_factor, self.Cpsi1Vf.T)
-        dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
+        dL_dpsi2_beta = 0.5 * backsub_both_sides(self._Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
 
         if self.likelihood.is_heteroscedastic:
             if self.has_uncertain_inputs:
@@ -163,17 +164,17 @@ class SparseGP(GPBase):
         else:
             # likelihood is not heterscedatic
             self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
-            self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
-            self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - self.data_fit)
+            self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self._A) * self.likelihood.precision)
+            self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self._A * self.DBi_plus_BiPBi) - self.data_fit)
 
     def log_likelihood(self):
         """
         Compute the (lower bound on the) log marginal likelihood
         """
         if self.likelihood.is_heteroscedastic:
             A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
-            B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
+            B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self._A))
         else:
             A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
-            B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
+            B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self._A))
         C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
         D = 0.5 * self.data_fit
         return A + B + C + D + self.likelihood.Z
@@ -203,7 +204,7 @@ class SparseGP(GPBase):
         if not isinstance(self.likelihood, Gaussian): # Updates not needed for Gaussian likelihood
             self.likelihood.restart()
         if self.has_uncertain_inputs:
-            Lmi = chol_inv(self.Lm)
+            Lmi = chol_inv(self._Lm)
             Kmmi = tdot(Lmi.T)
             diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)])
@@ -245,19 +246,20 @@ class SparseGP(GPBase):
         return dL_dZ
 
     def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
-        """Internal helper function for making predictions, does not account for normalization"""
+        """
+        Internal helper function for making predictions, does not account for
+        normalization or likelihood function
+        """
 
         Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
         symmetrify(Bi)
-        Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi)
+        Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi)
 
         if self.Cpsi1V is None:
             psi1V = np.dot(self.psi1.T, self.likelihood.V)
-            tmp, _ = dtrtrs(self.Lm, np.asfortranarray(psi1V), lower=1, trans=0)
+            tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
             tmp, _ = dpotrs(self.LB, tmp, lower=1)
-            self.Cpsi1V, _ = dtrtrs(self.Lm, tmp, lower=1, trans=1)
-
-
+            self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1)
 
         if X_variance_new is None:
             Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
@@ -318,14 +320,16 @@ class SparseGP(GPBase):
         return mean, var, _025pm, _975pm
 
     def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
+
         if ax is None:
             fig = pb.figure(num=fignum)
             ax = fig.add_subplot(111)
-        if which_data is 'all': which_data = slice(None)
 
         GPBase.plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax)
+
+        # add the inducing inputs and some errorbars
         if self.X.shape[1] == 1:
             if self.has_uncertain_inputs:
                 Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
diff --git a/GPy/core/svigp.py b/GPy/core/svigp.py
index 8a2d889d..b0175a39 100644
--- a/GPy/core/svigp.py
+++ b/GPy/core/svigp.py
@@ -141,7 +141,7 @@ class SVIGP(GPBase):
         self.Z = state.pop()
         vb_param = state.pop()
         GPBase.setstate(self, state)
-        self.set_vb_param(vb_param)
+        self.set_vb_param(vb_param)
 
     def _compute_kernel_matrices(self):
         # kernel computations, using BGPLVM notation
@@ -501,7 +501,7 @@ class SVIGP(GPBase):
             ax.plot(Zu, np.zeros_like(Zu) + Z_height, 'r|', mew=1.5, markersize=12)
 
         if self.input_dim==2:
-            ax.scatter(self.X_all[:,0], self.X_all[:,1], 20., self.Y[:,0], linewidth=0, cmap=pb.cm.jet)
+            ax.scatter(self.X[:,0], self.X[:,1], 20., self.Y[:,0], linewidth=0, cmap=pb.cm.jet)
             ax.plot(Zu[:,0], Zu[:,1], 'w^')
 
     def plot_traces(self):
diff --git a/GPy/examples/stochastic.py b/GPy/examples/stochastic.py
index 533904d5..21011901 100644
--- a/GPy/examples/stochastic.py
+++ b/GPy/examples/stochastic.py
@@ -16,6 +16,7 @@ def toy_1d():
 
     m = GPy.models.SVIGPRegression(X,Y, batchsize=10, Z=Z)
     m.constrain_bounded('noise_variance',1e-3,1e-1)
+    m.constrain_bounded('white_variance',1e-3,1e-1)
 
     m.param_steplength = 1e-4
diff --git a/GPy/kern/parts/prod.py b/GPy/kern/parts/prod.py
index db31c626..8cc82109 100644
--- a/GPy/kern/parts/prod.py
+++ b/GPy/kern/parts/prod.py
@@ -80,8 +80,8 @@ class Prod(Kernpart):
     def dK_dX(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         self._K_computations(X,X2)
-        self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target)
-        self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target)
+        self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
+        self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
 
     def dKdiag_dX(self, dL_dKdiag, X, target):
         K1 = np.zeros(X.shape[0])
@@ -89,8 +89,8 @@ class Prod(Kernpart):
         self.k1.Kdiag(X[:,self.slice1],K1)
         self.k2.Kdiag(X[:,self.slice2],K2)
 
-        self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target)
-        self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target)
+        self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
+        self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
 
     def _K_computations(self,X,X2):
         if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
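
Note on the prod-kernel fix (illustration only, not part of the patch): Prod.dK_dX and
Prod.dKdiag_dX accumulate each sub-kernel's gradient into a shared output buffer in place.
Before this change both parts wrote into the full target array, so the second kernel's
gradient landed in the first kernel's columns; passing target[:,self.slice1] and
target[:,self.slice2] hands each part a view of only the input dimensions it is active on.
A minimal Python sketch of the mechanism, with hypothetical names (toy_dK_dX, slice1,
slice2) standing in for GPy's kernel-part API:

    import numpy as np

    def toy_dK_dX(dL_dK, X, X2, target):
        # accumulate a toy dX gradient in place, the way GPy kernel parts
        # write into the output buffer they are handed
        target += (dL_dK[:, :, None] * (X[:, None, :] - X2[None, :, :])).sum(1)

    N, M, Q = 4, 3, 5
    X, X2 = np.random.randn(N, Q), np.random.randn(M, Q)
    dL_dK = np.random.randn(N, M)
    slice1, slice2 = slice(0, 2), slice(2, Q)  # each part sees different input columns

    target = np.zeros((N, Q))
    # NumPy basic slicing returns a view, so the in-place += writes through to
    # `target`, and each part's gradient lands in exactly the columns it used.
    toy_dK_dX(dL_dK, X[:, slice1], X2[:, slice1], target[:, slice1])
    toy_dK_dX(dL_dK, X[:, slice2], X2[:, slice2], target[:, slice2])

This relies on the slices being plain slice objects; fancy (integer-array) indexing would
return a copy and the in-place accumulation would be lost.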