From ab50dc7ceca6e79e88c6cc68771719863b074730 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 16 Apr 2013 12:36:15 +0100
Subject: [PATCH 01/11] a litle more stability in svigp

Another instance of dpotrs instead of dot
---
 GPy/models/sparse_GP.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 4d9edacc..16b22094 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -148,7 +148,10 @@ class sparse_GP(GP):
         #self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
         tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.A),lower=1,trans=1)[0]
         self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
-        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) + np.dot(np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1,self.Kmmi) # d(C+D)
+        tmp = np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1
+        #tmp = np.dot(tmp,self.Kmmi)
+        tmp = linalg.lapack.flapack.dpotrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0].T
+        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 - self.Kmmi) + self.E) + tmp # d(C+D)
 
         #the partial derivative vector for the likelihood
         if self.likelihood.Nparams ==0:

From 56ecd4782a576c4b471379b594aaa9639f90e799 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 22 Apr 2013 11:59:32 +0100
Subject: [PATCH 02/11] made the basic GP class use dtrtrs where possible

---
 GPy/models/GP.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/GPy/models/GP.py b/GPy/models/GP.py
index cfda0cfe..a46a35d0 100644
--- a/GPy/models/GP.py
+++ b/GPy/models/GP.py
@@ -9,6 +9,7 @@ from ..core import model
 from ..util.linalg import pdinv,mdot
 from ..util.plot import gpplot,x_frame1D,x_frame2D, Tango
 from ..likelihoods import EP
+from scipy import linalg
 
 class GP(model):
     """
@@ -78,10 +79,13 @@ class GP(model):
 
         #the gradient of the likelihood wrt the covariance matrix
         if self.likelihood.YYT is None:
-            alpha = np.dot(self.Ki,self.likelihood.Y)
+            #alpha = np.dot(self.Ki,self.likelihood.Y)
+            alpha,info = linalg.lapack.flapack.dpotrs(self.L,np.asfortranarray(self.likelihood.Y),lower=1)
             self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
         else:
-            tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
+            #tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
+            tmp,info = linalg.lapack.flapack.dpotrs(self.L,np.asfortranarray(self.likelihood.YYT),lower=1)
+            tmp,info = linalg.lapack.flapack.dpotrs(self.L,np.asfortranarray(tmp.T),lower=1)
             self.dL_dK = 0.5*(tmp - self.D*self.Ki)
 
     def _get_params(self):
@@ -105,10 +109,13 @@ class GP(model):
         Computes the model fit using YYT if it's available
         """
         if self.likelihood.YYT is None:
-            return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
+            #return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
+            tmp,info = linalg.lapack.flapack.dtrtrs(self.L,np.asfortranarray(self.likelihood.Y),lower=1)
+            return -0.5*np.sum(np.square(tmp))
         else:
             return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT))
 
+
     def log_likelihood(self):
         """
         The log marginal likelihood of the GP.
@@ -136,8 +143,11 @@ class GP(model):
         for normalization or likelihood
         """
         Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
-        mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
-        KiKx = np.dot(self.Ki,Kx)
+        #mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
+        tmp,info = linalg.lapack.flapack.dpotrs(self.L,np.asfortranarray(self.likelihood.Y),lower=1)
+        mu = np.dot(Kx.T,tmp)
+        #KiKx = np.dot(self.Ki,Kx)
+        KiKx,info = linalg.lapack.flapack.dpotrs(self.L,np.asfortranarray(Kx),lower=1)
         if full_cov:
             Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
             var = Kxx - np.dot(KiKx.T,Kx)

From 698f52e5e3cddb34c0524291fbb10165ffae858b Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 22 Apr 2013 13:15:39 +0100
Subject: [PATCH 03/11] GPy now fails silently if sympy is not present

---
 GPy/kern/__init__.py     |  6 ++++-
 GPy/kern/constructors.py | 58 ++++++++++++++++++++++------------------
 2 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index f062ee56..93274ec5 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -2,5 +2,9 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos
+try:
+    from constructors import rbf_sympy, sympykern # these depend on sympy
+except:
+    pass
 from kern import kern
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 6a968da4..e5743f47 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -165,34 +165,40 @@ def Brownian(D,variance=1.):
     part = Brownianpart(D,variance)
     return kern(D, [part])
 
-import sympy as sp
-from sympykern import spkern
-from sympy.parsing.sympy_parser import parse_expr
+try:
+    import sympy as sp
+    from sympykern import spkern
+    from sympy.parsing.sympy_parser import parse_expr
+    sympy_available = True
+except ImportError:
+    sympy_available = False
 
-def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
-    """
-    Radial Basis Function covariance.
-    """
-    X = [sp.var('x%i'%i) for i in range(D)]
-    Z = [sp.var('z%i'%i) for i in range(D)]
-    rbf_variance = sp.var('rbf_variance',positive=True)
-    if ARD:
-        rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
-        dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
-        dist = parse_expr(dist_string)
-        f =  rbf_variance*sp.exp(-dist/2.)
-    else:
-        rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
-        dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
-        dist = parse_expr(dist_string)
-        f =  rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
-    return kern(D,[spkern(D,f)])
+if sympy_available:
+    def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
+        """
+        Radial Basis Function covariance.
+        """
+        X = [sp.var('x%i'%i) for i in range(D)]
+        Z = [sp.var('z%i'%i) for i in range(D)]
+        rbf_variance = sp.var('rbf_variance',positive=True)
+        if ARD:
+            rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
+            dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
+            dist = parse_expr(dist_string)
+            f =  rbf_variance*sp.exp(-dist/2.)
+        else:
+            rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
+            dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
+            dist = parse_expr(dist_string)
+            f =  rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
+        return kern(D,[spkern(D,f)])
 
-def sympykern(D,k):
-    """
-    A kernel from a symbolic sympy representation
-    """
-    return kern(D,[spkern(D,k)])
+    def sympykern(D,k):
+        """
+        A kernel from a symbolic sympy representation
+        """
+        return kern(D,[spkern(D,k)])
+del sympy_available
 
 def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
     """

From 8bd017466d7c14a45ae77be3eb309c819d7109ea Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 22 Apr 2013 13:37:59 +0100
Subject: [PATCH 04/11] Nparam_transformed work better now

Before, counted the number of fixes, which failed when a fix fixed more
than one parameter...
---
 GPy/core/parameterised.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py
index b5d880a3..c80926ce 100644
--- a/GPy/core/parameterised.py
+++ b/GPy/core/parameterised.py
@@ -103,10 +103,18 @@ class parameterised(object):
             return expr
 
     def Nparam_transformed(self):
-            ties = 0
-            for ar in self.tied_indices:
-                ties += ar.size - 1
-            return self.Nparam - len(self.constrained_fixed_indices) - ties
+        """
+        Compute the number of parameters after ties and fixing have been performed
+        """
+        ties = 0
+        for ti in self.tied_indices:
+            ties += ti.size - 1
+
+        fixes = 0
+        for fi in self.constrained_fixed_indices:
+            fixes += len(fi)
+
+        return self.Nparam - fixes - ties
 
     def constrain_positive(self, which):
         """

From f1451419232d78dc6fd8cdfc44a95c1ad7640d93 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 23 Apr 2013 10:02:12 +0100
Subject: [PATCH 05/11] added a kernel for independent outputs

---
 GPy/kern/__init__.py            |  2 +-
 GPy/kern/constructors.py        | 12 ++++
 GPy/kern/independent_outputs.py | 97 +++++++++++++++++++++++++++++++++
 3 files changed, 110 insertions(+), 1 deletion(-)
 create mode 100644 GPy/kern/independent_outputs.py

diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 93274ec5..327bf69c 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
 try:
     from constructors import rbf_sympy, sympykern # these depend on sympy
 except:
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index e5743f47..9c2464a7 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -25,6 +25,7 @@ from symmetric import symmetric as symmetric_part
 from coregionalise import coregionalise as coregionalise_part
 from rational_quadratic import rational_quadratic as rational_quadraticpart
 from rbfcos import rbfcos as rbfcospart
+from independent_outputs import independent_outputs as independent_output_part
 #TODO these s=constructors are not as clean as we'd like. Tidy the code up
 #using meta-classes to make the objects construct properly wthout them.
 
@@ -324,3 +325,14 @@ def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False):
     """
     part = rbfcospart(D,variance,frequencies,bandwidths,ARD)
     return kern(D,[part])
+
+def independent_outputs(k):
+    """
+    Construct a kernel with independent outputs from an existing kernel
+    """
+    for sl in k.input_slices:
+        assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
+    parts = [independent_output_part(p) for p in k.parts]
+    return kern(k.D+1,parts)
+
+
diff --git a/GPy/kern/independent_outputs.py b/GPy/kern/independent_outputs.py
new file mode 100644
index 00000000..214c542c
--- /dev/null
+++ b/GPy/kern/independent_outputs.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2012, James Hesnsman
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+from kernpart import kernpart
+import numpy as np
+
+def index_to_slices(index):
+    """
+    take a numpy array of integers (index) and return a  nested list of slices such that the slices describe the start, stop points for each integer in the index. 
+
+    e.g.
+    >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
+    returns
+    >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
+
+    or, a more complicated example
+    >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
+    returns
+    >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
+    """
+
+    #contruct the return structure
+    ind = np.asarray(index,dtype=np.int64)
+    ret = [[] for i in range(ind.max()+1)]
+
+    #find the switchpoints
+    ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
+    switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
+
+    [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
+    return ret
+
+class independent_outputs(kernpart):
+    """
+    A kernel part shich can reopresent several independent functions.
+    this kernel 'switches off' parts of the matrix where the output indexes are different.
+
+    The index of the functions is given by the last column in the input X
+    the rest of the columns of X are passed to the kernel for computation (in blocks).
+
+    """
+    def __init__(self,k):
+        self.D = k.D + 1
+        self.Nparam = k.Nparam
+        self.name = 'iops('+ k.name + ')'
+        self.k = k
+
+    def _get_params(self):
+        return self.k._get_params()
+
+    def _set_params(self,x):
+        self.k._set_params(x)
+        self.params = x
+
+    def _get_param_names(self):
+        return self.k._get_param_names()
+
+    def K(self,X,X2,target):
+        #Sort out the slices from the input data
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        if X2 is None:
+            X2,slices2 = X,slices
+        else:
+            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+
+        [[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+
+    def Kdiag(self,X,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        [[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
+
+    def dK_dtheta(self,dL_dK,X,X2,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        if X2 is None:
+            X2,slices2 = X,slices
+        else:
+            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+        [[[self.k.dK_dtheta(X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+
+
+    def dK_dX(self,dL_dK,X,X2,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        if X2 is None:
+            X2,slices2 = X,slices
+        else:
+            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+        [[[self.k.dK_dX(X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+
+    def dKdiag_dX(self,dL_dKdiag,X,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        [[self.k.dKdiag_dX(X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
+
+
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        [[self.k.dKdiag_dX(X[s],target) for s in slices_i] for slices_i in slices]

From f35578804a2dcbc9066d48a103bcaf4ed1d0fd5d Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 23 Apr 2013 10:56:10 +0100
Subject: [PATCH 06/11] prod_orthogonal now caches the K matrices

---
 GPy/kern/coregionalise.py   | 13 ++++++---
 GPy/kern/prod_orthogonal.py | 53 ++++++++++++++++++++-----------------
 2 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py
index a76bb31e..b1b69325 100644
--- a/GPy/kern/coregionalise.py
+++ b/GPy/kern/coregionalise.py
@@ -62,11 +62,16 @@ class coregionalise(kernpart):
         ii,jj = np.meshgrid(index,index2)
         ii,jj = ii.T, jj.T
 
+        #dL_dK_small = np.zeros_like(self.B)
+        #for i in range(self.Nout):
+            #for j in range(self.Nout):
+                #tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
+                #dL_dK_small[i,j] = tmp
+        #as above, but slightly faster
         dL_dK_small = np.zeros_like(self.B)
-        for i in range(self.Nout):
-            for j in range(self.Nout):
-                tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
-                dL_dK_small[i,j] = tmp
+        where_i = [ii==i for i in xrange(self.Nout)]
+        where_j = [jj==j for j in xrange(self.Nout)]
+        [[np.put(dL_dK_small,i+self.Nout*j,np.sum(dL_dK[np.logical_and(wi,wj)])) for i,wi in enumerate(where_i)] for j,wj in enumerate(where_j)]
 
         dkappa = np.diag(dL_dK_small)
         dL_dK_small += dL_dK_small.T
diff --git a/GPy/kern/prod_orthogonal.py b/GPy/kern/prod_orthogonal.py
index fc349da8..2afafe25 100644
--- a/GPy/kern/prod_orthogonal.py
+++ b/GPy/kern/prod_orthogonal.py
@@ -22,6 +22,7 @@ class prod_orthogonal(kernpart):
         self.k1 = k1
         self.k2 = k2
         self._set_params(np.hstack((k1._get_params(),k2._get_params())))
+        self._X, self._X2, self._params = np.empty(shape=(3,1)) # initialize cache
 
     def _get_params(self):
         """return the value of the parameters."""
@@ -39,23 +40,38 @@ class prod_orthogonal(kernpart):
 
     def K(self,X,X2,target):
         """Compute the covariance matrix between X and X2."""
-        if X2 is None: X2 = X
-        target1 = np.zeros_like(target)
-        target2 = np.zeros_like(target)
-        self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],target1)
-        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
-        target += target1 * target2
+        self._K_computations(X,X2)
+        target += self._K1*self._K2
+
+    def _K_computations(self,X,X2):
+        """
+        Compute the two kernel matrices.
+        The computation is only done if needed: many times it will be the same as the previous call
+        """
+        if not (np.all(X==self._X) and np.all(X2==self._X2) and np.all(self._params == self._get_params())):
+            #store new values in cache
+            self._X = X.copy()
+            self._X2 = X2.copy()
+            self._params = self._get_params().copy()
+
+            #update self._K1, self._K2
+            if X2 is None: X2 = X
+            self._K1 = np.zeros((X.shape[0],X2.shape[0]))
+            self._K2 = np.zeros((X.shape[0],X2.shape[0]))
+            self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],self._K1)
+            self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],self._K2)
 
     def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters."""
-        if X2 is None: X2 = X
-        K1 = np.zeros((X.shape[0],X2.shape[0]))
-        K2 = np.zeros((X.shape[0],X2.shape[0]))
-        self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1)
-        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
+        self._K_computations(X,X2)
+        self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
+        self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
 
-        self.k1.dK_dtheta(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
-        self.k2.dK_dtheta(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
+    def dK_dX(self,dL_dK,X,X2,target):
+        """derivative of the covariance matrix with respect to X."""
+        self._K_computations(X,X2)
+        self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
+        self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
 
     def Kdiag(self,X,target):
         """Compute the diagonal of the covariance matrix associated to X."""
@@ -73,17 +89,6 @@ class prod_orthogonal(kernpart):
         self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.D],target[:self.k1.Nparam])
         self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.D:],target[self.k1.Nparam:])
 
-    def dK_dX(self,dL_dK,X,X2,target):
-        """derivative of the covariance matrix with respect to X."""
-        if X2 is None: X2 = X
-        K1 = np.zeros((X.shape[0],X2.shape[0]))
-        K2 = np.zeros((X.shape[0],X2.shape[0]))
-        self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1)
-        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
-
-        self.k1.dK_dX(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
-        self.k2.dK_dX(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
-
     def dKdiag_dX(self, dL_dKdiag, X, target):
         K1 = np.zeros(X.shape[0])
         K2 = np.zeros(X.shape[0])

From 9109d451abf6009270c43fc0b88c00bdbd0e6151 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 23 Apr 2013 11:59:00 +0100
Subject: [PATCH 07/11] fixing small bug in independent outputs kern

---
 GPy/kern/independent_outputs.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/GPy/kern/independent_outputs.py b/GPy/kern/independent_outputs.py
index 214c542c..cc7c0051 100644
--- a/GPy/kern/independent_outputs.py
+++ b/GPy/kern/independent_outputs.py
@@ -76,7 +76,7 @@ class independent_outputs(kernpart):
             X2,slices2 = X,slices
         else:
             X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        [[[self.k.dK_dtheta(X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+        [[[self.k.dK_dtheta(dL_dK,X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
 
 
     def dK_dX(self,dL_dK,X,X2,target):
@@ -85,13 +85,13 @@ class independent_outputs(kernpart):
             X2,slices2 = X,slices
         else:
             X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        [[[self.k.dK_dX(X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+        [[[self.k.dK_dX(dL_dK,X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
 
     def dKdiag_dX(self,dL_dKdiag,X,target):
         X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.dKdiag_dX(X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
+        [[self.k.dKdiag_dX(dL_dKdiag,X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
 
 
     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.dKdiag_dX(X[s],target) for s in slices_i] for slices_i in slices]
+        [[self.k.dKdiag_dX(dL_dKdiag,X[s],target) for s in slices_i] for slices_i in slices]

From d402047ff3274898e4ebf29bcc0149c123ef0495 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 23 Apr 2013 12:01:10 +0100
Subject: [PATCH 08/11] more minor bugs

---
 GPy/kern/independent_outputs.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/GPy/kern/independent_outputs.py b/GPy/kern/independent_outputs.py
index cc7c0051..b94202d7 100644
--- a/GPy/kern/independent_outputs.py
+++ b/GPy/kern/independent_outputs.py
@@ -76,7 +76,7 @@ class independent_outputs(kernpart):
             X2,slices2 = X,slices
         else:
             X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        [[[self.k.dK_dtheta(dL_dK,X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+        [[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
 
 
     def dK_dX(self,dL_dK,X,X2,target):
@@ -85,13 +85,13 @@ class independent_outputs(kernpart):
             X2,slices2 = X,slices
         else:
             X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        [[[self.k.dK_dX(dL_dK,X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+        [[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
 
     def dKdiag_dX(self,dL_dKdiag,X,target):
         X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.dKdiag_dX(dL_dKdiag,X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
+        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
 
 
     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.dKdiag_dX(dL_dKdiag,X[s],target) for s in slices_i] for slices_i in slices]
+        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]

From 2205c333b2913275218ffdf3156e46f190d3c09d Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 23 Apr 2013 12:19:41 +0100
Subject: [PATCH 09/11] fixed a weird regular expression bug in
 ensure_def_constraints

---
 GPy/core/model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index f70125fd..e7b993e0 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -13,6 +13,7 @@ import priors
 from ..util.linalg import jitchol
 from ..inference import optimization
 from .. import likelihoods
+import re
 
 class model(parameterised):
     def __init__(self):
@@ -239,7 +240,7 @@ class model(parameterised):
         for s in positive_strings:
             for i in self.grep_param_names(s):
                 if not (i in currently_constrained):
-                    to_make_positive.append(param_names[i])
+                    to_make_positive.append(re.escape(param_names[i]))
                     if warn:
                         print "Warning! constraining %s postive"%name
         if len(to_make_positive):

From 0c8b83454f5ca772d2d87180ccbe891a295fcf8b Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 23 Apr 2013 14:02:15 +0100
Subject: [PATCH 10/11] Revert "merge devel mrd"

This reverts commit 3f625a9347fde47625f14898c0a3a6ed4f49b55a, reversing
changes made to dc6faeb30355bf9c6f0f3694e8546bcdf26372a8.
---
 GPy/core/model.py               |  3 +-
 GPy/core/parameterised.py       | 16 ++----
 GPy/kern/__init__.py            |  6 +-
 GPy/kern/constructors.py        | 70 +++++++++---------------
 GPy/kern/coregionalise.py       | 13 ++---
 GPy/kern/independent_outputs.py | 97 ---------------------------------
 GPy/kern/prod_orthogonal.py     | 53 ++++++++----------
 GPy/models/sparse_GP.py         |  5 +-
 8 files changed, 61 insertions(+), 202 deletions(-)
 delete mode 100644 GPy/kern/independent_outputs.py

diff --git a/GPy/core/model.py b/GPy/core/model.py
index e7b993e0..f70125fd 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -13,7 +13,6 @@ import priors
 from ..util.linalg import jitchol
 from ..inference import optimization
 from .. import likelihoods
-import re
 
 class model(parameterised):
     def __init__(self):
@@ -240,7 +239,7 @@ class model(parameterised):
         for s in positive_strings:
             for i in self.grep_param_names(s):
                 if not (i in currently_constrained):
-                    to_make_positive.append(re.escape(param_names[i]))
+                    to_make_positive.append(param_names[i])
                     if warn:
                         print "Warning! constraining %s postive"%name
         if len(to_make_positive):
diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py
index c80926ce..b5d880a3 100644
--- a/GPy/core/parameterised.py
+++ b/GPy/core/parameterised.py
@@ -103,18 +103,10 @@ class parameterised(object):
             return expr
 
     def Nparam_transformed(self):
-        """
-        Compute the number of parameters after ties and fixing have been performed
-        """
-        ties = 0
-        for ti in self.tied_indices:
-            ties += ti.size - 1
-
-        fixes = 0
-        for fi in self.constrained_fixed_indices:
-            fixes += len(fi)
-
-        return self.Nparam - fixes - ties
+            ties = 0
+            for ar in self.tied_indices:
+                ties += ar.size - 1
+            return self.Nparam - len(self.constrained_fixed_indices) - ties
 
     def constrain_positive(self, which):
         """
diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 327bf69c..f062ee56 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -2,9 +2,5 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
-try:
-    from constructors import rbf_sympy, sympykern # these depend on sympy
-except:
-    pass
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos
 from kern import kern
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 9c2464a7..6a968da4 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -25,7 +25,6 @@ from symmetric import symmetric as symmetric_part
 from coregionalise import coregionalise as coregionalise_part
 from rational_quadratic import rational_quadratic as rational_quadraticpart
 from rbfcos import rbfcos as rbfcospart
-from independent_outputs import independent_outputs as independent_output_part
 #TODO these s=constructors are not as clean as we'd like. Tidy the code up
 #using meta-classes to make the objects construct properly wthout them.
 
@@ -166,40 +165,34 @@ def Brownian(D,variance=1.):
     part = Brownianpart(D,variance)
     return kern(D, [part])
 
-try:
-    import sympy as sp
-    from sympykern import spkern
-    from sympy.parsing.sympy_parser import parse_expr
-    sympy_available = True
-except ImportError:
-    sympy_available = False
+import sympy as sp
+from sympykern import spkern
+from sympy.parsing.sympy_parser import parse_expr
 
-if sympy_available:
-    def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
-        """
-        Radial Basis Function covariance.
-        """
-        X = [sp.var('x%i'%i) for i in range(D)]
-        Z = [sp.var('z%i'%i) for i in range(D)]
-        rbf_variance = sp.var('rbf_variance',positive=True)
-        if ARD:
-            rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
-            dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
-            dist = parse_expr(dist_string)
-            f =  rbf_variance*sp.exp(-dist/2.)
-        else:
-            rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
-            dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
-            dist = parse_expr(dist_string)
-            f =  rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
-        return kern(D,[spkern(D,f)])
+def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
+    """
+    Radial Basis Function covariance.
+    """
+    X = [sp.var('x%i'%i) for i in range(D)]
+    Z = [sp.var('z%i'%i) for i in range(D)]
+    rbf_variance = sp.var('rbf_variance',positive=True)
+    if ARD:
+        rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
+        dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
+        dist = parse_expr(dist_string)
+        f =  rbf_variance*sp.exp(-dist/2.)
+    else:
+        rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
+        dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
+        dist = parse_expr(dist_string)
+        f =  rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
+    return kern(D,[spkern(D,f)])
 
-    def sympykern(D,k):
-        """
-        A kernel from a symbolic sympy representation
-        """
-        return kern(D,[spkern(D,k)])
-del sympy_available
+def sympykern(D,k):
+    """
+    A kernel from a symbolic sympy representation
+    """
+    return kern(D,[spkern(D,k)])
 
 def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
     """
@@ -325,14 +318,3 @@ def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False):
     """
     part = rbfcospart(D,variance,frequencies,bandwidths,ARD)
     return kern(D,[part])
-
-def independent_outputs(k):
-    """
-    Construct a kernel with independent outputs from an existing kernel
-    """
-    for sl in k.input_slices:
-        assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
-    parts = [independent_output_part(p) for p in k.parts]
-    return kern(k.D+1,parts)
-
-
diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py
index b1b69325..a76bb31e 100644
--- a/GPy/kern/coregionalise.py
+++ b/GPy/kern/coregionalise.py
@@ -62,16 +62,11 @@ class coregionalise(kernpart):
         ii,jj = np.meshgrid(index,index2)
         ii,jj = ii.T, jj.T
 
-        #dL_dK_small = np.zeros_like(self.B)
-        #for i in range(self.Nout):
-            #for j in range(self.Nout):
-                #tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
-                #dL_dK_small[i,j] = tmp
-        #as above, but slightly faster
         dL_dK_small = np.zeros_like(self.B)
-        where_i = [ii==i for i in xrange(self.Nout)]
-        where_j = [jj==j for j in xrange(self.Nout)]
-        [[np.put(dL_dK_small,i+self.Nout*j,np.sum(dL_dK[np.logical_and(wi,wj)])) for i,wi in enumerate(where_i)] for j,wj in enumerate(where_j)]
+        for i in range(self.Nout):
+            for j in range(self.Nout):
+                tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
+                dL_dK_small[i,j] = tmp
 
         dkappa = np.diag(dL_dK_small)
         dL_dK_small += dL_dK_small.T
diff --git a/GPy/kern/independent_outputs.py b/GPy/kern/independent_outputs.py
deleted file mode 100644
index b94202d7..00000000
--- a/GPy/kern/independent_outputs.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) 2012, James Hesnsman
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-from kernpart import kernpart
-import numpy as np
-
-def index_to_slices(index):
-    """
-    take a numpy array of integers (index) and return a  nested list of slices such that the slices describe the start, stop points for each integer in the index. 
-
-    e.g.
-    >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
-    returns
-    >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
-
-    or, a more complicated example
-    >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
-    returns
-    >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
-    """
-
-    #contruct the return structure
-    ind = np.asarray(index,dtype=np.int64)
-    ret = [[] for i in range(ind.max()+1)]
-
-    #find the switchpoints
-    ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
-    switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
-
-    [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
-    return ret
-
-class independent_outputs(kernpart):
-    """
-    A kernel part shich can reopresent several independent functions.
-    this kernel 'switches off' parts of the matrix where the output indexes are different.
-
-    The index of the functions is given by the last column in the input X
-    the rest of the columns of X are passed to the kernel for computation (in blocks).
-
-    """
-    def __init__(self,k):
-        self.D = k.D + 1
-        self.Nparam = k.Nparam
-        self.name = 'iops('+ k.name + ')'
-        self.k = k
-
-    def _get_params(self):
-        return self.k._get_params()
-
-    def _set_params(self,x):
-        self.k._set_params(x)
-        self.params = x
-
-    def _get_param_names(self):
-        return self.k._get_param_names()
-
-    def K(self,X,X2,target):
-        #Sort out the slices from the input data
-        X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        if X2 is None:
-            X2,slices2 = X,slices
-        else:
-            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-
-        [[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
-
-    def Kdiag(self,X,target):
-        X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
-
-    def dK_dtheta(self,dL_dK,X,X2,target):
-        X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        if X2 is None:
-            X2,slices2 = X,slices
-        else:
-            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        [[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
-
-
-    def dK_dX(self,dL_dK,X,X2,target):
-        X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        if X2 is None:
-            X2,slices2 = X,slices
-        else:
-            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        [[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
-
-    def dKdiag_dX(self,dL_dKdiag,X,target):
-        X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
-
-
-    def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        X,slices = X[:,:-1],index_to_slices(X[:,-1])
-        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]
diff --git a/GPy/kern/prod_orthogonal.py b/GPy/kern/prod_orthogonal.py
index 2afafe25..fc349da8 100644
--- a/GPy/kern/prod_orthogonal.py
+++ b/GPy/kern/prod_orthogonal.py
@@ -22,7 +22,6 @@ class prod_orthogonal(kernpart):
         self.k1 = k1
         self.k2 = k2
         self._set_params(np.hstack((k1._get_params(),k2._get_params())))
-        self._X, self._X2, self._params = np.empty(shape=(3,1)) # initialize cache
 
     def _get_params(self):
         """return the value of the parameters."""
@@ -40,38 +39,23 @@ class prod_orthogonal(kernpart):
 
     def K(self,X,X2,target):
         """Compute the covariance matrix between X and X2."""
-        self._K_computations(X,X2)
-        target += self._K1*self._K2
-
-    def _K_computations(self,X,X2):
-        """
-        Compute the two kernel matrices.
-        The computation is only done if needed: many times it will be the same as the previous call
-        """
-        if not (np.all(X==self._X) and np.all(X2==self._X2) and np.all(self._params == self._get_params())):
-            #store new values in cache
-            self._X = X.copy()
-            self._X2 = X2.copy()
-            self._params = self._get_params().copy()
-
-            #update self._K1, self._K2
-            if X2 is None: X2 = X
-            self._K1 = np.zeros((X.shape[0],X2.shape[0]))
-            self._K2 = np.zeros((X.shape[0],X2.shape[0]))
-            self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],self._K1)
-            self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],self._K2)
+        if X2 is None: X2 = X
+        target1 = np.zeros_like(target)
+        target2 = np.zeros_like(target)
+        self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],target1)
+        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
+        target += target1 * target2
 
     def dK_dtheta(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters."""
-        self._K_computations(X,X2)
-        self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
-        self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
+        if X2 is None: X2 = X
+        K1 = np.zeros((X.shape[0],X2.shape[0]))
+        K2 = np.zeros((X.shape[0],X2.shape[0]))
+        self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1)
+        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
 
-    def dK_dX(self,dL_dK,X,X2,target):
-        """derivative of the covariance matrix with respect to X."""
-        self._K_computations(X,X2)
-        self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
-        self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
+        self.k1.dK_dtheta(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
+        self.k2.dK_dtheta(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
 
     def Kdiag(self,X,target):
         """Compute the diagonal of the covariance matrix associated to X."""
@@ -89,6 +73,17 @@ class prod_orthogonal(kernpart):
         self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.D],target[:self.k1.Nparam])
         self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.D:],target[self.k1.Nparam:])
 
+    def dK_dX(self,dL_dK,X,X2,target):
+        """derivative of the covariance matrix with respect to X."""
+        if X2 is None: X2 = X
+        K1 = np.zeros((X.shape[0],X2.shape[0]))
+        K2 = np.zeros((X.shape[0],X2.shape[0]))
+        self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1)
+        self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
+
+        self.k1.dK_dX(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
+        self.k2.dK_dX(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
+
     def dKdiag_dX(self, dL_dKdiag, X, target):
         K1 = np.zeros(X.shape[0])
         K2 = np.zeros(X.shape[0])
diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 16b22094..4d9edacc 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -148,10 +148,7 @@ class sparse_GP(GP):
         #self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
         tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.A),lower=1,trans=1)[0]
         self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
-        tmp = np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1
-        #tmp = np.dot(tmp,self.Kmmi)
-        tmp = linalg.lapack.flapack.dpotrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0].T
-        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 - self.Kmmi) + self.E) + tmp # d(C+D)
+        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) + np.dot(np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1,self.Kmmi) # d(C+D)
 
         #the partial derivative vector for the likelihood
         if self.likelihood.Nparams ==0:

From 2c3a53b1740bfbb85a55d827788b1995176bb0b3 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Tue, 23 Apr 2013 14:10:38 +0100
Subject: [PATCH 11/11] psi stat tests done and failing gracefully

---
 GPy/testing/psi_stat_tests.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/GPy/testing/psi_stat_tests.py b/GPy/testing/psi_stat_tests.py
index 93f9867c..22737ca1 100644
--- a/GPy/testing/psi_stat_tests.py
+++ b/GPy/testing/psi_stat_tests.py
@@ -39,10 +39,6 @@ class PsiStatModel(model):
         self.Z = x[start: end].reshape(self.M, self.Q)
         self.kern._set_params(x[end:])
     def log_likelihood(self):
-#         if '2' in self.which:
-#             norm = self.N ** 2
-#         else:  # '0', '1' in self.which:
-#             norm = self.N
         return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
     def _log_likelihood_gradients(self):
         psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
@@ -64,23 +60,27 @@ class Test(unittest.TestCase):
     Z = numpy.random.permutation(X)[:M]
     Y = X.dot(numpy.random.randn(Q, D))
 
+    kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
+               GPy.kern.linear(Q) + GPy.kern.bias(Q),
+               GPy.kern.rbf(Q) + GPy.kern.bias(Q)]
+
     def testPsi0(self):
-        kernel = GPy.kern.linear(Q)
-        m = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=kernel, mu_or_S=0, dL=numpy.ones((1)))
-        assert m.checkgrad(), "linear x psi0"
+        for k in self.kernels:
+            m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,
+                         M=self.M, kernel=k)
+            assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts)))
 
     def testPsi1(self):
-        kernel = GPy.kern.linear(Q)
-        m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=kernel, mu_or_S=0, dL=numpy.ones((1, 1)))
-        assert(m.checkgrad())
+        for k in self.kernels:
+            m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,
+                         M=self.M, kernel=k)
+            assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
 
     def testPsi2(self):
-        kernel = GPy.kern.linear(Q)
-        m = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
-                     M=M, kernel=kernel, mu_or_S=0, dL=numpy.ones((1, 1, 1)))
-        assert(m.checkgrad())
+        for k in self.kernels:
+            m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,
+                         M=self.M, kernel=k)
+            assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
 
 
 if __name__ == "__main__":