From cb082898d335aea8be110e615a9666065669da4e Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 18:56:37 +0000
Subject: [PATCH 1/9] added trace_sum for efficiency

---
 GPy/models/sparse_GP.py | 6 +++---
 GPy/util/linalg.py      | 7 +++++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index f70938c9..4846bf8a 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 import pylab as pb
-from ..util.linalg import mdot, jitchol, chol_inv, pdinv
+from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
 from ..util.plot import gpplot
 from .. import kern
 from GP import GP
@@ -107,7 +107,7 @@ class sparse_GP(GP):
         self.C = np.dot(tmp,tmp.T)
         #self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
         #self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
-        tmp = np.dot(self.C,self.psi1V/sf)
+        tmp = np.dot(self.C/sf,self.psi1V)
         self.E = np.dot(tmp,tmp.T)
 
         # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
@@ -156,7 +156,7 @@ class sparse_GP(GP):
             beta = self.likelihood.precision
             dbeta =   0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y))
             dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2)
-            dbeta += - 0.5 * self.D * np.sum(self.Bi*self.A)/beta
+            dbeta += - 0.5 * self.D * trace_dot(self.Bi,self.A)/beta
             dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta
             self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
 
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 26105789..cf023284 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -14,6 +14,13 @@ import types
 #import scipy.lib.lapack.flapack
 import scipy as sp
 
+def trace_dot(a,b):
+    """
+    efficiently compute the trace of the matrix product of a and b
+    """
+    assert a.shape==b.T.shape
+    return np.dot(a.flatten(),b.T.flatten())
+
 def mdot(*args):
    """Multiply all the arguments using matrix product rules.
    The output is equivalent to multiplying the arguments one by one

From d3c87feffadbbc281f16cb470b0c5cae6731e923 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 11 Mar 2013 19:43:46 +0000
Subject: [PATCH 2/9] some messing with the linear algebra in sparse_GP. This
 should be more efficient... let's hope nothing breaks

---
 GPy/models/sparse_GP.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index 4846bf8a..f5279f86 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -102,17 +102,16 @@ class sparse_GP(GP):
         self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
 
         self.psi1V = np.dot(self.psi1, self.V)
-        self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
         tmp = np.dot(self.Lmi.T, self.LBi.T)
         self.C = np.dot(tmp,tmp.T)
-        #self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
-        #self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
-        tmp = np.dot(self.C/sf,self.psi1V)
-        self.E = np.dot(tmp,tmp.T)
+        self.Cpsi1V = np.dot(self.C,self.psi1V)
+        self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
+        self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2
 
         # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
         self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
-        self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
+        #self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
+        self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
         if self.likelihood.is_heteroscedastic:
             if self.has_uncertain_inputs:
                 self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
@@ -139,7 +138,7 @@ class sparse_GP(GP):
         # Compute dL_dKmm
         self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
         self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
-        self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
+        self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
 
         #the partial derivative vector for the likelihood
         if self.likelihood.Nparams ==0:
@@ -157,7 +156,7 @@ class sparse_GP(GP):
             dbeta =   0.5 * self.N*self.D/beta - 0.5 * np.sum(np.square(self.likelihood.Y))
             dbeta += - 0.5 * self.D * (self.psi0.sum() - np.trace(self.A)/beta*sf2)
             dbeta += - 0.5 * self.D * trace_dot(self.Bi,self.A)/beta
-            dbeta += np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/beta
+            dbeta += np.trace(self.Cpsi1VVpsi1)/beta - 0.5 * trace_dot(np.dot(self.C,self.psi2_beta_scaled) , self.Cpsi1VVpsi1 )/beta
             self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
 
 
@@ -198,7 +197,7 @@ class sparse_GP(GP):
             A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT
             B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
         C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
-        D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
+        D = 0.5*np.trace(self.Cpsi1VVpsi1)
         return A+B+C+D
 
     def _log_likelihood_gradients(self):

From 7e4b460cdbd4e37597efc9aa6403ed084b09e020 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 12 Mar 2013 09:18:15 +0000
Subject: [PATCH 3/9] more messing with the linear algebra in sparse_GP

---
 GPy/models/sparse_GP.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index f5279f86..be451c12 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -137,8 +137,9 @@ class sparse_GP(GP):
 
         # Compute dL_dKmm
         self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
-        self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
-        self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
+        #self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
+        #self.dL_dKmm +=  np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
+        self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) + np.dot(np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1,self.Kmmi) # d(C+D)
 
         #the partial derivative vector for the likelihood
         if self.likelihood.Nparams ==0:

From 77df63952ff15f9a88ad43637264e5da947d86d7 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Tue, 12 Mar 2013 09:36:11 +0000
Subject: [PATCH 4/9] updated list of implemented kernels in the documentation

---
 doc/kernel_implementation.rst | 60 ++++++++++++++++++-----------------
 doc/tuto_kernel_overview.rst  |  2 +-
 2 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/doc/kernel_implementation.rst b/doc/kernel_implementation.rst
index 99ee006b..521087ba 100644
--- a/doc/kernel_implementation.rst
+++ b/doc/kernel_implementation.rst
@@ -5,35 +5,37 @@ List of implemented kernels
 
 The following table shows the implemented kernels in GPy and gives the details of the implemented function for each kernel.
 
-====================  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
-NAME                  get/set      K       Kdiag   dK_dtheta   dKdiag_dtheta   dK_dX   dKdiag_dX   psi0   psi1   psi2
-====================  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
-bias                  |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-Brownian              |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                                                
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-exponential           |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-finite_dimensional    |tick|       |tick|  |tick|  |tick|      |tick| 
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-linear                |tick|       |tick|  |tick|  |tick|      |tick|          |tick|              |tick| |tick| |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-Matern32              |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|        
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-Matern52              |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-periodic_exponential  |tick|       |tick|  |tick|  |tick|      |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-periodic_Matern32     |tick|       |tick|  |tick|  |tick|      |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-periodic_Matern52     |tick|       |tick|  |tick|  |tick|      |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-rbf                   |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-spline                |tick|       |tick|  |tick|  |tick|      |tick|                  |tick|     
---------------------  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-white                 |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
-====================  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
+==================== =========== =====  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
+NAME                  Dimension   ARD   get/set      K       Kdiag   dK_dtheta   dKdiag_dtheta   dK_dX   dKdiag_dX   psi0   psi1   psi2
+==================== =========== =====  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
+bias                 n           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+Brownian             1           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                 
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+exponential          n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+finite_dimensional   n           no     |tick|       |tick|  |tick|  |tick|      |tick| 
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+linear               n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|              |tick| |tick| |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+Matern32             n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|        
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+Matern52             n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+periodic_exponential 1           no     |tick|       |tick|  |tick|  |tick|      |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+periodic_Matern32    1           no     |tick|       |tick|  |tick|  |tick|      |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+periodic_Matern52    1           no     |tick|       |tick|  |tick|  |tick|      |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+rational quadratic   1           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                           
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+rbf                  n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+spline               n           no     |tick|       |tick|  |tick|  |tick|      |tick|                  |tick|     
+-------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
+white                n           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
+==================== =========== =====  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
 
 Depending on the use, all functions may not be required
 
diff --git a/doc/tuto_kernel_overview.rst b/doc/tuto_kernel_overview.rst
index da19803b..27f895ba 100644
--- a/doc/tuto_kernel_overview.rst
+++ b/doc/tuto_kernel_overview.rst
@@ -39,7 +39,7 @@ return::
 Implemented kernels
 ===================
 
-Many kernels are already implemented in GPy. A comprehensive list can be found `here <kernel_implementation.html>`_ and the following figure gives a summary of most of them:
+Many kernels are already implemented in GPy. The following figure gives a summary of most of them (a comprehensive list can be list can be found `here <kernel_implementation.html>`_):
 
 .. figure::  Figures/tuto_kern_overview_allkern.png
     :align:  center

From f14302e8d0aa92bd61e8b2ad62db52b34e79c14e Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Tue, 12 Mar 2013 09:41:27 +0000
Subject: [PATCH 5/9] updated list of implemented kernels in the documentation

---
 doc/kernel_implementation.rst | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/doc/kernel_implementation.rst b/doc/kernel_implementation.rst
index 521087ba..c566d21d 100644
--- a/doc/kernel_implementation.rst
+++ b/doc/kernel_implementation.rst
@@ -8,13 +8,13 @@ The following table shows the implemented kernels in GPy and gives the details o
 ==================== =========== =====  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
 NAME                  Dimension   ARD   get/set      K       Kdiag   dK_dtheta   dKdiag_dtheta   dK_dX   dKdiag_dX   psi0   psi1   psi2
 ==================== =========== =====  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
-bias                 n           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
+bias                 n                  |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-Brownian             1           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                 
+Brownian             1                  |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                 
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
 exponential          n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-finite_dimensional   n           no     |tick|       |tick|  |tick|  |tick|      |tick| 
+finite_dimensional   n                  |tick|       |tick|  |tick|  |tick|      |tick| 
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
 linear               n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|              |tick| |tick| |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
@@ -22,19 +22,19 @@ Matern32             n           yes    |tick|       |tick|  |tick|  |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
 Matern52             n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-periodic_exponential 1           no     |tick|       |tick|  |tick|  |tick|      |tick|
+periodic_exponential 1                  |tick|       |tick|  |tick|  |tick|      |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-periodic_Matern32    1           no     |tick|       |tick|  |tick|  |tick|      |tick|
+periodic_Matern32    1                  |tick|       |tick|  |tick|  |tick|      |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-periodic_Matern52    1           no     |tick|       |tick|  |tick|  |tick|      |tick|
+periodic_Matern52    1                  |tick|       |tick|  |tick|  |tick|      |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-rational quadratic   1           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                           
+rational quadratic   1                  |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|                           
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
 rbf                  n           yes    |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-spline               n           no     |tick|       |tick|  |tick|  |tick|      |tick|                  |tick|     
+spline               1                  |tick|       |tick|  |tick|  |tick|      |tick|                  |tick|     
 -------------------- ----------- -----  -----------  ------  ------- ----------- --------------- ------- ----------- ------ ------ -------
-white                n           no     |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
+white                n                  |tick|       |tick|  |tick|  |tick|      |tick|          |tick|  |tick|      |tick| |tick| |tick|
 ==================== =========== =====  ===========  ======  ======= =========== =============== ======= =========== ====== ====== =======
 
 Depending on the use, all functions may not be required

From bddeb998bf691e51514ba845cbecfa10484ff0fe Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 12 Mar 2013 10:04:02 +0000
Subject: [PATCH 6/9] typo in comments

---
 GPy/models/sparse_GP.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index be451c12..a348c9f4 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -80,7 +80,7 @@ class sparse_GP(GP):
 
         #The rather complex computations of psi2_beta_scaled
         if self.likelihood.is_heteroscedastic:
-            assert self.likelihood.D == 1 #TODO: what is the likelihood is heterscedatic and there are multiple independent outputs?
+            assert self.likelihood.D == 1 #TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
             if self.has_uncertain_inputs:
                 self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
             else:

From c3dae7b25255bd2168063226b0d97a2e6ce649f3 Mon Sep 17 00:00:00 2001
From: Nicolas <n.durrande@sheffield.ac.uk>
Date: Tue, 12 Mar 2013 10:43:46 +0000
Subject: [PATCH 7/9] Fixed bug in dK_dX for the quadratic kernel

---
 GPy/kern/rational_quadratic.py    | 4 ++--
 doc/tuto_creating_new_kernels.rst | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/GPy/kern/rational_quadratic.py b/GPy/kern/rational_quadratic.py
index 15200fd3..561ea065 100644
--- a/GPy/kern/rational_quadratic.py
+++ b/GPy/kern/rational_quadratic.py
@@ -73,8 +73,8 @@ class rational_quadratic(kernpart):
         if X2 is None: X2 = X
         dist2 = np.square((X-X2.T)/self.lengthscale)
 
-        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 *  (1 + dist2/2./self.power)**(-self.power-1)
-        target += np.sum(dL_dK*dX)
+        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 *  (1 + dist2/2./self.lengthscale)**(-self.power-1)
+        target += np.sum(dL_dK*dX,1)[:,np.newaxis]
 
     def dKdiag_dX(self,dL_dKdiag,X,target):
         pass
diff --git a/doc/tuto_creating_new_kernels.rst b/doc/tuto_creating_new_kernels.rst
index 8ebf8b8f..24003ba2 100644
--- a/doc/tuto_creating_new_kernels.rst
+++ b/doc/tuto_creating_new_kernels.rst
@@ -136,8 +136,8 @@ Computes the derivative of the likelihood with respect to the inputs ``X`` (a :m
         if X2 is None: X2 = X
         dist2 = np.square((X-X2.T)/self.lengthscale)
 
-        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 *  (1 + dist2/2./self.power)**(-self.power-1)
-        target += np.sum(dL_dK*dX)
+        dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 *  (1 + dist2/2./self.lengthscale)**(-self.power-1)
+        target += np.sum(dL_dK*dX,1)[:,np.newaxis]
 
 **dKdiag_dX(self,dL_dKdiag,X,target)**
 

From 09dd452b54c30f9ba113fe7709c886a6a0d66128 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 12 Mar 2013 11:18:25 +0000
Subject: [PATCH 8/9] added SCG code

---
 GPy/inference/SCG.py | 142 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)
 create mode 100644 GPy/inference/SCG.py

diff --git a/GPy/inference/SCG.py b/GPy/inference/SCG.py
new file mode 100644
index 00000000..cbaee3e7
--- /dev/null
+++ b/GPy/inference/SCG.py
@@ -0,0 +1,142 @@
+#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
+
+#Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
+
+#      THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
+#      HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+#      EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
+#      NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+#      MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+#      PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+#      REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+#      DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+#      EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+#      (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+#      OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#      DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+#      HOWEVER CAUSED AND ON ANY THEORY OF
+#      LIABILITY, WHETHER IN CONTRACT, STRICT
+#      LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+#      OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#      OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+#      POSSIBILITY OF SUCH DAMAGE.
+
+
+import numpy as np
+
+def SCG(f, gradf, x, optargs=(), maxiters=500, display=True, xtol=1e-6, ftol=1e-6):
+    """
+    Optimisation through Scaled Conjugate Gradients (SCG)
+
+    f: the objective function
+    gradf : the gradient function (should return a 1D np.ndarray)
+    x : the initial condition
+
+    Returns
+    x the optimal value for x
+    flog : a list of all the objective values
+    pointlog : a list of the x values tried
+    scalelog : a list of the scales used in optimisation (beta)
+
+    """
+
+    sigma0 = 1.0e-4
+    fold = f(x, *optargs)	# Initial function value.
+    fnow = fold
+    gradnew = gradf(x, *optargs)	# Initial gradient.
+    gradold = gradnew.copy()
+    d = -gradnew				# Initial search direction.
+    success = True				# Force calculation of directional derivs.
+    nsuccess = 0				# nsuccess counts number of successes.
+    beta = 1.0				# Initial scale parameter.
+    betamin = 1.0e-15 			# Lower bound on scale.
+    betamax = 1.0e100			# Upper bound on scale.
+
+    flog = [fold]
+    pointlog = [x.copy()]
+    scalelog = [beta]
+
+    iteration = 0
+
+    # Main optimization loop.
+    while iteration < maxiters:
+
+        # Calculate first and second directional derivatives.
+        if success:
+            mu = np.dot(d, gradnew)
+            if mu >= 0:
+                d = -gradnew
+                mu = np.dot(d, gradnew)
+            kappa = np.dot(d, d)
+            #if kappa < eps():
+                #return x, flog, pointlog, scalelog
+            sigma = sigma0/np.sqrt(kappa)
+            xplus = x + sigma*d
+            gplus = gradf(xplus, *optargs)
+            theta = np.dot(d, (gplus - gradnew))/sigma
+
+        # Increase effective curvature and evaluate step size alpha.
+        delta = theta + beta*kappa
+        if delta <= 0:
+            delta = beta*kappa
+            beta = beta - theta/kappa
+
+        alpha = - mu/delta
+
+        # Calculate the comparison ratio.
+        xnew = x + alpha*d
+        fnew = f(xnew, *optargs)
+        Delta = 2.*(fnew - fold)/(alpha*mu)
+        if Delta  >= 0.:
+            success = True
+            nsuccess += 1
+            x = xnew
+            fnow = fnew
+        else:
+            success = False
+            fnow = fold
+
+        # Store relevant variables
+        flog.append(fnow)		# Current function value
+        pointlog.append(x)           # Current position
+        scalelog.append(beta)    # current scale parameter
+
+        iteration += 1
+        if display:
+            print 'Iteration:', iteration, ' Objective:', fnow, '  Scale:', beta
+
+        if success:
+            # Test for termination
+            if np.max(np.abs(alpha*d)) < xtol or np.max(np.abs(fnew-fold)) < ftol:
+                return x, flog, pointlog, scalelog
+
+            else:
+                # Update variables for new position
+                fold = fnew
+                gradold = gradnew
+                gradnew = gradf(x, *optargs)
+                # If the gradient is zero then we are done.
+                if np.dot(gradnew,gradnew) == 0:
+                    return x, flog, pointlog, scalelog
+
+        # Adjust beta according to comparison ratio.
+        if Delta < 0.25:
+            beta = min(4.0*beta, betamax)
+        if Delta > 0.75:
+            beta = max(0.5*beta, betamin)
+
+        # Update search direction using Polak-Ribiere formula, or re-start
+        # in direction of negative gradient after nparams steps.
+        if nsuccess == x.size:
+            d = -gradnew
+            nsuccess = 0
+        elif success:
+            gamma = np.dot(gradold - gradnew,gradnew)/(mu)
+            d = gamma*d - gradnew
+
+    # If we get here, then we haven't terminated in the given number of
+    # iterations.
+    if display:
+        print "maxiter exceeded"
+
+    return x, flog, pointlog, scalelog

From 7cb20798941b0d39d94637120ced0b3ad958e5b3 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 12 Mar 2013 11:44:28 +0000
Subject: [PATCH 9/9] non working integratino of SCG into GPy

---
 GPy/inference/optimization.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py
index 94a5bcef..2e29fe6a 100644
--- a/GPy/inference/optimization.py
+++ b/GPy/inference/optimization.py
@@ -196,6 +196,16 @@ class opt_rasm(Optimizer):
 
         self.trace = opt_result[1]
 
+class opt_scg(Optimizer):
+    def __init__(self, *args, **kwargs):
+        Optimizer.__init__(self, *args, **kwargs)
+        self.opt_name = "Scaled Conjugate Gradients"
+
+    def opt(self, f_fp = None, f = None, fp = None):
+        assert not f is None
+        assert not fp is None
+        opt_result = SCG (f,fp,self.x_init, display=self.messages,
+
 def get_optimizer(f_min):
     # import rasmussens_minimize as rasm
     from SGD import opt_SGD