From 5fa15037ca3f5dda76a4d2c607f1dbc903b24a33 Mon Sep 17 00:00:00 2001
From: Michael T Smith <lionfishy@gmail.com>
Date: Thu, 9 Jun 2016 14:08:58 +0100
Subject: [PATCH 1/5] Integral kernels added

---
 GPy/kern/src/integral.py                      |  84 ++++++++++++
 GPy/kern/src/integral_limits.py               |  94 ++++++++++++++
 .../src/multidimensional_integral_limits.py   | 120 ++++++++++++++++++
 3 files changed, 298 insertions(+)
 create mode 100644 GPy/kern/src/integral.py
 create mode 100644 GPy/kern/src/integral_limits.py
 create mode 100644 GPy/kern/src/multidimensional_integral_limits.py

diff --git a/GPy/kern/src/integral.py b/GPy/kern/src/integral.py
new file mode 100644
index 00000000..971a48a8
--- /dev/null
+++ b/GPy/kern/src/integral.py
@@ -0,0 +1,84 @@
+# Written by Mike Smith michaeltsmith.org.uk
+
+import numpy as np
+from .kern import Kern
+from ...core.parameterization import Param
+from paramz.transformations import Logexp
+import math
+
+class Integral(Kern): #todo do I need to inherit from Stationary
+    """
+    Integral kernel between...
+    """
+
+    def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
+        super(Integral, self).__init__(input_dim, active_dims, name)
+
+        if lengthscale is None:
+            lengthscale = np.ones(1)
+        else:
+            lengthscale = np.asarray(lengthscale)
+
+        self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
+        self.variances = Param('variances', variances, Logexp()) #and here.
+        self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
+
+    def h(self, z):
+        return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
+
+    def dk_dl(self, t, tprime, l): #derivative of the kernel wrt lengthscale
+        return l * ( self.h(t/l) - self.h((t - tprime)/l) + self.h(tprime/l) - 1)
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        if X2 is None:  #we're finding dK_xx/dTheta
+            dK_dl = np.zeros([X.shape[0],X.shape[0]])
+            dK_dv = np.zeros([X.shape[0],X.shape[0]])
+            for i,x in enumerate(X):
+                for j,x2 in enumerate(X):
+                    dK_dl[i,j] = self.variances[0]*self.dk_dl(x[0],x2[0],self.lengthscale[0]) #TODO Multiple length scales
+                    dK_dv[i,j] = self.k_xx(x[0],x2[0],self.lengthscale[0])  #the gradient wrt the variance is k_xx.
+            self.lengthscale.gradient = np.sum(dK_dl * dL_dK)
+            self.variances.gradient = np.sum(dK_dv * dL_dK)
+            #print "V%0.5f" % self.variances.gradient
+            #print "L%0.5f" % self.lengthscale.gradient
+        else:     #we're finding dK_xf/Dtheta
+            print("NEED TO HANDLE TODO!")
+
+    #useful little function to help calculate the covariances.
+    def g(self,z):
+        return 1.0 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
+
+    #covariance between gradients (it's the gradients that we want out... maybe we should have a way of getting K_ff too? Currently you get the diag of K_ff from Kdiag)
+    def k_xx(self,t,tprime,l):
+        return 0.5 * (l**2) * ( self.g(t/l) - self.g((t - tprime)/l) + self.g(tprime/l) - 1)
+
+    def k_ff(self,t,tprime,l):
+        return np.exp(-((t-tprime)**2)/(l**2)) #rbf
+
+    #covariance between the gradient and the actual value
+    def k_xf(self,t,tprime,l):
+        return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf(tprime/l))
+
+    def K(self, X, X2=None):
+        if X2 is None:
+            K_xx = np.zeros([X.shape[0],X.shape[0]])
+            for i,x in enumerate(X):
+                for j,x2 in enumerate(X):
+                    K_xx[i,j] = self.k_xx(x[0],x2[0],self.lengthscale[0])
+            return K_xx * self.variances[0]
+        else:
+            K_xf = np.zeros([X.shape[0],X2.shape[0]])
+            for i,x in enumerate(X):
+                for j,x2 in enumerate(X2):
+                    K_xf[i,j] = self.k_xf(x[0],x2[0],self.lengthscale[0])
+            #print self.variances[0]
+            return K_xf * self.variances[0]
+
+    def Kdiag(self, X):
+        """I've used the fact that we call this method for K_ff when finding the covariance as a hack so
+        I know if I should return K_ff or K_xx. In this case we're returning K_ff!!
+        $K_{ff}^{post} = K_{ff} - K_{fx} K_{xx}^{-1} K_{xf}$"""
+        K_ff = np.zeros(X.shape[0])
+        for i,x in enumerate(X):
+            K_ff[i] = self.k_ff(x[0],x[0],self.lengthscale[0])
+        return K_ff * self.variances[0]
diff --git a/GPy/kern/src/integral_limits.py b/GPy/kern/src/integral_limits.py
new file mode 100644
index 00000000..7006ee6f
--- /dev/null
+++ b/GPy/kern/src/integral_limits.py
@@ -0,0 +1,94 @@
+# Written by Mike Smith michaeltsmith.org.uk
+
+import numpy as np
+from .kern import Kern
+from ...core.parameterization import Param
+from paramz.transformations import Logexp
+import math
+
+class Integral_Limits(Kern): #todo do I need to inherit from Stationary
+    """
+    Integral kernel, can include limits on each integral value.
+    """
+
+    def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
+        super(Integral_Limits, self).__init__(input_dim, active_dims, name)
+
+        if lengthscale is None:
+            lengthscale = np.ones(1)
+        else:
+            lengthscale = np.asarray(lengthscale)
+
+        self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
+        self.variances = Param('variances', variances, Logexp()) #and here.
+        self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
+
+    def h(self, z):
+        return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
+
+    def dk_dl(self, t, tprime, s, sprime, l): #derivative of the kernel wrt lengthscale
+        return l * ( self.h((t-sprime)/l) - self.h((t - tprime)/l) + self.h((tprime-s)/l) - self.h((s-sprime)/l))
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        if X2 is None:  #we're finding dK_xx/dTheta
+            dK_dl = np.zeros([X.shape[0],X.shape[0]])
+            dK_dv = np.zeros([X.shape[0],X.shape[0]])
+            for i,x in enumerate(X):
+                for j,x2 in enumerate(X):
+                    dK_dl[i,j] = self.variances[0]*self.dk_dl(x[0],x2[0],x[1],x2[1],self.lengthscale[0])
+                    dK_dv[i,j] = self.k_xx(x[0],x2[0],x[1],x2[1],self.lengthscale[0])  #the gradient wrt the variance is k_xx.
+            self.lengthscale.gradient = np.sum(dK_dl * dL_dK)
+            self.variances.gradient = np.sum(dK_dv * dL_dK)
+            #print "V%0.5f" % self.variances.gradient
+            #print "L%0.5f" % self.lengthscale.gradient
+        else:     #we're finding dK_xf/Dtheta
+            print("NEED TO HANDLE TODO!")
+
+    #useful little function to help calculate the covariances.
+    def g(self,z):
+        return 1.0 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
+
+    def k_xx(self,t,tprime,s,sprime,l):
+        """Covariance between observed values.
+
+        s and t are one domain of the integral (i.e. the integral between s and t)
+        sprime and tprime are another domain of the integral (i.e. the integral between sprime and tprime)
+
+        We're interested in how correlated these two integrals are.
+
+        Note: We've not multiplied by the variance, this is done in K."""
+        return 0.5 * (l**2) * ( self.g((t-sprime)/l) + self.g((tprime-s)/l) - self.g((t - tprime)/l) - self.g((s-sprime)/l))
+
+    def k_ff(self,t,tprime,l):
+        """Doesn't need s or sprime as we're looking at the 'derivatives', so no domains over which to integrate are required"""
+        return np.exp(-((t-tprime)**2)/(l**2)) #rbf
+
+    def k_xf(self,t,tprime,s,l):
+        """Covariance between the gradient (latent value) and the actual (observed) value.
+
+        Note that sprime isn't actually used in this expression, presumably because the 'primes' are the gradient (latent) values which don't
+        involve an integration, and thus there is no domain over which they're integrated, just a single value that we want."""
+        return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf((tprime-s)/l))
+
+    def K(self, X, X2=None):
+        if X2 is None:
+            K_xx = np.zeros([X.shape[0],X.shape[0]])
+            for i,x in enumerate(X):
+                for j,x2 in enumerate(X):
+                    K_xx[i,j] = self.k_xx(x[0],x2[0],x[1],x2[1],self.lengthscale[0])
+            return K_xx * self.variances[0]
+        else:
+            K_xf = np.zeros([X.shape[0],X2.shape[0]])
+            for i,x in enumerate(X):
+                for j,x2 in enumerate(X2):
+                    K_xf[i,j] = self.k_xf(x[0],x2[0],x[1],self.lengthscale[0]) #x2[1] unused, see k_xf docstring for explanation.
+            return K_xf * self.variances[0]
+
+    def Kdiag(self, X):
+        """I've used the fact that we call this method for K_ff when finding the covariance as a hack so
+        I know if I should return K_ff or K_xx. In this case we're returning K_ff!!
+        $K_{ff}^{post} = K_{ff} - K_{fx} K_{xx}^{-1} K_{xf}$"""
+        K_ff = np.zeros(X.shape[0])
+        for i,x in enumerate(X):
+            K_ff[i] = self.k_ff(x[0],x[0],self.lengthscale[0])
+        return K_ff * self.variances[0]
diff --git a/GPy/kern/src/multidimensional_integral_limits.py b/GPy/kern/src/multidimensional_integral_limits.py
new file mode 100644
index 00000000..0f473742
--- /dev/null
+++ b/GPy/kern/src/multidimensional_integral_limits.py
@@ -0,0 +1,120 @@
+# Written by Mike Smith michaeltsmith.org.uk
+
+import numpy as np
+from .kern import Kern
+from ...core.parameterization import Param
+from paramz.transformations import Logexp
+import math
+
+class Multidimensional_Integral_Limits(Kern): #todo do I need to inherit from Stationary
+    """
+    Integral kernel, can include limits on each integral value.
+    """
+
+    def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
+        super(Multidimensional_Integral_Limits, self).__init__(input_dim, active_dims, name)
+
+        if lengthscale is None:
+            lengthscale = np.ones(1)
+        else:
+            lengthscale = np.asarray(lengthscale)
+
+        self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
+        self.variances = Param('variances', variances, Logexp()) #and here.
+        self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
+
+    def h(self, z):
+        return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
+
+    def dk_dl(self, t, tprime, s, sprime, l): #derivative of the kernel wrt lengthscale
+        return l * ( self.h((t-sprime)/l) - self.h((t - tprime)/l) + self.h((tprime-s)/l) - self.h((s-sprime)/l))
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        #print self.variances
+        if X2 is None:  #we're finding dK_xx/dTheta
+            dK_dl_term = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
+            k_term = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
+            dK_dl = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
+            dK_dv = np.zeros([X.shape[0],X.shape[0]])
+            for il,l in enumerate(self.lengthscale):
+                idx = il*2
+                for i,x in enumerate(X):
+                    for j,x2 in enumerate(X):
+                        dK_dl_term[i,j,il] = self.dk_dl(x[idx],x2[idx],x[idx+1],x2[idx+1],l)
+                        k_term[i,j,il] = self.k_xx(x[idx],x2[idx],x[idx+1],x2[idx+1],l)
+            for il,l in enumerate(self.lengthscale):
+                dK_dl = self.variances[0] * dK_dl_term[:,:,il]
+                for jl, l in enumerate(self.lengthscale):
+                    if jl!=il:
+                        dK_dl *= k_term[:,:,jl]
+                        #dK_dl = np.dot(dK_dl,k_term[:,:,il])
+                        #print k_term[:,:,il]
+                self.lengthscale.gradient[il] = np.sum(dK_dl * dL_dK)
+            dK_dv = self.calc_K_xx_wo_variance(X) #the gradient wrt the variance is k_xx.
+            self.variances.gradient = np.sum(dK_dv * dL_dK)
+        else:     #we're finding dK_xf/Dtheta
+            print("NEED TO HANDLE TODO!")
+        #print self.variances[0],self.lengthscale[0],self.lengthscale[1] #np.sum(dK_dv*dL_dK)
+
+
+    #useful little function to help calculate the covariances.
+    def g(self,z):
+        return 1.0 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
+
+    def k_xx(self,t,tprime,s,sprime,l):
+        """Covariance between observed values.
+
+        s and t are one domain of the integral (i.e. the integral between s and t)
+        sprime and tprime are another domain of the integral (i.e. the integral between sprime and tprime)
+
+        We're interested in how correlated these two integrals are.
+
+        Note: We've not multiplied by the variance, this is done in K."""
+        return 0.5 * (l**2) * ( self.g((t-sprime)/l) + self.g((tprime-s)/l) - self.g((t - tprime)/l) - self.g((s-sprime)/l))
+
+    def k_ff(self,t,tprime,l):
+        """Doesn't need s or sprime as we're looking at the 'derivatives', so no domains over which to integrate are required"""
+        return np.exp(-((t-tprime)**2)/(l**2)) #rbf
+
+    def k_xf(self,t,tprime,s,l):
+        """Covariance between the gradient (latent value) and the actual (observed) value.
+
+        Note that sprime isn't actually used in this expression, presumably because the 'primes' are the gradient (latent) values which don't
+        involve an integration, and thus there is no domain over which they're integrated, just a single value that we want."""
+        return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf((tprime-s)/l))
+
+    def calc_K_xx_wo_variance(self,X):
+        """Calculates K_xx without the variance term"""
+        K_xx = np.ones([X.shape[0],X.shape[0]]) #ones now as a product occurs over each dimension
+        for i,x in enumerate(X):
+            for j,x2 in enumerate(X):
+                for il,l in enumerate(self.lengthscale):
+                    idx = il*2 #each pair of input dimensions describe the limits on one actual dimension in the data
+                    K_xx[i,j] *= self.k_xx(x[idx],x2[idx],x[idx+1],x2[idx+1],l)
+        return K_xx
+
+    def K(self, X, X2=None):
+        if X2 is None:
+            #print "X x X"
+            K_xx = self.calc_K_xx_wo_variance(X)
+            return K_xx * self.variances[0]
+        else:
+            #print "X x X2"
+            K_xf = np.ones([X.shape[0],X2.shape[0]])
+            for i,x in enumerate(X):
+                for j,x2 in enumerate(X2):
+                    for il,l in enumerate(self.lengthscale):
+                        idx = il*2
+                        K_xf[i,j] *= self.k_xf(x[idx],x2[idx],x[idx+1],l)
+            return K_xf * self.variances[0]
+
+    def Kdiag(self, X):
+        """I've used the fact that we call this method for K_ff when finding the covariance as a hack so
+        I know if I should return K_ff or K_xx. In this case we're returning K_ff!!
+        $K_{ff}^{post} = K_{ff} - K_{fx} K_{xx}^{-1} K_{xf}$"""
+        K_ff = np.ones(X.shape[0])
+        for i,x in enumerate(X):
+            for il,l in enumerate(self.lengthscale):
+                idx = il*2
+                K_ff[i] *= self.k_ff(x[idx],x[idx],l)
+        return K_ff * self.variances[0]

From 2c593831c346660ee48660f02fad60e00d5464f1 Mon Sep 17 00:00:00 2001
From: Michael T Smith <lionfishy@gmail.com>
Date: Thu, 9 Jun 2016 14:50:40 +0100
Subject: [PATCH 2/5] New tests for kernel

---
 GPy/testing/kernel_tests.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index 99951eb1..ef72d819 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -416,6 +416,21 @@ class KernelGradientTestsContinuous(unittest.TestCase):
         k.randomize()
         self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
 
+    def test_integral(self):
+        k = GPy.kern.Integral(1)
+        k.randomize()
+        self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
+
+    def test_multidimensional_integral_limits(self):
+        k = GPy.kern.Multidimensional_Integral_Limits(2)
+        k.randomize()
+        self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
+
+    def test_integral_limits(self):
+        k = GPy.kern.Integral_Limits(2)
+        k.randomize()
+        self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
+
     def test_Linear(self):
         k = GPy.kern.Linear(self.D)
         k.randomize()

From 8b9d5d8f72f8470759d0920915b6547042686066 Mon Sep 17 00:00:00 2001
From: Michael T Smith <lionfishy@gmail.com>
Date: Mon, 13 Jun 2016 13:19:33 +0100
Subject: [PATCH 3/5] Improved comments. import future added. Fixed exception

---
 GPy/kern/src/integral.py                      |  8 ++--
 GPy/kern/src/integral_limits.py               | 37 +++++++++++++++----
 .../src/multidimensional_integral_limits.py   | 20 +++++-----
 3 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/GPy/kern/src/integral.py b/GPy/kern/src/integral.py
index 971a48a8..6febf203 100644
--- a/GPy/kern/src/integral.py
+++ b/GPy/kern/src/integral.py
@@ -1,5 +1,6 @@
 # Written by Mike Smith michaeltsmith.org.uk
 
+from __future__ import division
 import numpy as np
 from .kern import Kern
 from ...core.parameterization import Param
@@ -24,7 +25,7 @@ class Integral(Kern): #todo do I need to inherit from Stationary
         self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
 
     def h(self, z):
-        return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
+        return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))        
 
     def dk_dl(self, t, tprime, l): #derivative of the kernel wrt lengthscale
         return l * ( self.h(t/l) - self.h((t - tprime)/l) + self.h(tprime/l) - 1)
@@ -39,10 +40,8 @@ class Integral(Kern): #todo do I need to inherit from Stationary
                     dK_dv[i,j] = self.k_xx(x[0],x2[0],self.lengthscale[0])  #the gradient wrt the variance is k_xx.
             self.lengthscale.gradient = np.sum(dK_dl * dL_dK)
             self.variances.gradient = np.sum(dK_dv * dL_dK)
-            #print "V%0.5f" % self.variances.gradient
-            #print "L%0.5f" % self.lengthscale.gradient
         else:     #we're finding dK_xf/Dtheta
-            print("NEED TO HANDLE TODO!")
+            raise NotImplementedError("Currently this function only handles finding the gradient of a single vector of inputs (X) not a pair of vectors (X and X2)")
 
     #useful little function to help calculate the covariances.
     def g(self,z):
@@ -71,7 +70,6 @@ class Integral(Kern): #todo do I need to inherit from Stationary
             for i,x in enumerate(X):
                 for j,x2 in enumerate(X2):
                     K_xf[i,j] = self.k_xf(x[0],x2[0],self.lengthscale[0])
-            #print self.variances[0]
             return K_xf * self.variances[0]
 
     def Kdiag(self, X):
diff --git a/GPy/kern/src/integral_limits.py b/GPy/kern/src/integral_limits.py
index 7006ee6f..10370328 100644
--- a/GPy/kern/src/integral_limits.py
+++ b/GPy/kern/src/integral_limits.py
@@ -1,17 +1,23 @@
 # Written by Mike Smith michaeltsmith.org.uk
 
+from __future__ import division
+import math
 import numpy as np
 from .kern import Kern
 from ...core.parameterization import Param
 from paramz.transformations import Logexp
-import math
 
-class Integral_Limits(Kern): #todo do I need to inherit from Stationary
+
+class Integral_Limits(Kern): 
     """
-    Integral kernel, can include limits on each integral value.
+    Integral kernel. This kernel allows 1d histogram or binned data to be modelled.
+    The outputs are the counts in each bin. The inputs (on two dimensions) are the start and end points of each bin.
+    The kernel's predictions are the latent function which might have generated those binned results.
     """
 
     def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
+        """
+        """
         super(Integral_Limits, self).__init__(input_dim, active_dims, name)
 
         if lengthscale is None:
@@ -39,10 +45,8 @@ class Integral_Limits(Kern): #todo do I need to inherit from Stationary
                     dK_dv[i,j] = self.k_xx(x[0],x2[0],x[1],x2[1],self.lengthscale[0])  #the gradient wrt the variance is k_xx.
             self.lengthscale.gradient = np.sum(dK_dl * dL_dK)
             self.variances.gradient = np.sum(dK_dv * dL_dK)
-            #print "V%0.5f" % self.variances.gradient
-            #print "L%0.5f" % self.lengthscale.gradient
         else:     #we're finding dK_xf/Dtheta
-            print("NEED TO HANDLE TODO!")
+            raise NotImplementedError("Currently this function only handles finding the gradient of a single vector of inputs (X) not a pair of vectors (X and X2)")
 
     #useful little function to help calculate the covariances.
     def g(self,z):
@@ -71,6 +75,22 @@ class Integral_Limits(Kern): #todo do I need to inherit from Stationary
         return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf((tprime-s)/l))
 
     def K(self, X, X2=None):
+        """Note: We have a latent function and an output function. We want to be able to find:
+          - the covariance between values of the output function
+          - the covariance between values of the latent function
+          - the "cross covariance" between values of the output function and the latent function
+        This method is used by GPy to either get the covariance between the outputs (K_xx) or
+        is used to get the cross covariance (between the latent function and the outputs (K_xf).
+        We take advantage of the places where this function is used:
+         - if X2 is none, then we know that the items being compared (to get the covariance for)
+         are going to be both from the OUTPUT FUNCTION.
+         - if X2 is not none, then we know that the items being compared are from two different
+         sets (the OUTPUT FUNCTION and the LATENT FUNCTION).
+        
+        If we want the covariance between values of the LATENT FUNCTION, we take advantage of
+        the fact that we only need that when we do prediction, and this only calls Kdiag (not K).
+        So the covariance between LATENT FUNCTIONS is available from Kdiag.        
+        """
         if X2 is None:
             K_xx = np.zeros([X.shape[0],X.shape[0]])
             for i,x in enumerate(X):
@@ -85,8 +105,9 @@ class Integral_Limits(Kern): #todo do I need to inherit from Stationary
             return K_xf * self.variances[0]
 
     def Kdiag(self, X):
-        """I've used the fact that we call this method for K_ff when finding the covariance as a hack so
-        I know if I should return K_ff or K_xx. In this case we're returning K_ff!!
+        """I've used the fact that we call this method during prediction (instead of K). When we
+        do prediction we want to know the covariance between LATENT FUNCTIONS (K_ff) (as that's probably
+        what the user wants).
         $K_{ff}^{post} = K_{ff} - K_{fx} K_{xx}^{-1} K_{xf}$"""
         K_ff = np.zeros(X.shape[0])
         for i,x in enumerate(X):
diff --git a/GPy/kern/src/multidimensional_integral_limits.py b/GPy/kern/src/multidimensional_integral_limits.py
index 0f473742..8a07595b 100644
--- a/GPy/kern/src/multidimensional_integral_limits.py
+++ b/GPy/kern/src/multidimensional_integral_limits.py
@@ -1,5 +1,6 @@
 # Written by Mike Smith michaeltsmith.org.uk
 
+from __future__ import division
 import numpy as np
 from .kern import Kern
 from ...core.parameterization import Param
@@ -8,7 +9,11 @@ import math
 
 class Multidimensional_Integral_Limits(Kern): #todo do I need to inherit from Stationary
     """
-    Integral kernel, can include limits on each integral value.
+    Integral kernel, can include limits on each integral value. This kernel allows an n-dimensional
+    histogram or binned data to be modelled. The outputs are the counts in each bin. The inputs
+    are the start and end points of each bin: Pairs of inputs act as the limits on each bin. So
+    inputs 4 and 5 provide the start and end values of each bin in the 3rd dimension.
+    The kernel's predictions are the latent function which might have generated those binned results.    
     """
 
     def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
@@ -30,7 +35,6 @@ class Multidimensional_Integral_Limits(Kern): #todo do I need to inherit from St
         return l * ( self.h((t-sprime)/l) - self.h((t - tprime)/l) + self.h((tprime-s)/l) - self.h((s-sprime)/l))
 
     def update_gradients_full(self, dL_dK, X, X2=None):
-        #print self.variances
         if X2 is None:  #we're finding dK_xx/dTheta
             dK_dl_term = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
             k_term = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
@@ -47,14 +51,12 @@ class Multidimensional_Integral_Limits(Kern): #todo do I need to inherit from St
                 for jl, l in enumerate(self.lengthscale):
                     if jl!=il:
                         dK_dl *= k_term[:,:,jl]
-                        #dK_dl = np.dot(dK_dl,k_term[:,:,il])
-                        #print k_term[:,:,il]
                 self.lengthscale.gradient[il] = np.sum(dK_dl * dL_dK)
             dK_dv = self.calc_K_xx_wo_variance(X) #the gradient wrt the variance is k_xx.
             self.variances.gradient = np.sum(dK_dv * dL_dK)
         else:     #we're finding dK_xf/Dtheta
-            print("NEED TO HANDLE TODO!")
-        #print self.variances[0],self.lengthscale[0],self.lengthscale[1] #np.sum(dK_dv*dL_dK)
+            raise NotImplementedError("Currently this function only handles finding the gradient of a single vector of inputs (X) not a pair of vectors (X and X2)")
+
 
 
     #useful little function to help calculate the covariances.
@@ -94,12 +96,10 @@ class Multidimensional_Integral_Limits(Kern): #todo do I need to inherit from St
         return K_xx
 
     def K(self, X, X2=None):
-        if X2 is None:
-            #print "X x X"
+        if X2 is None: #X vs X
             K_xx = self.calc_K_xx_wo_variance(X)
             return K_xx * self.variances[0]
-        else:
-            #print "X x X2"
+        else: #X vs X2
             K_xf = np.ones([X.shape[0],X2.shape[0]])
             for i,x in enumerate(X):
                 for j,x2 in enumerate(X2):

From 286077f7a7b7df0a64fc12264bab41f3f637069f Mon Sep 17 00:00:00 2001
From: Michael T Smith <lionfishy@gmail.com>
Date: Mon, 13 Jun 2016 14:35:48 +0100
Subject: [PATCH 4/5] References integral kernel classes

---
 GPy/kern/__init__.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 4a2201b1..c9304f39 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -24,6 +24,9 @@ from .src.ODE_st import ODE_st
 from .src.ODE_t import ODE_t
 from .src.poly import Poly
 from .src.eq_ode2 import EQ_ODE2
+from .src.integral import Integral
+from .src.integral_limits import Integral_Limits
+from .src.multidimensional_integral_limits import Multidimensional_Integral_Limits
 from .src.eq_ode1 import EQ_ODE1
 from .src.trunclinear import TruncLinear,TruncLinear_inf
 from .src.splitKern import SplitKern,DEtime

From 0d89b00e42cbcd7fc7e6c543c99b860d0861c61d Mon Sep 17 00:00:00 2001
From: Michael T Smith <lionfishy@gmail.com>
Date: Mon, 13 Jun 2016 15:19:07 +0100
Subject: [PATCH 5/5] Modified testing to allow not implemented exceptions in
 update_gradients_full

---
 GPy/testing/kernel_tests.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index ef72d819..5bd86e76 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -193,7 +193,12 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
 
     if verbose:
         print("Checking gradients of K(X, X2) wrt theta.")
-    result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    try:
+        result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result=True
+        if verbose:
+            print(("update_gradients_full, with differing X and X2, not implemented for " + kern.name))
     if result and verbose:
         print("Check passed.")
     if not result: