From c701fb9f093f47e319b37ae3c34d729c6c7b2437 Mon Sep 17 00:00:00 2001 From: Michael T Smith Date: Wed, 3 Aug 2016 14:55:23 +0100 Subject: [PATCH 01/15] Adding refs to new clustering and offset model and utility --- GPy/models/__init__.py | 2 ++ GPy/util/__init__.py | 1 + 2 files changed, 3 insertions(+) diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index 654b1938..6d19d8a5 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -26,3 +26,5 @@ from .dpgplvm import DPBayesianGPLVM from .state_space_model import StateSpace from .ibp_lfm import IBPLFM + +from .gp_offset_regression import GPOffsetRegression diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index fb1eb0d6..685551fd 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -16,3 +16,4 @@ from . import initialization from . import multioutput from . import parallel from . import functions +from . import cluster_with_offset From 132c8d92b2badabeac3e6167f433e9bfb726641c Mon Sep 17 00:00:00 2001 From: Michael T Smith Date: Wed, 3 Aug 2016 15:27:49 +0100 Subject: [PATCH 02/15] Offset model and clustering utility --- GPy/models/gp_offset_regression.py | 92 +++++++++++++++ GPy/util/cluster_with_offset.py | 180 +++++++++++++++++++++++++++++ 2 files changed, 272 insertions(+) create mode 100644 GPy/models/gp_offset_regression.py create mode 100644 GPy/util/cluster_with_offset.py diff --git a/GPy/models/gp_offset_regression.py b/GPy/models/gp_offset_regression.py new file mode 100644 index 00000000..3699223e --- /dev/null +++ b/GPy/models/gp_offset_regression.py @@ -0,0 +1,92 @@ +# Copyright (c) 2012 - 2014 the GPy Austhors (see AUTHORS.txt) +# Licensed under the BSD 3-clause license (see LICENSE.txt) +# Written by Mike Smith. michaeltsmith.org.uk + +import numpy as np +from ..core import GP +from .. import likelihoods +from .. 
import kern
from ..core import Param


class GPOffsetRegression(GP):
    """Gaussian process regression in which each cluster of points shares a
    learnt offset along the input axis.

    The last column of X is an integer cluster label; the remaining
    column(s) are the actual inputs.  One offset hyperparameter is learnt
    per cluster (cluster 0 is the reference and is pinned at zero offset),
    and every point in cluster c is shifted by -offset[c] before the
    kernel is evaluated.

    :param X: input observations; the last column must be the cluster
        label, so for one real input dimension X is Nx2
    :param Y: observed values (Nx1, or NxD for multiple independent outputs)
    :param kernel: a GPy kernel, defaults to RBF
    :param Norm normalizer: [False].
        Normalize Y with the norm given.  If normalizer is False, no
        normalization will be done.  If it is None, we use
        GaussianNorm(alization).
    :param noise_var: the noise variance for the Gaussian likelihood, defaults to 1.

    .. Note:: Multiple independent outputs are allowed using columns of Y
    """

    def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None,
                 noise_var=1., mean_function=None):
        assert X.shape[1] > 1, "Need at least two input dimensions, as last dimension is the label of the cluster"
        if kernel is None:
            # One column of X is the cluster label, not a kernel input.
            kernel = kern.RBF(X.shape[1] - 1)

        likelihood = likelihoods.Gaussian(variance=noise_var)
        self.X_fixed = X[:, :-1]                # inputs with the label column removed
        self.selected = X[:, -1].astype(int)    # cluster label of each data point

        super(GPOffsetRegression, self).__init__(
            X, Y, kernel, likelihood, name='GP offset regression',
            Y_metadata=Y_metadata, normalizer=normalizer,
            mean_function=mean_function)

        # One offset per cluster except cluster 0, which all others are
        # measured relative to.
        maxcluster = np.max(self.selected)
        self.offset = Param('offset', np.zeros(maxcluster))
        # TODO: consider self.offset.set_prior(...) with a sensible default
        self.link_parameter(self.offset)

    def dr_doffset(self, X, sel, delta):
        """Derivative of the kernel's scaled distance r w.r.t. each offset.

        For offset k, entry [i, j] of the returned matrix answers: how does
        r(X_i, X_j) change when offset k grows?  Only pairs where exactly
        one of the two points belongs to cluster k+1 are affected, and the
        sign depends on which point of the pair lies to the right.

        arguments:
        X -- the (already offset-shifted) Nx1 input matrix
        sel -- length-N array of cluster labels
        delta -- the current offset values, one per non-reference cluster

        returns a list of NxN arrays, one per offset.
        """
        # w[i, j] = X_i - X_j (X is Nx1, so broadcasting gives NxN).
        w = X - X.T
        dr_doffsets = []
        for k in range(len(delta)):
            inclust = (sel == (k + 1)).astype(float)
            # G[i, j] = [sel_i == k+1] - [sel_j == k+1]: +/-1 where exactly
            # one point of the pair is in cluster k+1, 0 elsewhere.
            G = inclust[:, None] - inclust[None, :]
            # d(X_i - X_j)/doffset_k has magnitude |G| and flips sign with
            # the sign of (X_i - X_j).  Divide by the lengthscale because
            # the kernel's r is |X_i - X_j| / lengthscale.  (The previous
            # version divided the whole *list* of matrices at the end,
            # which only worked via numpy silently coercing it to a 3-D
            # array; divide each matrix explicitly instead.)
            dr_doffsets.append(np.sign(w) * G / self.kern.lengthscale)
        return dr_doffsets

    def parameters_changed(self):
        """Shift X by the current offsets, then add the offset gradients."""
        # Prepend the fixed zero offset of the reference cluster 0.
        offsets = np.hstack([0.0, self.offset.values])[:, None]
        self.X = self.X_fixed - offsets[self.selected]
        super(GPOffsetRegression, self).parameters_changed()

        # Chain rule: dL/doffset_k = sum_ij dL/dK_ij * dK/dr_ij * dr_ij/doffset_k
        dL_dr = self.kern.dK_dr_via_X(self.X, self.X) * self.grad_dict['dL_dK']
        dr_doff = self.dr_doffset(self.X, self.selected, self.offset.values)
        for k, drdo in enumerate(dr_doff):
            # Minus sign: X = X_fixed - offset, so d(X)/d(offset) = -1.
            self.offset.gradient[k] = -np.sum(dL_dr * drdo)
# Copyright (c) 2016, Mike Smith
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import sys  # so we can print progress dots

import numpy as np

import GPy


def _stack_cluster_data(inputs, data, clust, add_index):
    """Stack the inputs/data of the clusters listed in ``clust`` into one dataset.

    arguments:
    inputs -- the 'X's in a list, one item per cluster
    data -- the 'Y's in a list, one item per cluster
    clust -- list of cluster indices to combine
    add_index -- if True, append an extra input column identifying which
        cluster each row came from (the offset model needs this to know
        which data points to shift)

    returns (X, Y) as two stacked arrays.
    """
    S = data[0].shape[0]  # number of time series per data point
    X = np.zeros([0, 2 if add_index else 1])
    Y = np.zeros([0, S])
    for i, p in enumerate(clust):
        xp = inputs[p]
        if add_index:
            idx = i * np.ones([xp.shape[0], 1])
            xp = np.hstack([xp, idx])
        X = np.vstack([X, xp])
        Y = np.vstack([Y, data[p].T])
    return X, Y


def get_log_likelihood(inputs, data, clust):
    """Get the LL of a combined set of clusters, ignoring time series offsets.

    Get the log likelihood of a cluster without worrying about the fact
    different time series are offset.  We're using it here really for those
    cases in which we only have one cluster to get the loglikelihood of.

    arguments:
    inputs -- the 'X's in a list, one item per cluster
    data -- the 'Y's in a list, one item per cluster
    clust -- list of clusters to use

    returns a tuple:
    log likelihood and the offset (which is always zero for this model)
    """
    X, Y = _stack_cluster_data(inputs, data, clust, add_index=False)
    m = GPy.models.GPRegression(X, Y)
    m.optimize()
    return m.log_likelihood(), 0


def get_log_likelihood_offset(inputs, data, clust):
    """Get the log likelihood of a combined set of clusters, fitting the offsets.

    arguments:
    inputs -- the 'X's in a list, one item per cluster
    data -- the 'Y's in a list, one item per cluster
    clust -- list of clusters to use

    returns a tuple:
    log likelihood and the offset
    """
    # With a single cluster the offset model has no offset parameters and
    # errors, so fall back to plain GP regression.
    if len(clust) == 1:
        return get_log_likelihood(inputs, data, clust)

    X, Y = _stack_cluster_data(inputs, data, clust, add_index=True)
    m = GPy.models.GPOffsetRegression(X, Y)
    # TODO: How to select a sensible prior?
    m.offset.set_prior(GPy.priors.Gaussian(0, 20))
    # TODO: Set a sensible start value for the length scale;
    # make it long to help the offset fit.
    m.optimize()

    # NOTE(review): only the first offset is returned, so with more than
    # two clusters the remaining fitted offsets are discarded -- confirm
    # this is intended (the greedy cluster() below only ever joins pairs).
    return m.log_likelihood(), m.offset.values[0]


def cluster(data, inputs, verbose=False):
    """Clusters data.

    Using the new offset model, this method uses a greedy algorithm to
    cluster the data.  It starts with all the data points in separate
    clusters and tests whether combining them increases the overall
    log-likelihood (LL).  It then iteratively joins pairs of clusters which
    cause the greatest increase in the LL, until no join increases the LL.

    Note: ``inputs`` and ``data`` are modified in place as clusters merge.

    arguments:
    inputs -- the 'X's in a list, one item per cluster
    data -- the 'Y's in a list, one item per cluster
    verbose -- print per-iteration progress (default False)

    returns a list of the clusters (each a list of original indices).
    """
    N = len(data)

    # Start with every time series in its own cluster.
    active = [[p] for p in range(N)]

    # NaN marks entries that need (re)computing.
    loglikes = np.full(len(active), np.nan)
    pairloglikes = np.full([len(active), len(active)], np.nan)
    pairoffset = np.zeros([len(active), len(active)])

    it = 0
    while True:
        it += 1
        if verbose:
            print("Iteration %d" % it)

        # LL of each cluster alone, and of each pair combined (lower
        # triangle only; the pair LL is symmetric).
        for clusti in range(len(active)):
            if verbose:
                sys.stdout.write('.')
                sys.stdout.flush()
            if np.isnan(loglikes[clusti]):
                loglikes[clusti], _ = get_log_likelihood_offset(inputs, data, [clusti])
            for clustj in range(clusti):  # count from 0 to clusti-1
                if np.isnan(pairloglikes[clusti, clustj]):
                    pairloglikes[clusti, clustj], pairoffset[clusti, clustj] = \
                        get_log_likelihood_offset(inputs, data, [clusti, clustj])

        # How much the LL improves by joining each pair vs leaving them apart.
        seploglikes = loglikes[None, :] + loglikes[:, None]
        loglikeimprovement = pairloglikes - seploglikes
        top = np.unravel_index(np.nanargmax(loglikeimprovement), pairloglikes.shape)

        # No join improves the likelihood: we're done.
        if loglikeimprovement[top[0], top[1]] <= 0:
            break

        # Merge cluster top[1] into top[0], shifting its inputs by the
        # fitted offset.
        active[top[0]].extend(active[top[1]])
        offset = pairoffset[top[0], top[1]]
        inputs[top[0]] = np.vstack([inputs[top[0]], inputs[top[1]] - offset])
        data[top[0]] = np.hstack([data[top[0]], data[top[1]]])
        del inputs[top[1]]
        del data[top[1]]
        del active[top[1]]

        # NaN = message to say we need to recalculate anything involving
        # the merged cluster; then drop row/column top[1] from BOTH
        # matrices.  (Bug fix: previously pairoffset was never shrunk, so
        # after the first merge its indices no longer lined up with
        # pairloglikes and stale offsets could be applied to later merges.)
        pairloglikes[:, top[0]] = np.nan
        pairloglikes[top[0], :] = np.nan
        pairloglikes = np.delete(pairloglikes, top[1], 0)
        pairloglikes = np.delete(pairloglikes, top[1], 1)
        pairoffset = np.delete(pairoffset, top[1], 0)
        pairoffset = np.delete(pairoffset, top[1], 1)
        loglikes[top[0]] = np.nan
        loglikes = np.delete(loglikes, top[1])

    # TODO Add a way to return the offsets applied to all the time series
    return active
Added test code for new model and util --- GPy/testing/model_tests.py | 18 ++++++++++++ GPy/testing/util_tests.py | 49 +++++++++++++++++++++++++++++++++ GPy/util/cluster_with_offset.py | 2 +- 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index e4411e23..ff137299 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -405,6 +405,24 @@ class MiscTests(unittest.TestCase): warp_m.plot() warp_f.plot(X.min()-10, X.max()+10) plt.show() + + def test_offset_regression(self): + #Tests GPy.models.GPOffsetRegression. Using two small time series + #from a sine wave, we confirm the algorithm determines that the + #likelihood is maximised when the offset hyperparameter is approximately + #equal to the actual offset in X between the two time series. + offset = 3 + X1 = np.arange(0,50,5.0)[:,None] + X2 = np.arange(0+offset,50+offset,5.0)[:,None] + X = np.vstack([X1,X2]) + ind = np.vstack([np.zeros([10,1]),np.ones([10,1])]) + X = np.hstack([X,ind]) + Y = np.sin((X[0:10,0])/30.0)[:,None] + Y = np.vstack([Y,Y]) + + m = GPy.models.GPOffsetRegression(X,Y) + m.optimize() + assert np.abs(m.offset[0]-offset)<0.1, "GPOffsetRegression model failing to estimate correct offset." diff --git a/GPy/testing/util_tests.py b/GPy/testing/util_tests.py index 3c6241f3..e82f7c1a 100644 --- a/GPy/testing/util_tests.py +++ b/GPy/testing/util_tests.py @@ -107,3 +107,52 @@ class TestDebug(unittest.TestCase): self.assertTrue(d[tuple(X[:,4])] == d[tuple(X[:,0])] == [0, 4, 5]) self.assertTrue(d[tuple(X[:,1])] == [1]) + def test_offset_cluster(self): + #Tests the GPy.util.cluster_with_offset.cluster utility with a small + #test data set. Not using random noise just in case it occasionally + #causes it not to cluster correctly. 
+ #groundtruth cluster identifiers are: [0,1,1,0] + + #data contains a list of the four sets of time series (3 per data point) + + data = [np.array([[ 2.18094245, 1.96529789, 2.00265523, 2.18218742, 2.06795428], + [ 1.62254829, 1.75748448, 1.83879347, 1.87531326, 1.52503496], + [ 1.54589609, 1.61607914, 2.00463192, 1.48771394, 1.63339218]]), + np.array([[ 2.86766106, 2.97953437, 2.91958876, 2.92510506, 3.03239241], + [ 2.57368423, 2.59954886, 3.10000395, 2.75806125, 2.89865704], + [ 2.58916318, 2.53698259, 2.63858411, 2.63102504, 2.51853901]]), + np.array([[ 2.77834168, 2.9618564 , 2.88482141, 3.24259745, 2.9716821 ], + [ 2.60675576, 2.67095624, 2.94824436, 2.80520631, 2.87247516], + [ 2.49543562, 2.5492281 , 2.6505866 , 2.65015308, 2.59738616]]), + np.array([[ 1.76783086, 2.21666738, 2.07939706, 1.9268263 , 2.23360121], + [ 1.94305547, 1.94648592, 2.1278921 , 2.09481457, 2.08575238], + [ 1.69336013, 1.72285186, 1.6339506 , 1.61212022, 1.39198698]])] + + #inputs contains their associated X values + + inputs = [np.array([[ 0. ], + [ 0.68040097], + [ 1.20316795], + [ 1.798749 ], + [ 2.14891733]]), np.array([[ 0. ], + [ 0.51910637], + [ 0.98259352], + [ 1.57442965], + [ 1.82515098]]), np.array([[ 0. ], + [ 0.66645478], + [ 1.59464591], + [ 1.69769551], + [ 1.80932752]]), np.array([[ 0. ], + [ 0.87512108], + [ 1.71881079], + [ 2.67162871], + [ 3.23761907]])] + + #try doing the clustering + active = GPy.util.cluster_with_offset.cluster(data,inputs) + + #check to see that the clustering has correctly clustered the time series. 
+ from sets import Set + clusters = Set([Set(cluster) for cluster in active]) + assert Set([1,2]) in clusters, "Offset Clustering algorithm failed" + assert Set([0,3]) in clusters, "Offset Clustering algoirthm failed" diff --git a/GPy/util/cluster_with_offset.py b/GPy/util/cluster_with_offset.py index 103bdaca..fa8f1be8 100644 --- a/GPy/util/cluster_with_offset.py +++ b/GPy/util/cluster_with_offset.py @@ -122,7 +122,7 @@ def cluster(data,inputs,verbose=False): if verbose: it +=1 - print "Iteration %d" % it + print("Iteration %d" % it) #Compute the log-likelihood of each cluster (add them together) for clusti in range(len(active)): From 16bc77a1edeeadeb377f794707e4752a4827630e Mon Sep 17 00:00:00 2001 From: Michael T Smith Date: Wed, 3 Aug 2016 18:12:56 +0100 Subject: [PATCH 04/15] More useful message from testing re offset estimate --- GPy/testing/model_tests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index ff137299..e2c05aec 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -421,8 +421,9 @@ class MiscTests(unittest.TestCase): Y = np.vstack([Y,Y]) m = GPy.models.GPOffsetRegression(X,Y) + assert m.checkgrad(), "Gradients of offset parameters don't match numerical approximations." m.optimize() - assert np.abs(m.offset[0]-offset)<0.1, "GPOffsetRegression model failing to estimate correct offset." 
+ assert np.abs(m.offset[0]-offset)<0.1, ("GPOffsetRegression model failing to estimate correct offset (value estimated = %0.2f instead of %0.2f)" % (m.offset[0], offset)) From 04bcf56de12ed0da215a12e4d4edb450ac216dea Mon Sep 17 00:00:00 2001 From: Michael T Smith Date: Wed, 3 Aug 2016 18:28:08 +0100 Subject: [PATCH 05/15] Added GPy import --- GPy/testing/util_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPy/testing/util_tests.py b/GPy/testing/util_tests.py index e82f7c1a..ce0beaaa 100644 --- a/GPy/testing/util_tests.py +++ b/GPy/testing/util_tests.py @@ -29,6 +29,7 @@ #=============================================================================== import unittest, numpy as np +import GPy class TestDebug(unittest.TestCase): def test_checkFinite(self): From ebe4a496a65d27ea8cb0289e1008ffac17ab3c98 Mon Sep 17 00:00:00 2001 From: Michael T Smith Date: Thu, 4 Aug 2016 09:01:27 +0100 Subject: [PATCH 06/15] Corrected mistake in gradients: Was finding d(Xi-Xj)/dOffset instead of dr/dOffset. Fixed by scaling by kernel lengthscale. --- GPy/models/gp_offset_regression.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/GPy/models/gp_offset_regression.py b/GPy/models/gp_offset_regression.py index 3699223e..2fd264f4 100644 --- a/GPy/models/gp_offset_regression.py +++ b/GPy/models/gp_offset_regression.py @@ -75,6 +75,10 @@ class GPOffsetRegression(GP): #print Gs[i] #print w dr_doffsets.append(dr_doffset) + + #lastly we need to divide by the lengthscale: So far we've found d(X_i - X_j)/dOffsets + #we want dr/dOffsets. (X_i - X_j)/lengthscale = r + dr_doffsets /= self.kern.lengthscale return dr_doffsets def parameters_changed(self): From 97f5ca6b8439fca8f365e4d9e5f23717f5cd4624 Mon Sep 17 00:00:00 2001 From: Michael T Smith Date: Thu, 4 Aug 2016 09:26:40 +0100 Subject: [PATCH 07/15] Modified set code in test to work with python 2 and python 3. 
--- GPy/testing/util_tests.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/GPy/testing/util_tests.py b/GPy/testing/util_tests.py index ce0beaaa..ba3d7ddf 100644 --- a/GPy/testing/util_tests.py +++ b/GPy/testing/util_tests.py @@ -148,12 +148,10 @@ class TestDebug(unittest.TestCase): [ 1.71881079], [ 2.67162871], [ 3.23761907]])] - + #try doing the clustering active = GPy.util.cluster_with_offset.cluster(data,inputs) - #check to see that the clustering has correctly clustered the time series. - from sets import Set - clusters = Set([Set(cluster) for cluster in active]) - assert Set([1,2]) in clusters, "Offset Clustering algorithm failed" - assert Set([0,3]) in clusters, "Offset Clustering algoirthm failed" + clusters = set([frozenset(cluster) for cluster in active]) + assert set([1,2]) in clusters, "Offset Clustering algorithm failed" + assert set([0,3]) in clusters, "Offset Clustering algoirthm failed" From ad2779bdf3547b13656daed67b66284cb5652614 Mon Sep 17 00:00:00 2001 From: Michael T Smith Date: Thu, 4 Aug 2016 10:11:30 +0100 Subject: [PATCH 08/15] made initial lengthscale!=1 to ensure we're properly testing gradients --- GPy/testing/model_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index e2c05aec..5d6de7ae 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -421,6 +421,7 @@ class MiscTests(unittest.TestCase): Y = np.vstack([Y,Y]) m = GPy.models.GPOffsetRegression(X,Y) + m.rbf.lengthscale=5.0 #make it something other than one to check our gradients properly! assert m.checkgrad(), "Gradients of offset parameters don't match numerical approximations." 
m.optimize() assert np.abs(m.offset[0]-offset)<0.1, ("GPOffsetRegression model failing to estimate correct offset (value estimated = %0.2f instead of %0.2f)" % (m.offset[0], offset)) From e019b17cf965a7f673326140767dbc48c4332e32 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 4 Aug 2016 12:31:36 +0000 Subject: [PATCH 09/15] Added threaded option - but this doesn't work due to the global interpreter lock --- GPy/util/__init__.py | 1 + GPy/util/cluster_with_offset.py | 4 + GPy/util/threaded_cluster_with_offset.py | 206 +++++++++++++++++++++++ 3 files changed, 211 insertions(+) create mode 100644 GPy/util/threaded_cluster_with_offset.py diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index 685551fd..136e0d3b 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -17,3 +17,4 @@ from . import multioutput from . import parallel from . import functions from . import cluster_with_offset +from . import threaded_cluster_with_offset diff --git a/GPy/util/cluster_with_offset.py b/GPy/util/cluster_with_offset.py index fa8f1be8..d1e896f1 100644 --- a/GPy/util/cluster_with_offset.py +++ b/GPy/util/cluster_with_offset.py @@ -3,6 +3,7 @@ import GPy import numpy as np +import sys #so I can print dots def get_log_likelihood(inputs,data,clust): """Get the LL of a combined set of clusters, ignoring time series offsets. 
@@ -126,6 +127,9 @@ def cluster(data,inputs,verbose=False): #Compute the log-likelihood of each cluster (add them together) for clusti in range(len(active)): + if verbose: + sys.stdout.write('.') + sys.stdout.flush() if np.isnan(loglikes[clusti]): loglikes[clusti], unused_offset = get_log_likelihood_offset(inputs,data,[clusti]) diff --git a/GPy/util/threaded_cluster_with_offset.py b/GPy/util/threaded_cluster_with_offset.py new file mode 100644 index 00000000..66192c9f --- /dev/null +++ b/GPy/util/threaded_cluster_with_offset.py @@ -0,0 +1,206 @@ +# Copyright (c) 2016, Mike Smith +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import GPy +import numpy as np +import time +import sys #so I can print dots +from threading import Thread +maxthreads = 40 + + +def get_log_likelihood(inputs,data,clust,loglikesarray): + """Get the LL of a combined set of clusters, ignoring time series offsets. + + Get the log likelihood of a cluster without worrying about the fact + different time series are offset. We're using it here really for those + cases in which we only have one cluster to get the loglikelihood of. + + arguments: + inputs -- the 'X's in a list, one item per cluster + data -- the 'Y's in a list, one item per cluster + clust -- list of clusters to use + + returns a tuple: + log likelihood and the offset (which is always zero for this model) + """ + + S = data[0].shape[0] #number of time series + + #build a new dataset from the clusters, by combining all clusters together + X = np.zeros([0,1]) + Y = np.zeros([0,S]) + + #for each person in the cluster, + #add their inputs and data to the new dataset + for p in clust: + X = np.vstack([X,inputs[p]]) + Y = np.vstack([Y,data[p].T]) + + #find the loglikelihood. We just add together the LL for each time series. 
+ #ll=0 + #for s in range(S): + # m = GPy.models.GPRegression(X,Y[:,s][:,None]) + # m.optimize() + # ll+=m.log_likelihood() + + m = GPy.models.GPRegression(X,Y) + m.optimize() + ll=m.log_likelihood() + + loglikesarray[clust[0]] = ll + return + +def get_log_likelihood_offset(inputs,data,clust,loglikesarray,offsetarray): + """Get the log likelihood of a combined set of clusters, fitting the offsets + + arguments: + inputs -- the 'X's in a list, one item per cluster + data -- the 'Y's in a list, one item per cluster + clust -- list of clusters to use + + returns a tuple: + log likelihood and the offset + """ + + assert len(clust)>1, "Use get_log_likelihood if you only have one cluster" + + + + S = data[0].shape[0] #number of time series + + X = np.zeros([0,2]) #notice the extra column, this is for the cluster index + Y = np.zeros([0,S]) + + #for each person in the cluster, add their inputs and data to the new + #dataset. Note we add an index identifying which person is which data point. + #This is for the offset model to use, to allow it to know which data points + #to shift. + for i,p in enumerate(clust): + idx = i*np.ones([inputs[p].shape[0],1]) + X = np.vstack([X,np.hstack([inputs[p],idx])]) + Y = np.vstack([Y,data[p].T]) + + m = GPy.models.GPOffsetRegression(X,Y) + #TODO: How to select a sensible prior? + m.offset.set_prior(GPy.priors.Gaussian(0,20)) + #TODO: Set a sensible start value for the length scale, + #make it long to help the offset fit. + + m.optimize() + + ll = m.log_likelihood() + offset = m.offset.values[0] + #return ll,offset + loglikesarray[clust[0],clust[1]] = ll + offsetarray[clust[0],clust[1]] = offset + return + +def cluster(data,inputs,verbose=False): + """Clusters data + + Using the new offset model, this method uses a greedy algorithm to cluster + the data. It starts with all the data points in separate clusters and tests + whether combining them increases the overall log-likelihood (LL). 
It then + iteratively joins pairs of clusters which cause the greatest increase in + the LL, until no join increases the LL. + + arguments: + inputs -- the 'X's in a list, one item per cluster + data -- the 'Y's in a list, one item per cluster + + returns a list of the clusters. + """ + N=len(data) + + + #Define a set of N active cluster + active = [] + for p in range(0,N): + active.append([p]) + + loglikes = np.zeros(len(active)) + loglikes[:] = None + + pairloglikes = np.zeros([len(active),len(active)]) + pairloglikes[:] = None + pairoffset = np.zeros([len(active),len(active)]) + + it = 0 + while True: + + if verbose: + it +=1 + print("Iteration %d" % it) + + threads = [] + #Compute the log-likelihood of each cluster (add them together) + for clusti in range(len(active)): + if verbose: + sys.stdout.write('.') + sys.stdout.flush() + if np.isnan(loglikes[clusti]): + t = Thread(target=get_log_likelihood,args=(inputs,data,[clusti],loglikes)) + threads.append(t) + #loglikes[clusti], unused_offset = get_log_likelihood_offset(inputs,data,[clusti]) + + #try combining with each other cluster... + for clustj in range(clusti): #count from 0 to clustj-1 + temp = [clusti,clustj] + if np.isnan(pairloglikes[clusti,clustj]): + #pairloglikes[clusti,clustj],pairoffset[clusti,clustj] = get_log_likelihood_offset(inputs,data,temp) + t = Thread(target=get_log_likelihood_offset,args=(inputs,data,temp,pairloglikes,pairoffset)) + threads.append(t) + + if len(threads)<=maxthreads: + for t in threads: + t.start() + for t in threads: + t.join() + + else: #should use a queue, as the method here assumes all threads take about same time, etc. 
+ for i,t in enumerate(threads): + t.start() + if (i>=maxthreads): + threads[i-maxthreads].join() + for i in range(len(threads)-maxthreads,len(threads)): + threads[i].join() + + seploglikes = np.repeat(loglikes[:,None].T,len(loglikes),0)+np.repeat(loglikes[:,None],len(loglikes),1) + loglikeimprovement = pairloglikes - seploglikes #how much likelihood improves with clustering + top = np.unravel_index(np.nanargmax(pairloglikes-seploglikes), pairloglikes.shape) + + #if loglikeimprovement.shape[0]<3: + # #no more clustering to do - this shouldn't happen really unless + # #we've set the threshold to apply clustering to less than 0 + # break + + #if theres further clustering to be done... + if loglikeimprovement[top[0],top[1]]>0: + active[top[0]].extend(active[top[1]]) + offset=pairoffset[top[0],top[1]] + inputs[top[0]] = np.vstack([inputs[top[0]],inputs[top[1]]-offset]) + data[top[0]] = np.hstack([data[top[0]],data[top[1]]]) + del inputs[top[1]] + del data[top[1]] + del active[top[1]] + + #None = message to say we need to recalculate + pairloglikes[:,top[0]] = None + pairloglikes[top[0],:] = None + pairloglikes = np.delete(pairloglikes,top[1],0) + pairloglikes = np.delete(pairloglikes,top[1],1) + loglikes[top[0]] = None + loglikes = np.delete(loglikes,top[1]) + else: + break + + #if loglikeimprovement[top[0],top[1]]>0: + # print "joined" + # print top + # print offset + # print offsets + # print offsets[top[1]]-offsets[top[0]] + + #TODO Add a way to return the offsets applied to all the time series + return active From 6f4d03178ebdc1967d5aeefbb1ca7ca5ab3923b3 Mon Sep 17 00:00:00 2001 From: Michael T Smith Date: Thu, 4 Aug 2016 14:47:08 +0100 Subject: [PATCH 10/15] Don't use message added to cluster code --- GPy/util/threaded_cluster_with_offset.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/GPy/util/threaded_cluster_with_offset.py b/GPy/util/threaded_cluster_with_offset.py index 66192c9f..6818f5ec 100644 --- a/GPy/util/threaded_cluster_with_offset.py +++ 
b/GPy/util/threaded_cluster_with_offset.py @@ -1,6 +1,9 @@ # Copyright (c) 2016, Mike Smith # Licensed under the BSD 3-clause license (see LICENSE.txt) +# Not recommended for use at the moment - the threading here doesn't +# work due to the global interpreter lock. + import GPy import numpy as np import time From 81dc680a8acb72b46f16bc12d599f847680a3a98 Mon Sep 17 00:00:00 2001 From: Michael T Smith Date: Tue, 9 Aug 2016 13:55:37 +0100 Subject: [PATCH 11/15] Push just to rerun testing --- GPy/models/gp_offset_regression.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GPy/models/gp_offset_regression.py b/GPy/models/gp_offset_regression.py index 2fd264f4..b843e08d 100644 --- a/GPy/models/gp_offset_regression.py +++ b/GPy/models/gp_offset_regression.py @@ -49,8 +49,7 @@ class GPOffsetRegression(GP): #def dL_doffset(self, X, sel): # dL_dr = self.dK_dr_via_X(X, X) * dL_dK - - + def dr_doffset(self,X,sel,delta): #given an input matrix, X and the offsets (delta) From bd758ac849eff3f13f58c61b7a41f0fc1f1a395f Mon Sep 17 00:00:00 2001 From: Michael T Smith Date: Tue, 9 Aug 2016 14:54:29 +0100 Subject: [PATCH 12/15] Removing 'threaded' version --- GPy/util/__init__.py | 1 - GPy/util/threaded_cluster_with_offset.py | 209 ----------------------- 2 files changed, 210 deletions(-) delete mode 100644 GPy/util/threaded_cluster_with_offset.py diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index 136e0d3b..685551fd 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -17,4 +17,3 @@ from . import multioutput from . import parallel from . import functions from . import cluster_with_offset -from . 
import threaded_cluster_with_offset diff --git a/GPy/util/threaded_cluster_with_offset.py b/GPy/util/threaded_cluster_with_offset.py deleted file mode 100644 index 6818f5ec..00000000 --- a/GPy/util/threaded_cluster_with_offset.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) 2016, Mike Smith -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -# Not recommended for use at the moment - the threading here doesn't -# work due to the global interpreter lock. - -import GPy -import numpy as np -import time -import sys #so I can print dots -from threading import Thread -maxthreads = 40 - - -def get_log_likelihood(inputs,data,clust,loglikesarray): - """Get the LL of a combined set of clusters, ignoring time series offsets. - - Get the log likelihood of a cluster without worrying about the fact - different time series are offset. We're using it here really for those - cases in which we only have one cluster to get the loglikelihood of. - - arguments: - inputs -- the 'X's in a list, one item per cluster - data -- the 'Y's in a list, one item per cluster - clust -- list of clusters to use - - returns a tuple: - log likelihood and the offset (which is always zero for this model) - """ - - S = data[0].shape[0] #number of time series - - #build a new dataset from the clusters, by combining all clusters together - X = np.zeros([0,1]) - Y = np.zeros([0,S]) - - #for each person in the cluster, - #add their inputs and data to the new dataset - for p in clust: - X = np.vstack([X,inputs[p]]) - Y = np.vstack([Y,data[p].T]) - - #find the loglikelihood. We just add together the LL for each time series. 
- #ll=0 - #for s in range(S): - # m = GPy.models.GPRegression(X,Y[:,s][:,None]) - # m.optimize() - # ll+=m.log_likelihood() - - m = GPy.models.GPRegression(X,Y) - m.optimize() - ll=m.log_likelihood() - - loglikesarray[clust[0]] = ll - return - -def get_log_likelihood_offset(inputs,data,clust,loglikesarray,offsetarray): - """Get the log likelihood of a combined set of clusters, fitting the offsets - - arguments: - inputs -- the 'X's in a list, one item per cluster - data -- the 'Y's in a list, one item per cluster - clust -- list of clusters to use - - returns a tuple: - log likelihood and the offset - """ - - assert len(clust)>1, "Use get_log_likelihood if you only have one cluster" - - - - S = data[0].shape[0] #number of time series - - X = np.zeros([0,2]) #notice the extra column, this is for the cluster index - Y = np.zeros([0,S]) - - #for each person in the cluster, add their inputs and data to the new - #dataset. Note we add an index identifying which person is which data point. - #This is for the offset model to use, to allow it to know which data points - #to shift. - for i,p in enumerate(clust): - idx = i*np.ones([inputs[p].shape[0],1]) - X = np.vstack([X,np.hstack([inputs[p],idx])]) - Y = np.vstack([Y,data[p].T]) - - m = GPy.models.GPOffsetRegression(X,Y) - #TODO: How to select a sensible prior? - m.offset.set_prior(GPy.priors.Gaussian(0,20)) - #TODO: Set a sensible start value for the length scale, - #make it long to help the offset fit. - - m.optimize() - - ll = m.log_likelihood() - offset = m.offset.values[0] - #return ll,offset - loglikesarray[clust[0],clust[1]] = ll - offsetarray[clust[0],clust[1]] = offset - return - -def cluster(data,inputs,verbose=False): - """Clusters data - - Using the new offset model, this method uses a greedy algorithm to cluster - the data. It starts with all the data points in separate clusters and tests - whether combining them increases the overall log-likelihood (LL). 
It then - iteratively joins pairs of clusters which cause the greatest increase in - the LL, until no join increases the LL. - - arguments: - inputs -- the 'X's in a list, one item per cluster - data -- the 'Y's in a list, one item per cluster - - returns a list of the clusters. - """ - N=len(data) - - - #Define a set of N active cluster - active = [] - for p in range(0,N): - active.append([p]) - - loglikes = np.zeros(len(active)) - loglikes[:] = None - - pairloglikes = np.zeros([len(active),len(active)]) - pairloglikes[:] = None - pairoffset = np.zeros([len(active),len(active)]) - - it = 0 - while True: - - if verbose: - it +=1 - print("Iteration %d" % it) - - threads = [] - #Compute the log-likelihood of each cluster (add them together) - for clusti in range(len(active)): - if verbose: - sys.stdout.write('.') - sys.stdout.flush() - if np.isnan(loglikes[clusti]): - t = Thread(target=get_log_likelihood,args=(inputs,data,[clusti],loglikes)) - threads.append(t) - #loglikes[clusti], unused_offset = get_log_likelihood_offset(inputs,data,[clusti]) - - #try combining with each other cluster... - for clustj in range(clusti): #count from 0 to clustj-1 - temp = [clusti,clustj] - if np.isnan(pairloglikes[clusti,clustj]): - #pairloglikes[clusti,clustj],pairoffset[clusti,clustj] = get_log_likelihood_offset(inputs,data,temp) - t = Thread(target=get_log_likelihood_offset,args=(inputs,data,temp,pairloglikes,pairoffset)) - threads.append(t) - - if len(threads)<=maxthreads: - for t in threads: - t.start() - for t in threads: - t.join() - - else: #should use a queue, as the method here assumes all threads take about same time, etc. 
- for i,t in enumerate(threads): - t.start() - if (i>=maxthreads): - threads[i-maxthreads].join() - for i in range(len(threads)-maxthreads,len(threads)): - threads[i].join() - - seploglikes = np.repeat(loglikes[:,None].T,len(loglikes),0)+np.repeat(loglikes[:,None],len(loglikes),1) - loglikeimprovement = pairloglikes - seploglikes #how much likelihood improves with clustering - top = np.unravel_index(np.nanargmax(pairloglikes-seploglikes), pairloglikes.shape) - - #if loglikeimprovement.shape[0]<3: - # #no more clustering to do - this shouldn't happen really unless - # #we've set the threshold to apply clustering to less than 0 - # break - - #if theres further clustering to be done... - if loglikeimprovement[top[0],top[1]]>0: - active[top[0]].extend(active[top[1]]) - offset=pairoffset[top[0],top[1]] - inputs[top[0]] = np.vstack([inputs[top[0]],inputs[top[1]]-offset]) - data[top[0]] = np.hstack([data[top[0]],data[top[1]]]) - del inputs[top[1]] - del data[top[1]] - del active[top[1]] - - #None = message to say we need to recalculate - pairloglikes[:,top[0]] = None - pairloglikes[top[0],:] = None - pairloglikes = np.delete(pairloglikes,top[1],0) - pairloglikes = np.delete(pairloglikes,top[1],1) - loglikes[top[0]] = None - loglikes = np.delete(loglikes,top[1]) - else: - break - - #if loglikeimprovement[top[0],top[1]]>0: - # print "joined" - # print top - # print offset - # print offsets - # print offsets[top[1]]-offsets[top[0]] - - #TODO Add a way to return the offsets applied to all the time series - return active From 7135527fd00f0a385b65bfb32f9535c3859d1f8a Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 16 Aug 2016 11:17:15 +0100 Subject: [PATCH 13/15] [Add] add kernel swallowed parts fix #412 --- GPy/kern/src/add.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/GPy/kern/src/add.py b/GPy/kern/src/add.py index 99fe809b..ca20e5a9 100644 --- a/GPy/kern/src/add.py +++ b/GPy/kern/src/add.py @@ -13,15 +13,21 @@ class 
Add(CombinationKernel): propagates gradients through. This kernel will take over the active dims of it's subkernels passed in. + + NOTE: The subkernels will be copies of the original kernels, to prevent + unexpected behavior. """ def __init__(self, subkerns, name='sum'): - for i, kern in enumerate(subkerns[:]): + _newkerns = [] + for kern in subkerns: if isinstance(kern, Add): - del subkerns[i] - for part in kern.parts[::-1]: + for part in kern.parts: #kern.unlink_parameter(part) - subkerns.insert(i, part.copy()) - super(Add, self).__init__(subkerns, name) + _newkerns.append(part.copy()) + else: + _newkerns.append(kern.copy()) + + super(Add, self).__init__(_newkerns, name) self._exact_psicomp = self._check_exact_psicomp() def _check_exact_psicomp(self): From 9aa19b662d7d2f4ca52df428b4b060a1d12425d5 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 16 Aug 2016 11:19:04 +0100 Subject: [PATCH 14/15] [readme] added deploy status to readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e0e715a9..513f2704 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ The Gaussian processes framework in Python. 
* Travis-CI [unit-tests](https://travis-ci.org/SheffieldML/GPy) * [![licence](https://img.shields.io/badge/licence-BSD-blue.svg)](http://opensource.org/licenses/BSD-3-Clause) -[![develstat](https://travis-ci.org/SheffieldML/GPy.svg?branch=devel)](https://travis-ci.org/SheffieldML/GPy) [![appveyor](https://ci.appveyor.com/api/projects/status/662o6tha09m2jix3/branch/deploy?svg=true)](https://ci.appveyor.com/project/mzwiessele/gpy/branch/deploy) [![coverallsdevel](https://coveralls.io/repos/github/SheffieldML/GPy/badge.svg?branch=devel)](https://coveralls.io/github/SheffieldML/GPy?branch=devel) [![covdevel](http://codecov.io/github/SheffieldML/GPy/coverage.svg?branch=devel)](http://codecov.io/github/SheffieldML/GPy?branch=devel) [![Research software impact](http://depsy.org/api/package/pypi/GPy/badge.svg)](http://depsy.org/package/python/GPy) [![Code Health](https://landscape.io/github/SheffieldML/GPy/devel/landscape.svg?style=flat)](https://landscape.io/github/SheffieldML/GPy/devel) +[![deploystat](https://travis-ci.org/SheffieldML/GPy.svg?branch=deploy)](https://travis-ci.org/SheffieldML/GPy) [![appveyor](https://ci.appveyor.com/api/projects/status/662o6tha09m2jix3/branch/deploy?svg=true)](https://ci.appveyor.com/project/mzwiessele/gpy/branch/deploy) [![coverallsdevel](https://coveralls.io/repos/github/SheffieldML/GPy/badge.svg?branch=devel)](https://coveralls.io/github/SheffieldML/GPy?branch=devel) [![covdevel](http://codecov.io/github/SheffieldML/GPy/coverage.svg?branch=devel)](http://codecov.io/github/SheffieldML/GPy?branch=devel) [![Research software impact](http://depsy.org/api/package/pypi/GPy/badge.svg)](http://depsy.org/package/python/GPy) [![Code Health](https://landscape.io/github/SheffieldML/GPy/devel/landscape.svg?style=flat)](https://landscape.io/github/SheffieldML/GPy/devel) ## Updated Structure From b6847366120ef1b4ad7232c8fb80ed26ad448802 Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Tue, 16 Aug 2016 12:01:17 +0100 Subject: [PATCH 15/15] [plotting] 
small edit --- GPy/plotting/abstract_plotting_library.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPy/plotting/abstract_plotting_library.py b/GPy/plotting/abstract_plotting_library.py index 64061b99..95377ed7 100644 --- a/GPy/plotting/abstract_plotting_library.py +++ b/GPy/plotting/abstract_plotting_library.py @@ -61,6 +61,8 @@ class AbstractPlottingLibrary(object): """ Get a new figure with nrows and ncolumns subplots. Does not initialize the canvases yet. + + There is individual kwargs for the individual plotting libraries to use. """ raise NotImplementedError("Implement all plot functions in AbstractPlottingLibrary in order to use your own plotting library")