From ea8b7321815ea07ef0a1688668ed03c6b55514f0 Mon Sep 17 00:00:00 2001
From: Michael T Smith <lionfishy@gmail.com>
Date: Wed, 3 Aug 2016 17:49:01 +0100
Subject: [PATCH] Corrected v2 missing print brackets. Added test code for new
 model and util

---
 GPy/testing/model_tests.py      | 18 ++++++++++++
 GPy/testing/util_tests.py       | 49 +++++++++++++++++++++++++++++++++
 GPy/util/cluster_with_offset.py |  2 +-
 3 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py
index e4411e23..ff137299 100644
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@@ -405,6 +405,24 @@ class MiscTests(unittest.TestCase):
         warp_m.plot()
         warp_f.plot(X.min()-10, X.max()+10)
         plt.show()
+        
+    def test_offset_regression(self):
+        #Tests GPy.models.GPOffsetRegression. The same sine-wave samples are
+        #used for two small time series whose X values differ by a known
+        #offset; we confirm the likelihood is maximised when the offset
+        #hyperparameter is approximately equal (within 0.1) to that offset.
+        offset = 3
+        X1 = np.arange(0,50,5.0)[:,None]
+        X2 = np.arange(0+offset,50+offset,5.0)[:,None]
+        #second column of X marks which series (0 or 1) each row belongs to
+        ind = np.vstack([np.zeros([10,1]),np.ones([10,1])])
+        X = np.hstack([np.vstack([X1,X2]),ind])
+        Y = np.sin((X[0:10,0])/30.0)[:,None]
+        Y = np.vstack([Y,Y])
+
+        m = GPy.models.GPOffsetRegression(X,Y)
+        m.optimize()
+        self.assertTrue(np.abs(m.offset[0]-offset)<0.1, "GPOffsetRegression model failing to estimate correct offset.")
 
 
 
diff --git a/GPy/testing/util_tests.py b/GPy/testing/util_tests.py
index 3c6241f3..e82f7c1a 100644
--- a/GPy/testing/util_tests.py
+++ b/GPy/testing/util_tests.py
@@ -107,3 +107,52 @@ class TestDebug(unittest.TestCase):
         self.assertTrue(d[tuple(X[:,4])] == d[tuple(X[:,0])] == [0, 4, 5])
         self.assertTrue(d[tuple(X[:,1])] == [1])
 
+    def test_offset_cluster(self):
+        #Tests the GPy.util.cluster_with_offset.cluster utility with a small
+        #fixed test data set. Random noise is deliberately not used, in case
+        #it occasionally causes the series not to cluster correctly.
+        #groundtruth cluster identifiers are: [0,1,1,0]
+
+        #data is a list of the four sets of time series (each a 3x5 array)
+
+        data = [np.array([[ 2.18094245,  1.96529789,  2.00265523,  2.18218742,  2.06795428],
+                [ 1.62254829,  1.75748448,  1.83879347,  1.87531326,  1.52503496],
+                [ 1.54589609,  1.61607914,  2.00463192,  1.48771394,  1.63339218]]),
+         np.array([[ 2.86766106,  2.97953437,  2.91958876,  2.92510506,  3.03239241],
+                [ 2.57368423,  2.59954886,  3.10000395,  2.75806125,  2.89865704],
+                [ 2.58916318,  2.53698259,  2.63858411,  2.63102504,  2.51853901]]),
+         np.array([[ 2.77834168,  2.9618564 ,  2.88482141,  3.24259745,  2.9716821 ],
+                [ 2.60675576,  2.67095624,  2.94824436,  2.80520631,  2.87247516],
+                [ 2.49543562,  2.5492281 ,  2.6505866 ,  2.65015308,  2.59738616]]),
+         np.array([[ 1.76783086,  2.21666738,  2.07939706,  1.9268263 ,  2.23360121],
+                [ 1.94305547,  1.94648592,  2.1278921 ,  2.09481457,  2.08575238],
+                [ 1.69336013,  1.72285186,  1.6339506 ,  1.61212022,  1.39198698]])]
+
+        #inputs contains their associated X values (one 5x1 array per set)
+
+        inputs = [np.array([[ 0.        ],
+                [ 0.68040097],
+                [ 1.20316795],
+                [ 1.798749  ],
+                [ 2.14891733]]), np.array([[ 0.        ],
+                [ 0.51910637],
+                [ 0.98259352],
+                [ 1.57442965],
+                [ 1.82515098]]), np.array([[ 0.        ],
+                [ 0.66645478],
+                [ 1.59464591],
+                [ 1.69769551],
+                [ 1.80932752]]), np.array([[ 0.        ],
+                [ 0.87512108],
+                [ 1.71881079],
+                [ 2.67162871],
+                [ 3.23761907]])]
+
+        #try doing the clustering
+        active = GPy.util.cluster_with_offset.cluster(data,inputs)
+
+        #check to see that the clustering has correctly clustered the time series.
+        #builtin frozenset is hashable (so it nests in a set) on Python 2 and 3
+        clusters = set(frozenset(cluster) for cluster in active)
+        self.assertIn(frozenset([1,2]), clusters, "Offset Clustering algorithm failed")
+        self.assertIn(frozenset([0,3]), clusters, "Offset Clustering algorithm failed")
diff --git a/GPy/util/cluster_with_offset.py b/GPy/util/cluster_with_offset.py
index 103bdaca..fa8f1be8 100644
--- a/GPy/util/cluster_with_offset.py
+++ b/GPy/util/cluster_with_offset.py
@@ -122,7 +122,7 @@ def cluster(data,inputs,verbose=False):
     
         if verbose:
             it +=1
-            print "Iteration %d" % it
+            print("Iteration %d" % it)
         
         #Compute the log-likelihood of each cluster (add them together)
         for clusti in range(len(active)):