From ea8b7321815ea07ef0a1688668ed03c6b55514f0 Mon Sep 17 00:00:00 2001
From: Michael T Smith
Date: Wed, 3 Aug 2016 17:49:01 +0100
Subject: [PATCH] Corrected v2 missing print brackets. Added test code for new model and util

---
Review note: test_offset_cluster uses the builtin set/frozenset types instead
of the `sets` module, which does not exist on Python 3.

 GPy/testing/model_tests.py      | 18 ++++++++++++
 GPy/testing/util_tests.py       | 49 +++++++++++++++++++++++++++++++++
 GPy/util/cluster_with_offset.py |  2 +-
 3 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py
index e4411e23..ff137299 100644
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@@ -405,6 +405,24 @@ class MiscTests(unittest.TestCase):
         warp_m.plot()
         warp_f.plot(X.min()-10, X.max()+10)
         plt.show()
+
+    def test_offset_regression(self):
+        #Tests GPy.models.GPOffsetRegression. Using two small time series
+        #from a sine wave, we confirm the algorithm determines that the
+        #likelihood is maximised when the offset hyperparameter is approximately
+        #equal to the actual offset in X between the two time series.
+        offset = 3
+        X1 = np.arange(0,50,5.0)[:,None]
+        X2 = np.arange(0+offset,50+offset,5.0)[:,None]
+        X = np.vstack([X1,X2])
+        ind = np.vstack([np.zeros([10,1]),np.ones([10,1])])
+        X = np.hstack([X,ind])
+        Y = np.sin((X[0:10,0])/30.0)[:,None]
+        Y = np.vstack([Y,Y])
+
+        m = GPy.models.GPOffsetRegression(X,Y)
+        m.optimize()
+        assert np.abs(m.offset[0]-offset)<0.1, "GPOffsetRegression model failing to estimate correct offset."
 
 
 
diff --git a/GPy/testing/util_tests.py b/GPy/testing/util_tests.py
index 3c6241f3..e82f7c1a 100644
--- a/GPy/testing/util_tests.py
+++ b/GPy/testing/util_tests.py
@@ -107,3 +107,52 @@ class TestDebug(unittest.TestCase):
         self.assertTrue(d[tuple(X[:,4])] == d[tuple(X[:,0])] == [0, 4, 5])
         self.assertTrue(d[tuple(X[:,1])] == [1])
 
+    def test_offset_cluster(self):
+        #Tests the GPy.util.cluster_with_offset.cluster utility with a small
+        #test data set. Not using random noise just in case it occasionally
+        #causes it not to cluster correctly.
+        #groundtruth cluster identifiers are: [0,1,1,0]
+
+        #data contains a list of the four sets of time series (3 per data point)
+
+        data = [np.array([[ 2.18094245, 1.96529789, 2.00265523, 2.18218742, 2.06795428],
+                [ 1.62254829, 1.75748448, 1.83879347, 1.87531326, 1.52503496],
+                [ 1.54589609, 1.61607914, 2.00463192, 1.48771394, 1.63339218]]),
+                np.array([[ 2.86766106, 2.97953437, 2.91958876, 2.92510506, 3.03239241],
+                [ 2.57368423, 2.59954886, 3.10000395, 2.75806125, 2.89865704],
+                [ 2.58916318, 2.53698259, 2.63858411, 2.63102504, 2.51853901]]),
+                np.array([[ 2.77834168, 2.9618564 , 2.88482141, 3.24259745, 2.9716821 ],
+                [ 2.60675576, 2.67095624, 2.94824436, 2.80520631, 2.87247516],
+                [ 2.49543562, 2.5492281 , 2.6505866 , 2.65015308, 2.59738616]]),
+                np.array([[ 1.76783086, 2.21666738, 2.07939706, 1.9268263 , 2.23360121],
+                [ 1.94305547, 1.94648592, 2.1278921 , 2.09481457, 2.08575238],
+                [ 1.69336013, 1.72285186, 1.6339506 , 1.61212022, 1.39198698]])]
+
+        #inputs contains their associated X values
+
+        inputs = [np.array([[ 0. ],
+                [ 0.68040097],
+                [ 1.20316795],
+                [ 1.798749 ],
+                [ 2.14891733]]), np.array([[ 0. ],
+                [ 0.51910637],
+                [ 0.98259352],
+                [ 1.57442965],
+                [ 1.82515098]]), np.array([[ 0. ],
+                [ 0.66645478],
+                [ 1.59464591],
+                [ 1.69769551],
+                [ 1.80932752]]), np.array([[ 0. ],
+                [ 0.87512108],
+                [ 1.71881079],
+                [ 2.67162871],
+                [ 3.23761907]])]
+
+        #try doing the clustering
+        active = GPy.util.cluster_with_offset.cluster(data,inputs)
+
+        #check to see that the clustering has correctly clustered the time series.
+        #builtin set/frozenset are used (the 'sets' module is gone in Python 3); inner sets must be hashable.
+        clusters = set(frozenset(cluster) for cluster in active)
+        assert frozenset([1,2]) in clusters, "Offset Clustering algorithm failed"
+        assert frozenset([0,3]) in clusters, "Offset Clustering algorithm failed"
diff --git a/GPy/util/cluster_with_offset.py b/GPy/util/cluster_with_offset.py
index 103bdaca..fa8f1be8 100644
--- a/GPy/util/cluster_with_offset.py
+++ b/GPy/util/cluster_with_offset.py
@@ -122,7 +122,7 @@ def cluster(data,inputs,verbose=False):
 
         if verbose:
             it +=1
-            print "Iteration %d" % it
+            print("Iteration %d" % it)
 
         #Compute the log-likelihood of each cluster (add them together)
         for clusti in range(len(active)):