diff --git a/GPy/models/gp_offset_regression.py b/GPy/models/gp_offset_regression.py
index b843e08d..f7a595cf 100644
--- a/GPy/models/gp_offset_regression.py
+++ b/GPy/models/gp_offset_regression.py
@@ -86,7 +86,7 @@ class GPOffsetRegression(GP):
         self.X = self.X_fixed - offsets[self.selected]
         super(GPOffsetRegression, self).parameters_changed()
         
-        dL_dr = self.kern.dK_dr_via_X(self.X, self.X) * self.grad_dict['dL_dK']
+        dL_dr = self.kern.dK_dr_via_X(self.X, self.X) * self.grad_dict['dL_dK'] #TODO could use gradients_X, instead of r.
         
         dr_doff = self.dr_doffset(self.X,self.selected,self.offset.values)
         for i in range(len(dr_doff)):
diff --git a/GPy/util/cluster_with_offset.py b/GPy/util/cluster_with_offset.py
index d1e896f1..29f1e9ff 100644
--- a/GPy/util/cluster_with_offset.py
+++ b/GPy/util/cluster_with_offset.py
@@ -5,12 +5,10 @@ import GPy
 import numpy as np
 import sys #so I can print dots
 
-def get_log_likelihood(inputs,data,clust):
-    """Get the LL of a combined set of clusters, ignoring time series offsets.
+def get_individual_log_likelihood_offset(inputs,data,clust,common_kern):
+    """Get the LL of a pair of clusters, but having them independent
     
-    Get the log likelihood of a cluster without worrying about the fact
-    different time series are offset. We're using it here really for those
-    cases in which we only have one cluster to get the loglikelihood of.
+    Get the log likelihood of a cluster pair.
     
     arguments:
     inputs -- the 'X's in a list, one item per cluster
@@ -18,7 +16,7 @@ def get_log_likelihood(inputs,data,clust):
     clust -- list of clusters to use
     
     returns a tuple:
-    log likelihood and the offset (which is always zero for this model)
+    log likelihood and the offset
     """
  
     S = data[0].shape[0] #number of time series
@@ -29,23 +27,22 @@ def get_log_likelihood(inputs,data,clust):
     
     #for each person in the cluster,
     #add their inputs and data to the new dataset
-    for p in clust:
+    idx = np.zeros([0,1])
+    for it,p in enumerate(clust):
         X = np.vstack([X,inputs[p]])
         Y = np.vstack([Y,data[p].T])
-        
-    #find the loglikelihood. We just add together the LL for each time series.
-    #ll=0
-    #for s in range(S):
-    #    m = GPy.models.GPRegression(X,Y[:,s][:,None])
-    #    m.optimize()
-    #    ll+=m.log_likelihood()
+        idx = np.r_[idx,np.ones([data[p].shape[1],1])*it]
 
-    m = GPy.models.GPRegression(X,Y)
+    X = np.c_[X,idx]
+    #print(X)
+    k_independent = GPy.kern.IndependentOutputs(common_kern.copy(),index_dim=1)
+
+    m = GPy.models.GPRegression(X,Y,k_independent)
     m.optimize()
     ll=m.log_likelihood()    
     return ll,0
 
-def get_log_likelihood_offset(inputs,data,clust):
+def get_shared_log_likelihood_offset(inputs,data,clust,common_kern):
     """Get the log likelihood of a combined set of clusters, fitting the offsets
     
     arguments:
@@ -56,18 +53,13 @@ def get_log_likelihood_offset(inputs,data,clust):
     returns a tuple:
     log likelihood and the offset
     """    
-        
-    #if we've only got one cluster, the model has an error, so we want to just
-    #use normal GPRegression.
-    if len(clust)==1: 
-        return get_log_likelihood(inputs,data,clust)
-                
+
     S = data[0].shape[0] #number of time series
         
     X = np.zeros([0,2]) #notice the extra column, this is for the cluster index
     Y = np.zeros([0,S])
     
-    #for each person in the cluster, add their inputs and data to the new
+    #for each cluster, add their inputs and data to the new
     #dataset. Note we add an index identifying which person is which data point.
     #This is for the offset model to use, to allow it to know which data points
     #to shift.
@@ -75,10 +67,11 @@ def get_log_likelihood_offset(inputs,data,clust):
         idx = i*np.ones([inputs[p].shape[0],1])
         X = np.vstack([X,np.hstack([inputs[p],idx])])
         Y = np.vstack([Y,data[p].T])
-        
-    m = GPy.models.GPOffsetRegression(X,Y)
+                      
+    m = GPy.models.GPOffsetRegression(X,Y,common_kern.copy())
+
     #TODO: How to select a sensible prior?
-    m.offset.set_prior(GPy.priors.Gaussian(0,20)) 
+   ## m.offset.set_prior(GPy.priors.Gaussian(0,20)) 
     #TODO: Set a sensible start value for the length scale,
     #make it long to help the offset fit.
     
@@ -88,7 +81,7 @@ def get_log_likelihood_offset(inputs,data,clust):
     offset = m.offset.values[0]
     return ll,offset
 
-def cluster(data,inputs,verbose=False):
+def cluster(data,inputs,common_kern,verbose=False):
     """Clusters data
     
     Using the new offset model, this method uses a greedy algorithm to cluster
@@ -111,12 +104,11 @@ def cluster(data,inputs,verbose=False):
     for p in range(0,N):
         active.append([p])
 
-    loglikes = np.zeros(len(active))
-    loglikes[:] = None
-
-    pairloglikes = np.zeros([len(active),len(active)])
-    pairloglikes[:] = None
-    pairoffset = np.zeros([len(active),len(active)])
+    individualloglikes = np.zeros([len(active),len(active)])
+    individualloglikes[:] = None
+    sharedloglikes = np.zeros([len(active),len(active)])
+    sharedloglikes[:] = None
+    sharedoffset = np.zeros([len(active),len(active)])
 
     it = 0
     while True:
@@ -125,23 +117,17 @@ def cluster(data,inputs,verbose=False):
             it +=1
             print("Iteration %d" % it)
         
-        #Compute the log-likelihood of each cluster (add them together)
+        #Compute the log-likelihood of each cluster
         for clusti in range(len(active)):
-            if verbose:
-                sys.stdout.write('.')
-                sys.stdout.flush()
-            if np.isnan(loglikes[clusti]):
-                loglikes[clusti], unused_offset = get_log_likelihood_offset(inputs,data,[clusti])
-
             #try combining with each other cluster...
             for clustj in range(clusti): #count from 0 to clustj-1
                 temp = [clusti,clustj]
-                if np.isnan(pairloglikes[clusti,clustj]):
-                    pairloglikes[clusti,clustj],pairoffset[clusti,clustj] = get_log_likelihood_offset(inputs,data,temp)
+                if np.isnan(sharedloglikes[clusti,clustj]):
+                    sharedloglikes[clusti,clustj],sharedoffset[clusti,clustj] = get_shared_log_likelihood_offset(inputs,data,temp,common_kern)    
+                    individualloglikes[clusti,clustj], unused_offset = get_individual_log_likelihood_offset(inputs,data,temp,common_kern)
 
-        seploglikes = np.repeat(loglikes[:,None].T,len(loglikes),0)+np.repeat(loglikes[:,None],len(loglikes),1)
-        loglikeimprovement = pairloglikes - seploglikes #how much likelihood improves with clustering
-        top = np.unravel_index(np.nanargmax(pairloglikes-seploglikes), pairloglikes.shape)
+        loglikeimprovement = sharedloglikes - individualloglikes #how much likelihood improves with clustering
+        top = np.unravel_index(np.nanargmax(sharedloglikes-individualloglikes), sharedloglikes.shape)
 
         #if loglikeimprovement.shape[0]<3:
         # #no more clustering to do - this shouldn't happen really unless
@@ -149,9 +135,15 @@ def cluster(data,inputs,verbose=False):
         #    break 
         
         #if theres further clustering to be done...
+       # print("SHARED")
+       # print(sharedloglikes)
+       # print("IND")
+       # print(individualloglikes)        
+       # print("----")
         if loglikeimprovement[top[0],top[1]]>0:
+            #print(loglikeimprovement[top[0],top[1]])
             active[top[0]].extend(active[top[1]])
-            offset=pairoffset[top[0],top[1]]
+            offset=sharedoffset[top[0],top[1]]
             inputs[top[0]] = np.vstack([inputs[top[0]],inputs[top[1]]-offset])
             data[top[0]] = np.hstack([data[top[0]],data[top[1]]])
             del inputs[top[1]]
@@ -159,12 +151,14 @@ def cluster(data,inputs,verbose=False):
             del active[top[1]]
 
             #None = message to say we need to recalculate
-            pairloglikes[:,top[0]] = None 
-            pairloglikes[top[0],:] = None 
-            pairloglikes = np.delete(pairloglikes,top[1],0)
-            pairloglikes = np.delete(pairloglikes,top[1],1)
-            loglikes[top[0]] = None
-            loglikes = np.delete(loglikes,top[1])
+            sharedloglikes[:,top[0]] = None 
+            sharedloglikes[top[0],:] = None 
+            sharedloglikes = np.delete(sharedloglikes,top[1],0)
+            sharedloglikes = np.delete(sharedloglikes,top[1],1)
+            individualloglikes[:,top[0]] = None 
+            individualloglikes[top[0],:] = None 
+            individualloglikes = np.delete(individualloglikes,top[1],0)
+            individualloglikes = np.delete(individualloglikes,top[1],1)
         else:
             break