From bd758ac849eff3f13f58c61b7a41f0fc1f1a395f Mon Sep 17 00:00:00 2001
From: Michael T Smith
Date: Tue, 9 Aug 2016 14:54:29 +0100
Subject: [PATCH] Removing 'threaded' version

---
 GPy/util/__init__.py                     |   1 -
 GPy/util/threaded_cluster_with_offset.py | 209 -----------------------
 2 files changed, 210 deletions(-)
 delete mode 100644 GPy/util/threaded_cluster_with_offset.py

diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py
index 136e0d3b..685551fd 100644
--- a/GPy/util/__init__.py
+++ b/GPy/util/__init__.py
@@ -17,4 +17,3 @@ from . import multioutput
 from . import parallel
 from . import functions
 from . import cluster_with_offset
-from . import threaded_cluster_with_offset
diff --git a/GPy/util/threaded_cluster_with_offset.py b/GPy/util/threaded_cluster_with_offset.py
deleted file mode 100644
index 6818f5ec..00000000
--- a/GPy/util/threaded_cluster_with_offset.py
+++ /dev/null
@@ -1,209 +0,0 @@
-# Copyright (c) 2016, Mike Smith
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-# Not recommended for use at the moment - the threading here doesn't
-# work due to the global interpreter lock.
-
-import GPy
-import numpy as np
-import time
-import sys #so I can print dots
-from threading import Thread
-maxthreads = 40
-
-
-def get_log_likelihood(inputs,data,clust,loglikesarray):
-    """Get the LL of a combined set of clusters, ignoring time series offsets.
-
-    Get the log likelihood of a cluster without worrying about the fact that
-    different time series are offset. We're using it here really for those
-    cases in which we only have one cluster to get the loglikelihood of.
-
-    arguments:
-    inputs -- the 'X's in a list, one item per cluster
-    data -- the 'Y's in a list, one item per cluster
-    clust -- list of clusters to use
-
-    results:
-    writes the log likelihood into loglikesarray[clust[0]] (the offset is always zero for this model)
-    """
-
-    S = data[0].shape[0] #number of time series
-
-    #build a new dataset from the clusters, by combining all clusters together
-    X = np.zeros([0,1])
-    Y = np.zeros([0,S])
-
-    #for each person in the cluster,
-    #add their inputs and data to the new dataset
-    for p in clust:
-        X = np.vstack([X,inputs[p]])
-        Y = np.vstack([Y,data[p].T])
-
-    #find the loglikelihood. We just add together the LL for each time series.
-    #ll=0
-    #for s in range(S):
-    #    m = GPy.models.GPRegression(X,Y[:,s][:,None])
-    #    m.optimize()
-    #    ll+=m.log_likelihood()
-
-    m = GPy.models.GPRegression(X,Y)
-    m.optimize()
-    ll=m.log_likelihood()
-
-    loglikesarray[clust[0]] = ll
-    return
-
-def get_log_likelihood_offset(inputs,data,clust,loglikesarray,offsetarray):
-    """Get the log likelihood of a combined set of clusters, fitting the offsets.
-
-    arguments:
-    inputs -- the 'X's in a list, one item per cluster
-    data -- the 'Y's in a list, one item per cluster
-    clust -- list of clusters to use
-
-    results:
-    writes the log likelihood and fitted offset into loglikesarray[clust[0],clust[1]] and offsetarray[clust[0],clust[1]]
-    """
-
-    assert len(clust)>1, "Use get_log_likelihood if you only have one cluster"
-
-
-
-    S = data[0].shape[0] #number of time series
-
-    X = np.zeros([0,2]) #notice the extra column, this is for the cluster index
-    Y = np.zeros([0,S])
-
-    #for each person in the cluster, add their inputs and data to the new
-    #dataset. Note we add an index identifying which person is which data point.
-    #This is for the offset model to use, to allow it to know which data points
-    #to shift.
-    for i,p in enumerate(clust):
-        idx = i*np.ones([inputs[p].shape[0],1])
-        X = np.vstack([X,np.hstack([inputs[p],idx])])
-        Y = np.vstack([Y,data[p].T])
-
-    m = GPy.models.GPOffsetRegression(X,Y)
-    #TODO: How to select a sensible prior?
-    m.offset.set_prior(GPy.priors.Gaussian(0,20))
-    #TODO: Set a sensible start value for the length scale,
-    #make it long to help the offset fit.
-
-    m.optimize()
-
-    ll = m.log_likelihood()
-    offset = m.offset.values[0]
-    #return ll,offset
-    loglikesarray[clust[0],clust[1]] = ll
-    offsetarray[clust[0],clust[1]] = offset
-    return
-
-def cluster(data,inputs,verbose=False):
-    """Clusters data.
-
-    Using the new offset model, this method uses a greedy algorithm to cluster
-    the data. It starts with all the data points in separate clusters and tests
-    whether combining them increases the overall log-likelihood (LL). It then
-    iteratively joins pairs of clusters which cause the greatest increase in
-    the LL, until no join increases the LL.
-
-    arguments:
-    inputs -- the 'X's in a list, one item per cluster
-    data -- the 'Y's in a list, one item per cluster
-
-    returns a list of the clusters.
-    """
-    N=len(data)
-
-
-    #Define a set of N active clusters
-    active = []
-    for p in range(0,N):
-        active.append([p])
-
-    loglikes = np.zeros(len(active))
-    loglikes[:] = None
-
-    pairloglikes = np.zeros([len(active),len(active)])
-    pairloglikes[:] = None
-    pairoffset = np.zeros([len(active),len(active)])
-
-    it = 0
-    while True:
-
-        if verbose:
-            it += 1
-            print("Iteration %d" % it)
-
-        threads = []
-        #Compute the log-likelihood of each cluster on its own
-        for clusti in range(len(active)):
-            if verbose:
-                sys.stdout.write('.')
-                sys.stdout.flush()
-            if np.isnan(loglikes[clusti]):
-                t = Thread(target=get_log_likelihood,args=(inputs,data,[clusti],loglikes))
-                threads.append(t)
-                #loglikes[clusti], unused_offset = get_log_likelihood_offset(inputs,data,[clusti])
-
-            #try combining with each other cluster...
-            for clustj in range(clusti): #count from 0 to clusti-1
-                temp = [clusti,clustj]
-                if np.isnan(pairloglikes[clusti,clustj]):
-                    #pairloglikes[clusti,clustj],pairoffset[clusti,clustj] = get_log_likelihood_offset(inputs,data,temp)
-                    t = Thread(target=get_log_likelihood_offset,args=(inputs,data,temp,pairloglikes,pairoffset))
-                    threads.append(t)
-
-        if len(threads)<=maxthreads:
-            for t in threads:
-                t.start()
-            for t in threads:
-                t.join()
-
-        else: #should use a queue, as the method here assumes all threads take about the same time, etc.
-            for i,t in enumerate(threads):
-                t.start()
-                if (i>=maxthreads):
-                    threads[i-maxthreads].join()
-            for i in range(len(threads)-maxthreads,len(threads)):
-                threads[i].join()
-
-        seploglikes = np.repeat(loglikes[:,None].T,len(loglikes),0)+np.repeat(loglikes[:,None],len(loglikes),1)
-        loglikeimprovement = pairloglikes - seploglikes #how much the likelihood improves with clustering
-        top = np.unravel_index(np.nanargmax(pairloglikes-seploglikes), pairloglikes.shape)
-
-        #if loglikeimprovement.shape[0]<3:
-        #    #no more clustering to do - this shouldn't happen really unless
-        #    #we've set the threshold to apply clustering to less than 0
-        #    break
-
-        #if there's further clustering to be done...
-        if loglikeimprovement[top[0],top[1]]>0:
-            active[top[0]].extend(active[top[1]])
-            offset=pairoffset[top[0],top[1]]
-            inputs[top[0]] = np.vstack([inputs[top[0]],inputs[top[1]]-offset])
-            data[top[0]] = np.hstack([data[top[0]],data[top[1]]])
-            del inputs[top[1]]
-            del data[top[1]]
-            del active[top[1]]
-
-            #None = message to say we need to recalculate
-            pairloglikes[:,top[0]] = None
-            pairloglikes[top[0],:] = None
-            pairloglikes = np.delete(pairloglikes,top[1],0)
-            pairloglikes = np.delete(pairloglikes,top[1],1)
-            loglikes[top[0]] = None
-            loglikes = np.delete(loglikes,top[1])
-        else:
-            break
-
-        #if loglikeimprovement[top[0],top[1]]>0:
-        #    print "joined"
-        #    print top
-        #    print offset
-        #    print offsets
-        #    print offsets[top[1]]-offsets[top[0]]
-
-    #TODO Add a way to return the offsets applied to all the time series
-    return active
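
Context for the removal: as the deleted module's header comment says, CPython's global
interpreter lock stops the Thread workers above from running the CPU-bound GPy model
fits concurrently, so the threading gave no real speedup. The usual workaround would be
multiprocessing, where each fit runs in a separate interpreter. Below is a minimal
sketch of that idea, not part of this patch or of GPy; fit_combined, parallel_loglikes
and the pool size are hypothetical names chosen for illustration.

# Sketch only (not in this patch): processes instead of threads, so the
# CPU-bound GP fits run in parallel despite the GIL. fit_combined and
# parallel_loglikes are hypothetical names, not GPy API.
import GPy
from multiprocessing import Pool

def fit_combined(job):
    """Fit one GP to a candidate combined cluster; return its log likelihood.

    job is an (X, Y) pair stacked as in get_log_likelihood() above; only
    numpy arrays cross the process boundary, so pickling stays cheap.
    """
    X, Y = job
    m = GPy.models.GPRegression(X, Y)
    m.optimize()
    return m.log_likelihood()

def parallel_loglikes(jobs, processes=4):
    """Evaluate each (X, Y) job in its own worker process, in job order."""
    with Pool(processes) as pool:
        return pool.map(fit_combined, jobs)

In cluster(), the (X, Y) jobs would be collected where the threads are created now, and
the list that pool.map returns scattered back into loglikes and pairloglikes in the same
order. That replaces the writes into shared arrays that the threaded version relied on,
which worker processes would not see. On platforms that spawn rather than fork, the call
site also needs the usual if __name__ == "__main__" guard.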