Clustering using direct model comparison

This commit is contained in:
Michael T Smith 2016-11-24 18:04:33 +00:00
parent f2f12787a7
commit be77e35b23
5 changed files with 396 additions and 79 deletions

View file

@ -27,6 +27,9 @@ from .src.eq_ode2 import EQ_ODE2
from .src.integral import Integral
from .src.integral_limits import Integral_Limits
from .src.multidimensional_integral_limits import Multidimensional_Integral_Limits
from .src.offset import Offset
from .src.tie import Tie
from .src.eq_ode1 import EQ_ODE1
from .src.trunclinear import TruncLinear,TruncLinear_inf
from .src.splitKern import SplitKern,DEtime

72
GPy/kern/src/offset.py Normal file
View file

@ -0,0 +1,72 @@
# Copyright (c) 2012 - 2014 the GPy Austhors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
# Written by Mike Smith. michaeltsmith.org.uk
from __future__ import division
import numpy as np
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
import math
class Offset(Kern):
"""
A kernel in which subsets of the data can be shifted.
:offsets list of offsets of length of number of subsets
"""
def __init__(self, kernel, input_dim, offsets=[], index_dim=-1, active_dims=None, name='offset'):
super(Offset, self).__init__(input_dim, active_dims, name)
assert isinstance(index_dim, int), "Offset kernel is only defined with one input dimension being the index"
self.kern = kernel
self.offsets = Param('offset', offsets)
self.link_parameter(self.offsets)
self.offset_index_dim = index_dim
self.link_parameters(kernel) #maybe whole class should inherit from CombinationKernel?
def shift(self,X):
self.selected = np.array([int(x) for x in X[:,self.offset_index_dim]])
offsets = np.hstack([0.0,self.offsets.values])[:,None]
return X - offsets[self.selected]
def K(self,X ,X2=None):
return self.kern.K(self.shift(X),X2)
def Kdiag(self,X):
return self.kern.Kdiag(self.shift(X))
def dX_doffset(self,sel,delta):
#given the select array (sel) and the offsets (delta)
#finds dX/dDelta
#returns them as a 2d matrix, one row/col[?] for each offset (delta).
#a matrix G represents the effect of increasing the offset on the X_i passed to the kernel for each input. For example
#what effect will increasing offset 4 have on the kernel output of input 5? Answer: Gs[4,5]... (positive or zero)
Gs = []
for i,d in enumerate(delta):
G = np.array(sel==(i+1))[:,None]*1
Gs.append(G)
return np.array(Gs)*2.0
def update_gradients_full(self,dL_dK,X,X2=None):
dL_dX = self.kern.gradients_X(dL_dK, self.shift(X), X)
dX_doff = self.dX_doffset(self.selected,self.offsets.values)
for i in range(len(dX_doff)):
dL_doff = dL_dX * dX_doff[i]
self.offsets.gradient[i] = -np.sum(dL_doff)
self.kern.update_gradients_full(dL_dK,self.shift(X),X2)
def gradients_X(self,dL_dK, X, X2=None):
return self.kern.gradients_X(dL_dK,self.shift(X),X2)
def gradients_X_diag(self, dL_dKdiag, X):
return self.kern.gradients_X_diag(dL_dKdiag, self.shift(X))
def update_gradients_diag(self, dL_dKdiag, X): #TODO What is this?
raise NotImplementedError("Not implemented Offset.update_gradients_diag")
#self.kern.update_gradients_diag(dL_dKdiag, self.shift(X))

109
GPy/kern/src/tie.py Normal file
View file

@ -0,0 +1,109 @@
# Copyright (c) 2012 - 2014 the GPy Austhors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
# Written by Mike Smith. michaeltsmith.org.uk
from __future__ import division
import numpy as np
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
import math
class Tie(Kern):
"""
A kernel which takes another kernel and ties its parameters together
:tied_param_list a list of lists of parameters to tie together. Each item
in the inner list is a regex, so one could use
['independ.offset.Mat32.lengthscale',
'independ.Mat32_1.lengthscale',
'independ.Mat32.lengthscale']
or just
['.*lengthscale']
"""
def __init__(self, kernel, input_dim, tied_param_list, active_dims=None, name='tie'):
#TODO We need to initialise the values of the parameters to be equal!
super(Tie, self).__init__(input_dim, active_dims, name)
self.kern = kernel
self.params = [] #list of parameter objects to tie together
for tlist in tied_param_list:
plist = [] #temp array for list of parameter objects for each tie
assert type(tlist) is list, "The tied_param_list should be a list of lists of strings"
for t in tlist: #expand regex in each inner list and add all matches
plist.extend(self.kern.grep_param_names(t))
if len(plist)==0:
print("Warning: No parameters were added for (%s)" % str(tlist))
else:
self.params.append(plist)
self.link_parameters(self.kern)
def get_totals(self,param_list):
"""
Returns the sum total of the gradients and values of the parameters in
the param_list
"""
v = None
g = None
for p in param_list:
if v is None:
v = p.values.copy()
g = p.gradient.copy()
else:
v += p.values
g += p.gradient
return v,g
def update_gradients_full(self,dL_dK,X,X2=None):
self.kern.update_gradients_full(dL_dK,X,X2)
for pitem in self.params:
l = len(pitem)
v,g = self.get_totals(pitem)
for p in pitem:
#p.param_array[:] = v/l #TODO: Just do once in __init__
p.gradient = g/l #pitem['main'].gradient
def gradients_X(self,dL_dK, X, X2=None):
return self.kern.gradients_X(dL_dK, X, X2=None)
def gradients_X_diag(self, dL_dKdiag, X):
return self.kern.gradients_X_diag(dL_dKdiag, X)
def K(self,X ,X2=None):
return self.kern.K(X,X2)
def Kdiag(self,X):
return self.kern.Kdiag(X)
def get_ties_names(self,html=False):
textlist = []
if html:
lb = "<br />"
else:
lb = "\n"
for ps in self.params:
innerlist = []
for p in ps:
innerlist.append(p.hierarchy_name())
textlist.append(innerlist)
st = lb+lb
st += "The following sets of parameters are tied:"
for texts in textlist:
st += lb
st += lb.join(texts)
st += lb
return st
def __str__(self, header=True, VT100=True):
st = super(Tie, self).__str__(header, VT100)
st += self.get_ties_names()
return st
def _repr_html_(self, header=True):
toprint = super(Tie,self)._repr_html_(header)
toprint+= self.get_ties_names(html=True)
return toprint

View file

@ -4,60 +4,72 @@
import GPy
import numpy as np
def add_index_column(inputs,data,clust):
def get_log_likelihood_diff(inputs,data,clusti,clustj,common_kern,noise_scale):
#offset indicies
ind_offset = np.vstack([np.zeros([N,1]),np.ones([N,1]),nans])
S = data[0].shape[0] #number of time series
X = np.zeros([0,2]) #notice the extra column, this is for the cluster index
Y = np.zeros([0,S])
#for each cluster, add their inputs and data to the new
#dataset. Note we add an index identifying which person is which data point.
#This is for the offset model to use, to allow it to know which data points
#to shift.
for i,p in enumerate(clust):
idx = i*np.ones([inputs[p].shape[0],1])
X = np.vstack([X,np.hstack([inputs[p],idx])])
Y = np.vstack([Y,data[p].T])
return X,Y
def get_individual_log_likelihood_offset(inputs,data,clust,common_kern):
"""Get the LL of a pair of clusters, but having them independent
arguments:
inputs -- the 'X's in a list, one item per cluster
data -- the 'Y's in a list, one item per cluster
clust -- list of clusters to use
returns log likelihood
"""
X,Y = add_index_column(inputs,data,clust)
k_independent = GPy.kern.IndependentOutputs(common_kern.copy(),index_dim=1)
m = GPy.models.GPRegression(X,Y,k_independent)
m.optimize()
ll=m.log_likelihood()
return ll
#independent output indicies
ind_indpoutputs = np.vstack([np.zeros([N*2,1]),np.ones([N,1]),np.ones([N,1])*2])
X = np.hstack([X,ind_offset,ind_indpoutputs])
Y1 = np.sin((X[0:N,0])/10.0)[:,None]
#Y2 = np.sin((X[0:N,0])/10.0)[:,None]
Y2 = np.cos((X[0:N,0])/10.0)[:,None]
Y1 += np.random.randn(Y1.shape[0],Y1.shape[1])*0.1
Y2 += np.random.randn(Y2.shape[0],Y2.shape[1])*0.1
Y = np.vstack([Y1,Y2,Y1,Y2])
def get_shared_log_likelihood_offset(inputs,data,clust,common_kern):
"""Get the log likelihood of a combined set of clusters, fitting the offsets
arguments:
inputs -- the 'X's in a list, one item per cluster
data -- the 'Y's in a list, one item per cluster
clust -- list of clusters to use
returns a tuple:
log likelihood and the offset
"""
X,Y = add_index_column(inputs,data,clust)
m = GPy.models.GPOffsetRegression(X,Y,common_kern.copy())
# m.offset.set_prior(GPy.priors.Gaussian(0,20))
m.optimize()
ll = m.log_likelihood()
offset = m.offset.values[0]
return ll,offset
#Structure of inputs:
# actual input | offset_kernel_index | indp_output_index
# 2.4 0 0
# 2.9 0 0
# 3.4 1 0
# 3.9 1 0
# 2.4 nan 1
# 2.9 nan 1
# 3.4 nan 2
# 3.9 nan 2
#print X
#print Y
def cluster(data,inputs,common_kern,verbose=False):
#base kernel to explain all time series with
common_kern = GPy.kern.Matern32(input_dim=1)
#the offset kernel, that can shift one time series wrt another
offset_kern = GPy.kern.Offset(common_kern,2,[0])
#we want to discourage massive offsets, which can achieve good fits by simply moving the two datasets far apart
offset_kern.offset.set_prior(GPy.priors.Gaussian(0,4.0))
#our overall kernel contains our offset kernel and two common kernels
independent_kern = GPy.kern.IndependentOutputs([offset_kern,common_kern.copy(),common_kern.copy()],index_dim=2)
tiekern = GPy.kern.Tie(independent_kern,3,[['.*lengthscale'],['.*variance']])
model = GPy.models.GPRegression(X,Y,tiekern)
model.optimize()
#base kernel to explain all time series with
common_kern = GPy.kern.Matern32(input_dim=1)
#the offset kernel, that can shift one time series wrt another
offset_kern = GPy.kern.Offset(common_kern,2,[0])
#we want to discourage massive offsets, which can achieve good fits by simply moving the two datasets far apart
offset_kern.offset.set_prior(GPy.priors.Gaussian(0,4.0))
#our overall kernel contains our offset kernel and two common kernels
independent_kern = GPy.kern.IndependentOutputs([common_kern.copy(),common_kern.copy()],index_dim=1)
independent_model = GPy.models.GPRegression(indepX,indepY,independent_kern)
offset_model = GPy.models.GPRegression(offsetX,offsetY,offset_kern)
def cluster(data,inputs,common_kern,noise_scale=1.0,verbose=False):
"""Clusters data
Using the new offset model, this method uses a greedy algorithm to cluster
@ -74,55 +86,44 @@ def cluster(data,inputs,common_kern,verbose=False):
"""
N=len(data)
#Define a set of N active cluster
active = []
for p in range(0,N):
active.append([p])
individualloglikes = np.zeros([len(active),len(active)])
individualloglikes[:] = None
sharedloglikes = np.zeros([len(active),len(active)])
sharedloglikes[:] = None
sharedoffset = np.zeros([len(active),len(active)])
diffloglikes = np.zeros([len(active),len(active)])
diffloglikes[:] = None
offsets = np.zeros([len(active),len(active)])
it = 0
while True:
if verbose:
it +=1
print("Iteration %d" % it)
#Compute the log-likelihood of each cluster
for clusti in range(len(active)):
#try combining with each other cluster...
for clusti in range(len(active)):
for clustj in range(clusti): #count from 0 to clustj-1
temp = [clusti,clustj]
if np.isnan(sharedloglikes[clusti,clustj]):
sharedloglikes[clusti,clustj],sharedoffset[clusti,clustj] = get_shared_log_likelihood_offset(inputs,data,temp,common_kern)
individualloglikes[clusti,clustj] = get_individual_log_likelihood_offset(inputs,data,temp,common_kern)
if np.isnan(diffloglikes[clusti,clustj]):
diffloglikes[clusti,clustj],offsets[clusti,clustj] = get_log_likelihood_diff(inputs,data,[clusti,clustj],common_kern,noise_scale)
loglikeimprovement = sharedloglikes - individualloglikes #how much likelihood improves with clustering
top = np.unravel_index(np.nanargmax(sharedloglikes-individualloglikes), sharedloglikes.shape)
#np.fill_diagonal(diffloglikes,np.nan)
if loglikeimprovement[top[0],top[1]]>0:
top = np.unravel_index(np.nanargmax(diffloglikes), diffloglikes.shape)
if diffloglikes[top[0],top[1]]>0:
active[top[0]].extend(active[top[1]])
offset=sharedoffset[top[0],top[1]]
offset=offsets[top[0],top[1]]
inputs[top[0]] = np.vstack([inputs[top[0]],inputs[top[1]]-offset])
data[top[0]] = np.hstack([data[top[0]],data[top[1]]])
del inputs[top[1]]
del data[top[1]]
del active[top[1]]
#None = we need to recalculate
sharedloglikes[:,top[0]] = None
sharedloglikes[top[0],:] = None
sharedloglikes = np.delete(sharedloglikes,top[1],0)
sharedloglikes = np.delete(sharedloglikes,top[1],1)
individualloglikes[:,top[0]] = None
individualloglikes[top[0],:] = None
individualloglikes = np.delete(individualloglikes,top[1],0)
individualloglikes = np.delete(individualloglikes,top[1],1)
diffloglikes[:,top[0]] = None
diffloglikes[top[0],:] = None
diffloglikes = np.delete(diffloglikes,top[1],0)
diffloglikes = np.delete(diffloglikes,top[1],1)
else:
break

View file

@ -0,0 +1,132 @@
# Copyright (c) 2016, Mike Smith
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import GPy
import numpy as np
def add_index_column(inputs,data,clust):
S = data[0].shape[0] #number of time series
X = np.zeros([0,2]) #notice the extra column, this is for the cluster index
Y = np.zeros([0,S])
#for each cluster, add their inputs and data to the new
#dataset. Note we add an index identifying which person is which data point.
#This is for the offset model to use, to allow it to know which data points
#to shift.
for i,p in enumerate(clust):
idx = i*np.ones([inputs[p].shape[0],1])
X = np.vstack([X,np.hstack([inputs[p],idx])])
Y = np.vstack([Y,data[p].T])
return X,Y
def get_individual_log_likelihood_offset(inputs,data,clust,common_kern,noise_scale):
m = GPy.models.GPRegression(inputs[clust],data[clust].T,common_kern.copy())
m.Gaussian_noise.fix(noise_scale)
m.optimize()
#m.optimize_restarts(2,verbose=False)
ll = np.sum(m.LOO())
return ll
def get_shared_log_likelihood_offset(inputs,data,clust,common_kern,noise_scale):
"""Get the log likelihood of a combined set of clusters, fitting the offsets
arguments:
inputs -- the 'X's in a list, one item per cluster
data -- the 'Y's in a list, one item per cluster
clust -- list of clusters to use
returns a tuple:
log likelihood and the offset
"""
X,Y = add_index_column(inputs,data,clust)
m = GPy.models.GPOffsetRegression(X,Y,common_kern.copy())
m.offset.set_prior(GPy.priors.Gaussian(0,20))
m.Gaussian_noise.fix(noise_scale)
try:
m.optimize()
#m.optimize_restarts(2,verbose=False)
except np.linalg.LinAlgError:
print("Optimization failed: assuming poor fit")
return -10000,0 #not being able to fit this is probably a sign this isn't a good fit?? TODO
ll = np.sum(m.LOO())
offset = m.offset.values[0]
return ll,offset
def cluster(data,inputs,common_kern,noise_scale=1.0,verbose=False):
"""Clusters data
Using the new offset model, this method uses a greedy algorithm to cluster
the data. It starts with all the data points in separate clusters and tests
whether combining them increases the overall log-likelihood (LL). It then
iteratively joins pairs of clusters which cause the greatest increase in
the LL, until no join increases the LL.
arguments:
inputs -- the 'X's in a list, one item per cluster
data -- the 'Y's in a list, one item per cluster
returns a list of the clusters.
"""
N=len(data)
#Define a set of N active cluster
active = []
for p in range(0,N):
active.append([p])
individualloglikes = np.zeros([len(active)])
individualloglikes[:] = None
sharedloglikes = np.zeros([len(active),len(active)])
sharedloglikes[:] = None
sharedoffset = np.zeros([len(active),len(active)])
it = 0
while True:
if verbose:
it +=1
print("Iteration %d" % it)
#Compute the log-likelihood of each cluster
for clusti in range(len(active)):
individualloglikes[clusti] = get_individual_log_likelihood_offset(inputs,data,clusti,common_kern,noise_scale)
#try combining with each other cluster...
for clusti in range(len(active)):
for clustj in range(clusti): #count from 0 to clustj-1
temp = [clusti,clustj]
if np.isnan(sharedloglikes[clusti,clustj]):
sharedloglikes[clusti,clustj],sharedoffset[clusti,clustj] = get_shared_log_likelihood_offset(inputs,data,temp,common_kern,noise_scale)
#how much likelihood improves with clustering
loglikeimprovement = sharedloglikes - (individualloglikes[:,None] + individualloglikes[None,:])
np.fill_diagonal(loglikeimprovement,np.nan)
top = np.unravel_index(np.nanargmax(sharedloglikes-individualloglikes), sharedloglikes.shape)
#print loglikeimprovement
if loglikeimprovement[top[0],top[1]]>-10:
active[top[0]].extend(active[top[1]])
offset=sharedoffset[top[0],top[1]]
inputs[top[0]] = np.vstack([inputs[top[0]],inputs[top[1]]-offset])
data[top[0]] = np.hstack([data[top[0]],data[top[1]]])
del inputs[top[1]]
del data[top[1]]
del active[top[1]]
#None = we need to recalculate
sharedloglikes[:,top[0]] = None
sharedloglikes[top[0],:] = None
sharedloglikes = np.delete(sharedloglikes,top[1],0)
sharedloglikes = np.delete(sharedloglikes,top[1],1)
individualloglikes[top[0]] = None
individualloglikes[top[1]] = None
individualloglikes = np.delete(individualloglikes,top[1])
#print len(individualloglikes)
else:
break
#TODO Add a way to return the offsets applied to all the time series
return active