diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index c9304f39..70474757 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -27,6 +27,9 @@ from .src.eq_ode2 import EQ_ODE2 from .src.integral import Integral from .src.integral_limits import Integral_Limits from .src.multidimensional_integral_limits import Multidimensional_Integral_Limits +from .src.offset import Offset +from .src.tie import Tie + from .src.eq_ode1 import EQ_ODE1 from .src.trunclinear import TruncLinear,TruncLinear_inf from .src.splitKern import SplitKern,DEtime diff --git a/GPy/kern/src/offset.py b/GPy/kern/src/offset.py new file mode 100644 index 00000000..86bc27fe --- /dev/null +++ b/GPy/kern/src/offset.py @@ -0,0 +1,72 @@ +# Copyright (c) 2012 - 2014 the GPy Austhors (see AUTHORS.txt) +# Licensed under the BSD 3-clause license (see LICENSE.txt) +# Written by Mike Smith. michaeltsmith.org.uk + +from __future__ import division +import numpy as np +from .kern import Kern +from ...core.parameterization import Param +from paramz.transformations import Logexp +import math + +class Offset(Kern): + """ + A kernel in which subsets of the data can be shifted. + + :offsets list of offsets of length of number of subsets + """ + def __init__(self, kernel, input_dim, offsets=[], index_dim=-1, active_dims=None, name='offset'): + super(Offset, self).__init__(input_dim, active_dims, name) + assert isinstance(index_dim, int), "Offset kernel is only defined with one input dimension being the index" + self.kern = kernel + + self.offsets = Param('offset', offsets) + self.link_parameter(self.offsets) + + self.offset_index_dim = index_dim + self.link_parameters(kernel) #maybe whole class should inherit from CombinationKernel? + + def shift(self,X): + self.selected = np.array([int(x) for x in X[:,self.offset_index_dim]]) + offsets = np.hstack([0.0,self.offsets.values])[:,None] + return X - offsets[self.selected] + + def K(self,X ,X2=None): + return self.kern.K(self.shift(X),X2) + + def Kdiag(self,X): + return self.kern.Kdiag(self.shift(X)) + + + + def dX_doffset(self,sel,delta): + #given the select array (sel) and the offsets (delta) + #finds dX/dDelta + #returns them as a 2d matrix, one row/col[?] for each offset (delta). + + #a matrix G represents the effect of increasing the offset on the X_i passed to the kernel for each input. For example + #what effect will increasing offset 4 have on the kernel output of input 5? Answer: Gs[4,5]... (positive or zero) + Gs = [] + for i,d in enumerate(delta): + G = np.array(sel==(i+1))[:,None]*1 + Gs.append(G) + return np.array(Gs)*2.0 + + def update_gradients_full(self,dL_dK,X,X2=None): + dL_dX = self.kern.gradients_X(dL_dK, self.shift(X), X) + dX_doff = self.dX_doffset(self.selected,self.offsets.values) + for i in range(len(dX_doff)): + dL_doff = dL_dX * dX_doff[i] + self.offsets.gradient[i] = -np.sum(dL_doff) + self.kern.update_gradients_full(dL_dK,self.shift(X),X2) + + def gradients_X(self,dL_dK, X, X2=None): + return self.kern.gradients_X(dL_dK,self.shift(X),X2) + + def gradients_X_diag(self, dL_dKdiag, X): + return self.kern.gradients_X_diag(dL_dKdiag, self.shift(X)) + + def update_gradients_diag(self, dL_dKdiag, X): #TODO What is this? + raise NotImplementedError("Not implemented Offset.update_gradients_diag") + #self.kern.update_gradients_diag(dL_dKdiag, self.shift(X)) + diff --git a/GPy/kern/src/tie.py b/GPy/kern/src/tie.py new file mode 100644 index 00000000..13aa1bb3 --- /dev/null +++ b/GPy/kern/src/tie.py @@ -0,0 +1,109 @@ +# Copyright (c) 2012 - 2014 the GPy Austhors (see AUTHORS.txt) +# Licensed under the BSD 3-clause license (see LICENSE.txt) +# Written by Mike Smith. michaeltsmith.org.uk + +from __future__ import division +import numpy as np +from .kern import Kern +from ...core.parameterization import Param +from paramz.transformations import Logexp +import math + +class Tie(Kern): + """ + A kernel which takes another kernel and ties its parameters together + + :tied_param_list a list of lists of parameters to tie together. Each item + in the inner list is a regex, so one could use + ['independ.offset.Mat32.lengthscale', + 'independ.Mat32_1.lengthscale', + 'independ.Mat32.lengthscale'] + or just + ['.*lengthscale'] + """ + def __init__(self, kernel, input_dim, tied_param_list, active_dims=None, name='tie'): + #TODO We need to initialise the values of the parameters to be equal! + super(Tie, self).__init__(input_dim, active_dims, name) + self.kern = kernel + + self.params = [] #list of parameter objects to tie together + + for tlist in tied_param_list: + plist = [] #temp array for list of parameter objects for each tie + assert type(tlist) is list, "The tied_param_list should be a list of lists of strings" + for t in tlist: #expand regex in each inner list and add all matches + plist.extend(self.kern.grep_param_names(t)) + + if len(plist)==0: + print("Warning: No parameters were added for (%s)" % str(tlist)) + else: + self.params.append(plist) + + self.link_parameters(self.kern) + + + def get_totals(self,param_list): + """ + Returns the sum total of the gradients and values of the parameters in + the param_list + """ + v = None + g = None + for p in param_list: + if v is None: + v = p.values.copy() + g = p.gradient.copy() + else: + v += p.values + g += p.gradient + return v,g + + def update_gradients_full(self,dL_dK,X,X2=None): + self.kern.update_gradients_full(dL_dK,X,X2) + for pitem in self.params: + l = len(pitem) + v,g = self.get_totals(pitem) + for p in pitem: + #p.param_array[:] = v/l #TODO: Just do once in __init__ + p.gradient = g/l #pitem['main'].gradient + + def gradients_X(self,dL_dK, X, X2=None): + return self.kern.gradients_X(dL_dK, X, X2=None) + + def gradients_X_diag(self, dL_dKdiag, X): + return self.kern.gradients_X_diag(dL_dKdiag, X) + + def K(self,X ,X2=None): + return self.kern.K(X,X2) + + def Kdiag(self,X): + return self.kern.Kdiag(X) + + def get_ties_names(self,html=False): + textlist = [] + if html: + lb = "
" + else: + lb = "\n" + for ps in self.params: + innerlist = [] + for p in ps: + innerlist.append(p.hierarchy_name()) + textlist.append(innerlist) + st = lb+lb + st += "The following sets of parameters are tied:" + for texts in textlist: + st += lb + st += lb.join(texts) + st += lb + return st + + def __str__(self, header=True, VT100=True): + st = super(Tie, self).__str__(header, VT100) + st += self.get_ties_names() + return st + + def _repr_html_(self, header=True): + toprint = super(Tie,self)._repr_html_(header) + toprint+= self.get_ties_names(html=True) + return toprint diff --git a/GPy/util/cluster_with_offset.py b/GPy/util/cluster_with_offset.py index 32840a87..f172990a 100644 --- a/GPy/util/cluster_with_offset.py +++ b/GPy/util/cluster_with_offset.py @@ -4,60 +4,72 @@ import GPy import numpy as np -def add_index_column(inputs,data,clust): +def get_log_likelihood_diff(inputs,data,clusti,clustj,common_kern,noise_scale): + + #offset indicies + ind_offset = np.vstack([np.zeros([N,1]),np.ones([N,1]),nans]) - S = data[0].shape[0] #number of time series - - X = np.zeros([0,2]) #notice the extra column, this is for the cluster index - Y = np.zeros([0,S]) - - #for each cluster, add their inputs and data to the new - #dataset. Note we add an index identifying which person is which data point. - #This is for the offset model to use, to allow it to know which data points - #to shift. - for i,p in enumerate(clust): - idx = i*np.ones([inputs[p].shape[0],1]) - X = np.vstack([X,np.hstack([inputs[p],idx])]) - Y = np.vstack([Y,data[p].T]) - return X,Y - -def get_individual_log_likelihood_offset(inputs,data,clust,common_kern): - """Get the LL of a pair of clusters, but having them independent - - arguments: - inputs -- the 'X's in a list, one item per cluster - data -- the 'Y's in a list, one item per cluster - clust -- list of clusters to use - - returns log likelihood - """ - X,Y = add_index_column(inputs,data,clust) - k_independent = GPy.kern.IndependentOutputs(common_kern.copy(),index_dim=1) - m = GPy.models.GPRegression(X,Y,k_independent) - m.optimize() - ll=m.log_likelihood() - return ll + #independent output indicies + ind_indpoutputs = np.vstack([np.zeros([N*2,1]),np.ones([N,1]),np.ones([N,1])*2]) + X = np.hstack([X,ind_offset,ind_indpoutputs]) + Y1 = np.sin((X[0:N,0])/10.0)[:,None] + #Y2 = np.sin((X[0:N,0])/10.0)[:,None] + Y2 = np.cos((X[0:N,0])/10.0)[:,None] + Y1 += np.random.randn(Y1.shape[0],Y1.shape[1])*0.1 + Y2 += np.random.randn(Y2.shape[0],Y2.shape[1])*0.1 + Y = np.vstack([Y1,Y2,Y1,Y2]) -def get_shared_log_likelihood_offset(inputs,data,clust,common_kern): - """Get the log likelihood of a combined set of clusters, fitting the offsets - - arguments: - inputs -- the 'X's in a list, one item per cluster - data -- the 'Y's in a list, one item per cluster - clust -- list of clusters to use - - returns a tuple: - log likelihood and the offset - """ - X,Y = add_index_column(inputs,data,clust) - m = GPy.models.GPOffsetRegression(X,Y,common_kern.copy()) - # m.offset.set_prior(GPy.priors.Gaussian(0,20)) - m.optimize() - ll = m.log_likelihood() - offset = m.offset.values[0] - return ll,offset + #Structure of inputs: + # actual input | offset_kernel_index | indp_output_index + # 2.4 0 0 + # 2.9 0 0 + # 3.4 1 0 + # 3.9 1 0 + # 2.4 nan 1 + # 2.9 nan 1 + # 3.4 nan 2 + # 3.9 nan 2 + #print X + #print Y -def cluster(data,inputs,common_kern,verbose=False): + #base kernel to explain all time series with + common_kern = GPy.kern.Matern32(input_dim=1) + + #the offset kernel, that can shift one time series wrt another + offset_kern = GPy.kern.Offset(common_kern,2,[0]) + + #we want to discourage massive offsets, which can achieve good fits by simply moving the two datasets far apart + offset_kern.offset.set_prior(GPy.priors.Gaussian(0,4.0)) + + #our overall kernel contains our offset kernel and two common kernels + independent_kern = GPy.kern.IndependentOutputs([offset_kern,common_kern.copy(),common_kern.copy()],index_dim=2) + + tiekern = GPy.kern.Tie(independent_kern,3,[['.*lengthscale'],['.*variance']]) + + model = GPy.models.GPRegression(X,Y,tiekern) + model.optimize() + + + + + #base kernel to explain all time series with + common_kern = GPy.kern.Matern32(input_dim=1) + + #the offset kernel, that can shift one time series wrt another + offset_kern = GPy.kern.Offset(common_kern,2,[0]) + + #we want to discourage massive offsets, which can achieve good fits by simply moving the two datasets far apart + offset_kern.offset.set_prior(GPy.priors.Gaussian(0,4.0)) + + #our overall kernel contains our offset kernel and two common kernels + independent_kern = GPy.kern.IndependentOutputs([common_kern.copy(),common_kern.copy()],index_dim=1) + independent_model = GPy.models.GPRegression(indepX,indepY,independent_kern) + + offset_model = GPy.models.GPRegression(offsetX,offsetY,offset_kern) + + + +def cluster(data,inputs,common_kern,noise_scale=1.0,verbose=False): """Clusters data Using the new offset model, this method uses a greedy algorithm to cluster @@ -74,55 +86,44 @@ def cluster(data,inputs,common_kern,verbose=False): """ N=len(data) - #Define a set of N active cluster active = [] for p in range(0,N): active.append([p]) - individualloglikes = np.zeros([len(active),len(active)]) - individualloglikes[:] = None - sharedloglikes = np.zeros([len(active),len(active)]) - sharedloglikes[:] = None - sharedoffset = np.zeros([len(active),len(active)]) + diffloglikes = np.zeros([len(active),len(active)]) + diffloglikes[:] = None + offsets = np.zeros([len(active),len(active)]) it = 0 while True: - if verbose: it +=1 print("Iteration %d" % it) - - #Compute the log-likelihood of each cluster - for clusti in range(len(active)): - #try combining with each other cluster... + + for clusti in range(len(active)): for clustj in range(clusti): #count from 0 to clustj-1 - temp = [clusti,clustj] - if np.isnan(sharedloglikes[clusti,clustj]): - sharedloglikes[clusti,clustj],sharedoffset[clusti,clustj] = get_shared_log_likelihood_offset(inputs,data,temp,common_kern) - individualloglikes[clusti,clustj] = get_individual_log_likelihood_offset(inputs,data,temp,common_kern) + if np.isnan(diffloglikes[clusti,clustj]): + diffloglikes[clusti,clustj],offsets[clusti,clustj] = get_log_likelihood_diff(inputs,data,[clusti,clustj],common_kern,noise_scale) - loglikeimprovement = sharedloglikes - individualloglikes #how much likelihood improves with clustering - top = np.unravel_index(np.nanargmax(sharedloglikes-individualloglikes), sharedloglikes.shape) + #np.fill_diagonal(diffloglikes,np.nan) - if loglikeimprovement[top[0],top[1]]>0: + top = np.unravel_index(np.nanargmax(diffloglikes), diffloglikes.shape) + + if diffloglikes[top[0],top[1]]>0: active[top[0]].extend(active[top[1]]) - offset=sharedoffset[top[0],top[1]] + offset=offsets[top[0],top[1]] inputs[top[0]] = np.vstack([inputs[top[0]],inputs[top[1]]-offset]) data[top[0]] = np.hstack([data[top[0]],data[top[1]]]) del inputs[top[1]] del data[top[1]] del active[top[1]] - + #None = we need to recalculate - sharedloglikes[:,top[0]] = None - sharedloglikes[top[0],:] = None - sharedloglikes = np.delete(sharedloglikes,top[1],0) - sharedloglikes = np.delete(sharedloglikes,top[1],1) - individualloglikes[:,top[0]] = None - individualloglikes[top[0],:] = None - individualloglikes = np.delete(individualloglikes,top[1],0) - individualloglikes = np.delete(individualloglikes,top[1],1) + diffloglikes[:,top[0]] = None + diffloglikes[top[0],:] = None + diffloglikes = np.delete(diffloglikes,top[1],0) + diffloglikes = np.delete(diffloglikes,top[1],1) else: break diff --git a/GPy/util/cluster_with_offset_using_loo.py b/GPy/util/cluster_with_offset_using_loo.py new file mode 100644 index 00000000..6d03a65e --- /dev/null +++ b/GPy/util/cluster_with_offset_using_loo.py @@ -0,0 +1,132 @@ +# Copyright (c) 2016, Mike Smith +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import GPy +import numpy as np + +def add_index_column(inputs,data,clust): + + S = data[0].shape[0] #number of time series + + X = np.zeros([0,2]) #notice the extra column, this is for the cluster index + Y = np.zeros([0,S]) + + #for each cluster, add their inputs and data to the new + #dataset. Note we add an index identifying which person is which data point. + #This is for the offset model to use, to allow it to know which data points + #to shift. + for i,p in enumerate(clust): + idx = i*np.ones([inputs[p].shape[0],1]) + X = np.vstack([X,np.hstack([inputs[p],idx])]) + Y = np.vstack([Y,data[p].T]) + return X,Y + +def get_individual_log_likelihood_offset(inputs,data,clust,common_kern,noise_scale): + m = GPy.models.GPRegression(inputs[clust],data[clust].T,common_kern.copy()) + m.Gaussian_noise.fix(noise_scale) + m.optimize() + #m.optimize_restarts(2,verbose=False) + ll = np.sum(m.LOO()) + return ll + +def get_shared_log_likelihood_offset(inputs,data,clust,common_kern,noise_scale): + """Get the log likelihood of a combined set of clusters, fitting the offsets + + arguments: + inputs -- the 'X's in a list, one item per cluster + data -- the 'Y's in a list, one item per cluster + clust -- list of clusters to use + + returns a tuple: + log likelihood and the offset + """ + X,Y = add_index_column(inputs,data,clust) + m = GPy.models.GPOffsetRegression(X,Y,common_kern.copy()) + m.offset.set_prior(GPy.priors.Gaussian(0,20)) + m.Gaussian_noise.fix(noise_scale) + try: + m.optimize() + #m.optimize_restarts(2,verbose=False) + except np.linalg.LinAlgError: + print("Optimization failed: assuming poor fit") + return -10000,0 #not being able to fit this is probably a sign this isn't a good fit?? TODO + ll = np.sum(m.LOO()) + offset = m.offset.values[0] + return ll,offset + +def cluster(data,inputs,common_kern,noise_scale=1.0,verbose=False): + """Clusters data + + Using the new offset model, this method uses a greedy algorithm to cluster + the data. It starts with all the data points in separate clusters and tests + whether combining them increases the overall log-likelihood (LL). It then + iteratively joins pairs of clusters which cause the greatest increase in + the LL, until no join increases the LL. + + arguments: + inputs -- the 'X's in a list, one item per cluster + data -- the 'Y's in a list, one item per cluster + + returns a list of the clusters. + """ + N=len(data) + + + #Define a set of N active cluster + active = [] + for p in range(0,N): + active.append([p]) + + individualloglikes = np.zeros([len(active)]) + individualloglikes[:] = None + sharedloglikes = np.zeros([len(active),len(active)]) + sharedloglikes[:] = None + sharedoffset = np.zeros([len(active),len(active)]) + + it = 0 + while True: + + if verbose: + it +=1 + print("Iteration %d" % it) + + #Compute the log-likelihood of each cluster + for clusti in range(len(active)): + individualloglikes[clusti] = get_individual_log_likelihood_offset(inputs,data,clusti,common_kern,noise_scale) + + #try combining with each other cluster... + for clusti in range(len(active)): + for clustj in range(clusti): #count from 0 to clustj-1 + temp = [clusti,clustj] + if np.isnan(sharedloglikes[clusti,clustj]): + sharedloglikes[clusti,clustj],sharedoffset[clusti,clustj] = get_shared_log_likelihood_offset(inputs,data,temp,common_kern,noise_scale) + + #how much likelihood improves with clustering + loglikeimprovement = sharedloglikes - (individualloglikes[:,None] + individualloglikes[None,:]) + np.fill_diagonal(loglikeimprovement,np.nan) + + top = np.unravel_index(np.nanargmax(sharedloglikes-individualloglikes), sharedloglikes.shape) + #print loglikeimprovement + if loglikeimprovement[top[0],top[1]]>-10: + active[top[0]].extend(active[top[1]]) + offset=sharedoffset[top[0],top[1]] + inputs[top[0]] = np.vstack([inputs[top[0]],inputs[top[1]]-offset]) + data[top[0]] = np.hstack([data[top[0]],data[top[1]]]) + del inputs[top[1]] + del data[top[1]] + del active[top[1]] + + #None = we need to recalculate + sharedloglikes[:,top[0]] = None + sharedloglikes[top[0],:] = None + sharedloglikes = np.delete(sharedloglikes,top[1],0) + sharedloglikes = np.delete(sharedloglikes,top[1],1) + individualloglikes[top[0]] = None + individualloglikes[top[1]] = None + individualloglikes = np.delete(individualloglikes,top[1]) + #print len(individualloglikes) + else: + break + + #TODO Add a way to return the offsets applied to all the time series + return active