mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-15 06:52:39 +02:00
[merge] devel
This commit is contained in:
commit
746d4daae8
8 changed files with 356 additions and 3 deletions
|
|
@ -26,3 +26,5 @@ from .dpgplvm import DPBayesianGPLVM
|
|||
from .state_space_model import StateSpace
|
||||
|
||||
from .ibp_lfm import IBPLFM
|
||||
|
||||
from .gp_offset_regression import GPOffsetRegression
|
||||
|
|
|
|||
95
GPy/models/gp_offset_regression.py
Normal file
95
GPy/models/gp_offset_regression.py
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
# Copyright (c) 2012 - 2014 the GPy Authors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
# Written by Mike Smith. michaeltsmith.org.uk
|
||||
|
||||
import numpy as np
|
||||
from ..core import GP
|
||||
from .. import likelihoods
|
||||
from .. import kern
|
||||
from ..core import Param
|
||||
|
||||
class GPOffsetRegression(GP):
    """
    Gaussian Process model for offset regression.

    Each input row carries a cluster label in its last column; the model learns
    one offset hyperparameter per non-zero cluster label and shifts that
    cluster's inputs by the learned offset before fitting a standard GP.
    Cluster 0 acts as the un-shifted reference (its offset is fixed at zero).

    :param X: input observations, we assume for this class that this has one
        dimension of actual inputs and the last dimension should be the index
        of the cluster (so X should be Nx2)
    :param Y: observed values (Nx1?)
    :param kernel: a GPy kernel, defaults to rbf
    :param Norm normalizer: [False]
    :param noise_var: the noise variance for Gaussian likelihood, defaults to 1.

    Normalize Y with the norm given.
    If normalizer is False, no normalization will be done
    If it is None, we use GaussianNorm(alization)

    .. Note:: Multiple independent outputs are allowed using columns of Y
    """

    def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1., mean_function=None):

        assert X.shape[1]>1, "Need at least two input dimensions, as last dimension is the label of the cluster"
        if kernel is None:
            # Kernel acts only on the real input dimensions, not the label column.
            kernel = kern.RBF(X.shape[1]-1)

        #self._log_marginal_likelihood = np.nan #todo

        likelihood = likelihoods.Gaussian(variance=noise_var)
        # Keep an unshifted copy of the real inputs; parameters_changed()
        # rebuilds self.X from this each time the offsets move.
        self.X_fixed = X[:,:-1]
        # Integer cluster label per row, taken from the last column of X.
        self.selected = np.array([int(x) for x in X[:,-1]])

        super(GPOffsetRegression, self).__init__(X, Y, kernel, likelihood, name='GP offset regression', Y_metadata=Y_metadata, normalizer=normalizer, mean_function=mean_function)
        # One offset parameter per cluster label 1..maxcluster; label 0 has no
        # offset (it is the reference cluster).
        maxcluster = np.max(self.selected)
        self.offset = Param('offset', np.zeros(maxcluster))
        #self.offset.set_prior(...)
        self.link_parameter(self.offset)

    #def dr_doffset(self, X, sel): #how much r changes wrt the offset hyperparameters

    #def dL_doffset(self, X, sel):
    #    dL_dr = self.dK_dr_via_X(X, X) * dL_dK

    def dr_doffset(self,X,sel,delta):
        # Given an input matrix X (Nx1, already offset-shifted), the cluster
        # labels sel and the current offsets (delta), finds dr/dDelta and
        # returns them as one NxN matrix per offset (delta).
        # NOTE(review): assumes X is a single real input dimension (Nx1) —
        # the pairwise-difference construction below would not be valid for
        # higher-dimensional inputs; confirm against callers.

        # A matrix G represents the effect of increasing the offset on the
        # radius passed to the kernel for each input pair. For example:
        # what effect will increasing offset 4 have on the kernel output of
        # inputs 5 and 8? Answer: Gs[4][5,8]... (positive or negative).
        # G[a,b] is +1 if only row a is in cluster i+1, -1 if only row b is,
        # and 0 if both or neither are (shifting both leaves r unchanged).
        Gs = []
        for i,d in enumerate(delta):
            #X[sel==(i+1)]-=d
            G = np.repeat(np.array(sel==(i+1))[:,None]*1,len(X),axis=1) - np.repeat(np.array(sel==(i+1))[None,:]*1,len(X),axis=0)
            Gs.append(G)
        # w[a,b] = X[a] - X[b]: does subtracting the two Xs end up positive or
        # negative (if negative we need to flip the sign in G).
        w = np.repeat(X,len(X),axis=1) - np.repeat(X.T,len(X),axis=0)
        dr_doffsets = []
        for i,d in enumerate(delta):
            dr_doffset = np.sign(w * Gs[i])
            #print "dr_doffset %d" % i
            #print dr_doffset
            #print Gs[i]
            #print w
            dr_doffsets.append(dr_doffset)

        # Lastly we need to divide by the lengthscale: so far we've found
        # d(X_i - X_j)/dOffsets; we want dr/dOffsets, and
        # (X_i - X_j)/lengthscale = r.
        # NOTE(review): dr_doffsets is a Python list here; this relies on
        # numpy's right-division promoting it to an ndarray.
        dr_doffsets /= self.kern.lengthscale
        return dr_doffsets

    def parameters_changed(self):
        # Prepend the fixed zero offset for reference cluster 0, then shift
        # each row's input by its cluster's offset.
        offsets = np.hstack([0.0,self.offset.values])[:,None]

        self.X = self.X_fixed - offsets[self.selected]
        # Let the base GP recompute posterior and grad_dict for the shifted X.
        super(GPOffsetRegression, self).parameters_changed()

        # Chain rule: dL/dr = dK/dr * dL/dK, elementwise over input pairs.
        dL_dr = self.kern.dK_dr_via_X(self.X, self.X) * self.grad_dict['dL_dK']

        dr_doff = self.dr_doffset(self.X,self.selected,self.offset.values)
        for i in range(len(dr_doff)):
            dL_doff = dL_dr * dr_doff[i]
            # Negative sign: X enters as X_fixed MINUS offset, so increasing
            # the offset decreases the shifted inputs.
            self.offset.gradient[i] = -np.sum(dL_doff)
|
||||
|
||||
|
|
@ -61,6 +61,8 @@ class AbstractPlottingLibrary(object):
|
|||
"""
|
||||
Get a new figure with nrows and ncolumns subplots.
|
||||
Does not initialize the canvases yet.
|
||||
|
||||
There is individual kwargs for the individual plotting libraries to use.
|
||||
"""
|
||||
raise NotImplementedError("Implement all plot functions in AbstractPlottingLibrary in order to use your own plotting library")
|
||||
|
||||
|
|
|
|||
|
|
@ -427,8 +427,29 @@ class MiscTests(unittest.TestCase):
|
|||
warp_m.predict_quantiles(X)
|
||||
warp_m.log_predictive_density(X, Y)
|
||||
warp_m.predict_in_warped_space = False
|
||||
warp_m.predict(X)
|
||||
warp_m.predict_quantiles(X)
|
||||
warp_m.plot()
|
||||
warp_m.predict_in_warped_space = True
|
||||
warp_m.plot()
|
||||
|
||||
def test_offset_regression(self):
    """Test GPy.models.GPOffsetRegression on two small time series.

    Two copies of the same sine-wave samples are used, with the second
    copy's inputs shifted by a known offset. The model's offset
    hyperparameter should converge to (approximately) that shift, and
    its analytic gradients should match numerical approximations.
    """
    true_offset = 3
    series_a = np.arange(0, 50, 5.0)[:, None]
    series_b = np.arange(0 + true_offset, 50 + true_offset, 5.0)[:, None]
    inputs = np.vstack([series_a, series_b])
    # Last column labels which cluster (time series) each row belongs to.
    labels = np.vstack([np.zeros([10, 1]), np.ones([10, 1])])
    inputs = np.hstack([inputs, labels])
    targets = np.sin(inputs[0:10, 0] / 30.0)[:, None]
    targets = np.vstack([targets, targets])

    model = GPy.models.GPOffsetRegression(inputs, targets)
    # Make the lengthscale something other than one so the gradient
    # check is non-trivial.
    model.rbf.lengthscale = 5.0
    assert model.checkgrad(), "Gradients of offset parameters don't match numerical approximations."
    model.optimize()
    assert np.abs(model.offset[0] - true_offset) < 0.1, ("GPOffsetRegression model failing to estimate correct offset (value estimated = %0.2f instead of %0.2f)" % (model.offset[0], true_offset))
|
||||
|
||||
|
||||
class GradientTests(np.testing.TestCase):
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@
|
|||
#===============================================================================
|
||||
|
||||
import unittest, numpy as np
|
||||
import GPy
|
||||
|
||||
class TestDebug(unittest.TestCase):
|
||||
def test_checkFinite(self):
|
||||
|
|
@ -107,3 +108,50 @@ class TestDebug(unittest.TestCase):
|
|||
self.assertTrue(d[tuple(X[:,4])] == d[tuple(X[:,0])] == [0, 4, 5])
|
||||
self.assertTrue(d[tuple(X[:,1])] == [1])
|
||||
|
||||
def test_offset_cluster(self):
    """Test the GPy.util.cluster_with_offset.cluster utility.

    Uses a small fixed data set (no random noise, so the test cannot
    intermittently fail to cluster). Ground-truth cluster identifiers
    are [0, 1, 1, 0], i.e. series {1, 2} belong together and so do
    series {0, 3}.
    """
    # data contains the four sets of time series (3 series per data point,
    # 5 samples each).
    data = [np.array([[ 2.18094245, 1.96529789, 2.00265523, 2.18218742, 2.06795428],
                      [ 1.62254829, 1.75748448, 1.83879347, 1.87531326, 1.52503496],
                      [ 1.54589609, 1.61607914, 2.00463192, 1.48771394, 1.63339218]]),
            np.array([[ 2.86766106, 2.97953437, 2.91958876, 2.92510506, 3.03239241],
                      [ 2.57368423, 2.59954886, 3.10000395, 2.75806125, 2.89865704],
                      [ 2.58916318, 2.53698259, 2.63858411, 2.63102504, 2.51853901]]),
            np.array([[ 2.77834168, 2.9618564 , 2.88482141, 3.24259745, 2.9716821 ],
                      [ 2.60675576, 2.67095624, 2.94824436, 2.80520631, 2.87247516],
                      [ 2.49543562, 2.5492281 , 2.6505866 , 2.65015308, 2.59738616]]),
            np.array([[ 1.76783086, 2.21666738, 2.07939706, 1.9268263 , 2.23360121],
                      [ 1.94305547, 1.94648592, 2.1278921 , 2.09481457, 2.08575238],
                      [ 1.69336013, 1.72285186, 1.6339506 , 1.61212022, 1.39198698]])]

    # inputs contains their associated X values.
    inputs = [np.array([[ 0.        ],
                        [ 0.68040097],
                        [ 1.20316795],
                        [ 1.798749  ],
                        [ 2.14891733]]), np.array([[ 0.        ],
                        [ 0.51910637],
                        [ 0.98259352],
                        [ 1.57442965],
                        [ 1.82515098]]), np.array([[ 0.        ],
                        [ 0.66645478],
                        [ 1.59464591],
                        [ 1.69769551],
                        [ 1.80932752]]), np.array([[ 0.        ],
                        [ 0.87512108],
                        [ 1.71881079],
                        [ 2.67162871],
                        [ 3.23761907]])]

    # Try doing the clustering.
    active = GPy.util.cluster_with_offset.cluster(data, inputs)
    # Compare as a set of frozensets so neither the order of clusters nor
    # the order of members within a cluster matters.
    clusters = set([frozenset(cluster) for cluster in active])
    # Use unittest assertions (bare `assert` is stripped under `python -O`);
    # also fixes the misspelled "algoirthm" in the original failure message.
    self.assertIn(set([1, 2]), clusters, "Offset Clustering algorithm failed")
    self.assertIn(set([0, 3]), clusters, "Offset Clustering algorithm failed")
|
||||
|
|
|
|||
|
|
@ -16,3 +16,4 @@ from . import initialization
|
|||
from . import multioutput
|
||||
from . import parallel
|
||||
from . import functions
|
||||
from . import cluster_with_offset
|
||||
|
|
|
|||
184
GPy/util/cluster_with_offset.py
Normal file
184
GPy/util/cluster_with_offset.py
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
# Copyright (c) 2016, Mike Smith
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import GPy
|
||||
import numpy as np
|
||||
import sys #so I can print dots
|
||||
|
||||
def get_log_likelihood(inputs, data, clust):
    """Get the LL of a combined set of clusters, ignoring time series offsets.

    Get the log likelihood of a cluster without worrying about the fact
    different time series are offset. We're using it here really for those
    cases in which we only have one cluster to get the loglikelihood of.

    arguments:
    inputs -- the 'X's in a list, one item per cluster
    data -- the 'Y's in a list, one item per cluster
    clust -- list of clusters to use

    returns a tuple:
    log likelihood and the offset (which is always zero for this model)
    """

    num_series = data[0].shape[0]  # number of time series

    # Build a new dataset from the clusters by stacking every member's
    # inputs and data together (the empty seed arrays fix the column counts).
    x_blocks = [np.zeros([0, 1])] + [inputs[p] for p in clust]
    y_blocks = [np.zeros([0, num_series])] + [data[p].T for p in clust]
    X = np.vstack(x_blocks)
    Y = np.vstack(y_blocks)

    # Find the loglikelihood of a single multi-output GP over the combined
    # data (equivalent to summing per-series LLs with shared hyperparameters).
    model = GPy.models.GPRegression(X, Y)
    model.optimize()
    return model.log_likelihood(), 0
|
||||
|
||||
def get_log_likelihood_offset(inputs, data, clust):
    """Get the log likelihood of a combined set of clusters, fitting the offsets

    arguments:
    inputs -- the 'X's in a list, one item per cluster
    data -- the 'Y's in a list, one item per cluster
    clust -- list of clusters to use

    returns a tuple:
    log likelihood and the offset
    """

    # With a single cluster the offset model has an error, so fall back to
    # plain GP regression (which reports a zero offset).
    if len(clust) == 1:
        return get_log_likelihood(inputs, data, clust)

    num_series = data[0].shape[0]  # number of time series

    # Note the extra column: it holds the cluster index, which the offset
    # model uses to know which data points to shift.
    X = np.zeros([0, 2])
    Y = np.zeros([0, num_series])
    for cluster_idx, person in enumerate(clust):
        index_col = cluster_idx * np.ones([inputs[person].shape[0], 1])
        X = np.vstack([X, np.hstack([inputs[person], index_col])])
        Y = np.vstack([Y, data[person].T])

    model = GPy.models.GPOffsetRegression(X, Y)
    # TODO: How to select a sensible prior?
    model.offset.set_prior(GPy.priors.Gaussian(0, 20))
    # TODO: Set a sensible start value for the length scale;
    # make it long to help the offset fit.

    model.optimize()

    return model.log_likelihood(), model.offset.values[0]
|
||||
|
||||
def cluster(data,inputs,verbose=False):
    """Clusters data

    Using the new offset model, this method uses a greedy algorithm to cluster
    the data. It starts with all the data points in separate clusters and tests
    whether combining them increases the overall log-likelihood (LL). It then
    iteratively joins pairs of clusters which cause the greatest increase in
    the LL, until no join increases the LL.

    arguments:
    data -- the 'Y's in a list, one item per cluster (note: data is the FIRST
        positional argument, inputs the second)
    inputs -- the 'X's in a list, one item per cluster
    verbose -- if True, print progress (iteration count and a dot per cluster)

    returns a list of the clusters.

    .. Note:: `data` and `inputs` are modified in place (merged entries are
        combined and the absorbed entries deleted).
    """
    N=len(data)

    # Define a set of N active clusters, each starting with one data point.
    active = []
    for p in range(0,N):
        active.append([p])

    # NaN is used throughout as a "needs recomputing" marker in the
    # likelihood caches (assigning None into a float array stores NaN).
    loglikes = np.zeros(len(active))
    loglikes[:] = None

    # Cached LL of merging each pair (lower triangle only), and the offset
    # that merge would apply.
    pairloglikes = np.zeros([len(active),len(active)])
    pairloglikes[:] = None
    pairoffset = np.zeros([len(active),len(active)])

    it = 0
    while True:

        if verbose:
            it +=1
            print("Iteration %d" % it)

        # Compute the log-likelihood of each cluster alone, and of each
        # pairwise merge, filling in only the entries marked stale (NaN).
        for clusti in range(len(active)):
            if verbose:
                sys.stdout.write('.')
                sys.stdout.flush()
            if np.isnan(loglikes[clusti]):
                loglikes[clusti], unused_offset = get_log_likelihood_offset(inputs,data,[clusti])

            # Try combining with each other cluster...
            for clustj in range(clusti): #count from 0 to clusti-1
                temp = [clusti,clustj]
                if np.isnan(pairloglikes[clusti,clustj]):
                    pairloglikes[clusti,clustj],pairoffset[clusti,clustj] = get_log_likelihood_offset(inputs,data,temp)

        # seploglikes[i,j] = LL(i alone) + LL(j alone); the improvement is how
        # much merging i and j beats keeping them separate.
        seploglikes = np.repeat(loglikes[:,None].T,len(loglikes),0)+np.repeat(loglikes[:,None],len(loglikes),1)
        loglikeimprovement = pairloglikes - seploglikes #how much likelihood improves with clustering
        # Best candidate merge (nanargmax skips the untested upper triangle).
        top = np.unravel_index(np.nanargmax(pairloglikes-seploglikes), pairloglikes.shape)

        #if loglikeimprovement.shape[0]<3:
        #    #no more clustering to do - this shouldn't happen really unless
        #    #we've set the threshold to apply clustering to less than 0
        #    break

        # If there's further clustering to be done...
        if loglikeimprovement[top[0],top[1]]>0:
            # Merge cluster top[1] into top[0]: absorb its members, shift its
            # inputs by the fitted offset, and delete the absorbed entry from
            # every parallel structure.
            active[top[0]].extend(active[top[1]])
            offset=pairoffset[top[0],top[1]]
            inputs[top[0]] = np.vstack([inputs[top[0]],inputs[top[1]]-offset])
            data[top[0]] = np.hstack([data[top[0]],data[top[1]]])
            del inputs[top[1]]
            del data[top[1]]
            del active[top[1]]

            # None (stored as NaN) = message to say we need to recalculate
            # everything involving the merged cluster.
            pairloglikes[:,top[0]] = None
            pairloglikes[top[0],:] = None
            pairloglikes = np.delete(pairloglikes,top[1],0)
            pairloglikes = np.delete(pairloglikes,top[1],1)
            loglikes[top[0]] = None
            loglikes = np.delete(loglikes,top[1])
        else:
            # No merge improves the likelihood: clustering is finished.
            break

    #if loglikeimprovement[top[0],top[1]]>0:
    #    print "joined"
    #    print top
    #    print offset

    #TODO Add a way to return the offsets applied to all the time series
    return active
|
||||
|
||||
#starttime = time.time()
|
||||
#active = cluster(data,inputs)
|
||||
#endtime = time.time()
|
||||
#print "TOTAL TIME %0.4f" % (endtime-starttime)
|
||||
Loading…
Add table
Add a link
Reference in a new issue