diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 43fa0147..7d092c26 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -75,6 +75,111 @@ def silhouette(): print(m) return m +def coregionalisation_toy2(): + """ + A simple demonstration of coregionalisation on two sinusoidal functions + """ + X1 = np.random.rand(50,1)*8 + X2 = np.random.rand(30,1)*5 + index = np.vstack((np.zeros_like(X1),np.ones_like(X2))) + X = np.hstack((np.vstack((X1,X2)),index)) + Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05 + Y2 = np.sin(X2) + np.random.randn(*X2.shape)*0.05 + 2. + Y = np.vstack((Y1,Y2)) + + k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) + k2 = GPy.kern.coregionalise(2,1) + k = k1.prod_orthogonal(k2) + m = GPy.models.GP_regression(X,Y,kernel=k) + m.constrain_fixed('rbf_var',1.) + m.constrain_positive('kappa') + m.ensure_default_constraints() + m.optimize() + + pb.figure() + Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1)))) + Xtest2 = np.hstack((np.linspace(0,9,100)[:,None],np.ones((100,1)))) + mean, var,low,up = m.predict(Xtest1) + GPy.util.plot.gpplot(Xtest1[:,0],mean,low,up) + mean, var,low,up = m.predict(Xtest2) + GPy.util.plot.gpplot(Xtest2[:,0],mean,low,up) + pb.plot(X1[:,0],Y1[:,0],'rx',mew=2) + pb.plot(X2[:,0],Y2[:,0],'gx',mew=2) + return m + + + + +def coregionalisation_toy(): + """ + A simple demonstration of coregionalisation on two sinusoidal functions + """ + X1 = np.random.rand(50,1)*8 + X2 = np.random.rand(30,1)*5 + index = np.vstack((np.zeros_like(X1),np.ones_like(X2))) + X = np.hstack((np.vstack((X1,X2)),index)) + Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05 + Y2 = -np.sin(X2) + np.random.randn(*X2.shape)*0.05 + Y = np.vstack((Y1,Y2)) + + k1 = GPy.kern.rbf(1) + k2 = GPy.kern.coregionalise(2,1) + k = k1.prod_orthogonal(k2) + m = GPy.models.GP_regression(X,Y,kernel=k) + m.constrain_fixed('rbf_var',1.) + m.constrain_positive('kappa') + m.ensure_default_constraints() + m.optimize() + + pb.figure() + Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1)))) + Xtest2 = np.hstack((np.linspace(0,9,100)[:,None],np.ones((100,1)))) + mean, var,low,up = m.predict(Xtest1) + GPy.util.plot.gpplot(Xtest1[:,0],mean,low,up) + mean, var,low,up = m.predict(Xtest2) + GPy.util.plot.gpplot(Xtest2[:,0],mean,low,up) + pb.plot(X1[:,0],Y1[:,0],'rx',mew=2) + pb.plot(X2[:,0],Y2[:,0],'gx',mew=2) + return m + + +def coregionalisation_sparse(): + """ + A simple demonstration of coregionalisation on two sinusoidal functions + """ + X1 = np.random.rand(500,1)*8 + X2 = np.random.rand(300,1)*5 + index = np.vstack((np.zeros_like(X1),np.ones_like(X2))) + X = np.hstack((np.vstack((X1,X2)),index)) + Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05 + Y2 = -np.sin(X2) + np.random.randn(*X2.shape)*0.05 + Y = np.vstack((Y1,Y2)) + + Z = np.hstack((np.random.rand(25,1)*8,np.random.randint(0,2,25)[:,None])) + + k1 = GPy.kern.rbf(1) + k2 = GPy.kern.coregionalise(2,2) + k = k1.prod_orthogonal(k2) + GPy.kern.white(2,0.001) + + m = GPy.models.sparse_GP_regression(X,Y,kernel=k,Z=Z) + m.constrain_fixed('rbf_var',1.) + m.constrain_positive('kappa') + m.constrain_fixed('iip') + m.ensure_default_constraints() + #m.optimize() + + pb.figure() + Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1)))) + Xtest2 = np.hstack((np.linspace(0,9,100)[:,None],np.ones((100,1)))) + mean, var,low,up = m.predict(Xtest1) + GPy.util.plot.gpplot(Xtest1[:,0],mean,low,up) + mean, var,low,up = m.predict(Xtest2) + GPy.util.plot.gpplot(Xtest2[:,0],mean,low,up) + pb.plot(X1[:,0],Y1[:,0],'rx',mew=2) + pb.plot(X2[:,0],Y2[:,0],'gx',mew=2) + return m + + def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000): """Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisey mode is higher.""" @@ -160,3 +265,4 @@ def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf) length_scale_lls.append(model.log_likelihood()) lls.append(length_scale_lls) return np.array(lls) + diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 3e20644b..625f6080 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -2,5 +2,5 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, product, product_orthogonal, symmetric +from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, product, product_orthogonal, symmetric, coregionalise from kern import kern diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 5d665d93..9b58c282 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -21,6 +21,7 @@ from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part from product import product as productpart from product_orthogonal import product_orthogonal as product_orthogonalpart from symmetric import symmetric as symmetric_part +from coregionalise import coregionalise as coregionalise_part #TODO these s=constructors are not as clean as we'd like. Tidy the code up #using meta-classes to make the objects construct properly wthout them. @@ -274,3 +275,8 @@ def symmetric(k): k_.parts = [symmetric_part(p) for p in k.parts] return k_ +def coregionalise(Nout,R=1, W=None, kappa=None): + p = coregionalise_part(Nout,R,W,kappa) + return kern(1,[p]) + + diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py new file mode 100644 index 00000000..c24cb568 --- /dev/null +++ b/GPy/kern/coregionalise.py @@ -0,0 +1,88 @@ +# Copyright (c) 2012, James Hensman and Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +from kernpart import kernpart +import numpy as np +from GPy.util.linalg import mdot, pdinv + +class coregionalise(kernpart): + """ + Kernel for Intrisec Corregionalization Models + """ + def __init__(self,Nout,R=1, W=None, kappa=None): + self.D = 1 + self.name = 'coregion' + self.Nout = Nout + self.R = R + if W is None: + self.W = np.ones((self.Nout,self.R)) + else: + assert W.shape==(self.Nout,self.R) + self.W = W + if kappa is None: + kappa = np.ones(self.Nout) + else: + assert kappa.shape==(self.Nout,) + self.kappa = kappa + self.Nparam = self.Nout*(self.R + 1) + self._set_params(np.hstack([self.W.flatten(),self.kappa])) + + def _get_params(self): + return np.hstack([self.W.flatten(),self.kappa]) + + def _set_params(self,x): + assert x.size == self.Nparam + self.kappa = x[-self.Nout:] + self.W = x[:-self.Nout].reshape(self.Nout,self.R) + self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa) + + def _get_param_names(self): + + return sum([['W%i_%i'%(i,j) for j in range(self.R)] for i in range(self.Nout)],[]) + ['kappa_%i'%i for i in range(self.Nout)] + + def K(self,index,index2,target): + index = np.asarray(index,dtype=np.int) + if index2 is None: + index2 = index + else: + index2 = np.asarray(index2,dtype=np.int) + ii,jj = np.meshgrid(index2,index) + target += self.B[ii,jj] + + def Kdiag(self,index,target): + target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()] + + def dK_dtheta(self,partial,index,index2,target): + index = np.asarray(index,dtype=np.int) + if index2 is None: + index2 = index + else: + index2 = np.asarray(index2,dtype=np.int) + ii,jj = np.meshgrid(index2,index) + PK = np.zeros((self.R,self.R)) + partial_small = np.zeros_like(self.B) + for i in range(self.Nout): + for j in range(self.Nout): + partial_small[i,j] = np.sum(partial[(ii==i)*(jj==j)]) + dkappa = np.diag(partial_small) + + dW = 2.*(self.W[:,None,:]*partial_small[:,:,None]).sum(0) + + target += np.hstack([dW.flatten(),dkappa]) + + def dKdiag_dtheta_foo(self,partial,index,target): + index = np.asarray(index,dtype=np.int).flatten() + partial_small = np.zeros(self.Nout) + for i in range(self.Nout): + partial_small[i] += np.sum(partial[index==i]) + dW = 2.*self.W*partial_small[:,None] + dkappa = partial_small + target += np.hstack([dW.flatten(),dkappa]) + + def dK_dX(self,partial,X,X2,target): + pass + + def dKdiag_dtheta(self,partial,index,target): + self.dK_dtheta(np.diag(partial),index,index,target) + + diff --git a/GPy/kern/product_orthogonal.py b/GPy/kern/product_orthogonal.py index a729c126..6b02b868 100644 --- a/GPy/kern/product_orthogonal.py +++ b/GPy/kern/product_orthogonal.py @@ -46,14 +46,6 @@ class product_orthogonal(kernpart): self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2) target += target1 * target2 - def Kdiag(self,X,target): - """Compute the diagonal of the covariance matrix associated to X.""" - target1 = np.zeros((X.shape[0],)) - target2 = np.zeros((X.shape[0],)) - self.k1.Kdiag(X[:,0:self.k1.D],target1) - self.k2.Kdiag(X[:,self.k1.D:],target2) - target += target1 * target2 - def dK_dtheta(self,partial,X,X2,target): """derivative of the covariance matrix with respect to the parameters.""" if X2 is None: X2 = X @@ -70,6 +62,22 @@ class product_orthogonal(kernpart): target[:self.k1.Nparam] += k1_target target[self.k1.Nparam:] += k2_target + def Kdiag(self,X,target): + """Compute the diagonal of the covariance matrix associated to X.""" + target1 = np.zeros((X.shape[0],)) + target2 = np.zeros((X.shape[0],)) + self.k1.Kdiag(X[:,:self.k1.D],target1) + self.k2.Kdiag(X[:,self.k1.D:],target2) + target += target1 * target2 + + def dKdiag_dtheta(self,partial,X,target): + K1 = np.zeros(X.shape[0]) + K2 = np.zeros(X.shape[0]) + self.k1.Kdiag(X[:,:self.k1.D],K1) + self.k2.Kdiag(X[:,self.k1.D:],K2) + self.k1.dKdiag_dtheta(partial*K2,X[:,:self.k1.D],target[:self.k1.Nparam]) + self.k2.dKdiag_dtheta(partial*K1,X[:,self.k1.D:],target[self.k1.Nparam:]) + def dK_dX(self,partial,X,X2,target): """derivative of the covariance matrix with respect to X.""" if X2 is None: X2 = X diff --git a/GPy/models/GP.py b/GPy/models/GP.py index 91327aab..08ac1bb1 100644 --- a/GPy/models/GP.py +++ b/GPy/models/GP.py @@ -72,7 +72,7 @@ class GP(model): self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas - self.K = self.kern.K(self.X,slices1=self.Xslices) + self.K = self.kern.K(self.X,slices1=self.Xslices,slices2=self.Xslices) self.K += self.likelihood.covariance_matrix self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K) @@ -129,7 +129,7 @@ class GP(model): For the likelihood parameters, pass in alpha = K^-1 y """ - return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) + return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X,slices1=self.Xslices,slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) def _raw_predict(self,_Xnew,slices=None, full_cov=False): """ diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 957bb44d..3d738106 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -16,6 +16,22 @@ class KernelTests(unittest.TestCase): print m self.assertTrue(m.checkgrad()) + def test_coregionalisation(self): + X1 = np.random.rand(50,1)*8 + X2 = np.random.rand(30,1)*5 + index = np.vstack((np.zeros_like(X1),np.ones_like(X2))) + X = np.hstack((np.vstack((X1,X2)),index)) + Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05 + Y2 = np.sin(X2) + np.random.randn(*X2.shape)*0.05 + 2. + Y = np.vstack((Y1,Y2)) + + k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) + k2 = GPy.kern.coregionalise(2,1) + k = k1.prod_orthogonal(k2) + m = GPy.models.GP_regression(X,Y,kernel=k) + self.assertTrue(m.checkgrad()) + + if __name__ == "__main__": diff --git a/setup.py b/setup.py index 40c89ccb..d24171e2 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,8 @@ # -*- coding: utf-8 -*- import os -from numpy.distutils.core import Extension, setup +from setuptools import setup +#from numpy.distutils.core import Extension, setup #from sphinx.setup_command import BuildDoc # Version number