diff --git a/GPy/examples/oil_flow_demo.py b/GPy/examples/oil_flow_demo.py
index eee74461..f977e18d 100644
--- a/GPy/examples/oil_flow_demo.py
+++ b/GPy/examples/oil_flow_demo.py
@@ -20,26 +20,28 @@ def plot_oil(X, theta, labels, label):
     plt.plot(flow_type[:,0], flow_type[:,1], 'bx')
     plt.title(label)
 
-data = pickle.load(open('../util/datasets/oil_flow_3classes.pickle', 'r'))
+data = pickle.load(open('../../../GPy_assembla/datasets/oil_flow_3classes.pickle', 'r'))
 Y = data['DataTrn']
 N, D = Y.shape
-selected = np.random.permutation(N)[:200]
+selected = np.random.permutation(N)#[:200]
 labels = data['DataTrnLbls'][selected]
 Y = Y[selected]
 N, D = Y.shape
 Y -= Y.mean(axis=0)
-Y /= Y.std(axis=0)
+#Y /= Y.std(axis=0)
 
-Q = 2
-m1 = GPy.models.sparse_GPLVM(Y, Q, M = 15)
-m1.constrain_positive('(rbf|bias|noise)')
-m1.constrain_bounded('white', 1e-6, 1.0)
+Q = 7
+k = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q)
+m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 12)
+m.constrain_positive('(rbf|bias|S|white|noise)')
+# m.constrain_bounded('white', 1e-6, 100.0)
+# m.constrain_bounded('noise', 1e-4, 1000.0)
 
-plot_oil(m1.X, np.array([1,1]), labels, 'PCA initialization')
+plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
 # m.optimize(messages = True)
-m1.optimize('bfgs', messages = True)
-plot_oil(m1.X, np.array([1,1]), labels, 'sparse GPLVM')
+m.optimize('tnc', messages = True)
+plot_oil(m.X, m.kern.parts[0].lengthscales, labels, 'B-GPLVM')
 # pb.figure()
 # m.plot()
 # pb.title('PCA initialisation')
 
@@ -47,7 +49,7 @@ plot_oil(m1.X, np.array([1,1]), labels, 'sparse GPLVM')
 # m.optimize(messages = 1)
 # m.plot()
 # pb.title('After optimisation')
-m = GPy.models.GPLVM(Y, Q)
-m.constrain_positive('(white|rbf|bias|noise)')
-m.optimize()
-plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')
+# m = GPy.models.GPLVM(Y, Q)
+# m.constrain_positive('(white|rbf|bias|noise)')
+# m.optimize()
+# plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')
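Note on the demo: since it now drives the new Bayesian_GPLVM end to end, a cheap sanity test before the full 'tnc' run is a numerical gradient check on a small subsample. A minimal sketch, assuming the usual GPy model.checkgrad() helper is inherited by Bayesian_GPLVM (the subsample size and M=5 are arbitrary choices, not part of this patch):

    Y_small = Y[:50]                                   # small subsample keeps the check fast
    k_small = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q)
    m_small = GPy.models.Bayesian_GPLVM(Y_small, Q, kernel=k_small, M=5)
    m_small.constrain_positive('(rbf|bias|S|white|noise)')
    m_small.checkgrad()                                # compares analytic vs numerical gradients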
diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py
index 0dcd3ea1..c22e68c7 100644
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@@ -95,7 +95,7 @@ class rbf(kernpart):
         target += self.variance
 
     def dpsi0_dtheta(self,partial,Z,mu,S,target):
-        target[0] += 1.
+        target[0] += np.sum(partial)
 
     def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         pass
@@ -113,7 +113,9 @@ class rbf(kernpart):
 
     def dpsi1_dZ(self,partial,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
-        target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscale2/self._psi1_denom,0)
+        denominator = self.lengthscale2 * self._psi1_denom
+        dpsi1_dZ = -self._psi1[:,:,None] * (self._psi1_dist / denominator)
+        target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)
 
     def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         self._psi_computations(Z,mu,S)
@@ -132,13 +134,14 @@ class rbf(kernpart):
         d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
         d_length = d_length.sum(0)
         target[0] += np.sum(partial*d_var)
-        target[1] += np.sum(d_length*partial)
+        target[1] += np.sum(d_length*partial[:,:,None])
 
     def dpsi2_dZ(self,partial,Z,mu,S,target):
-        """Returns shape N,M,M,Q"""
         self._psi_computations(Z,mu,S)
-        dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
-        target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
+        term1 = 0.5*self._psi2_Zdist/self.lengthscale2 # M, M, Q
+        term2 = self._psi2_mudist/self._psi2_denom/self.lengthscale2 # N, M, M, Q
+        dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
+        target += (partial[None,:,:,None]*dZ).sum(0).sum(0)
 
     def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
@@ -175,4 +178,3 @@ class rbf(kernpart):
         self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
 
         self._Z, self._mu, self._S = Z, mu,S
-
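The dpsi0_dtheta change above follows from the chain rule: for the RBF kernel psi0_n = variance for every data point n, so dL/dvariance = sum_n partial_n * dpsi0_n/dvariance = sum(partial); the old `target[0] += 1.` dropped the partial factors entirely. A self-contained finite-difference check of this identity (toy shapes, `eps` arbitrary):

    import numpy as np
    N, variance, eps = 4, 0.7, 1e-6
    partial = np.random.randn(N)                    # dL/dpsi0_n, as supplied by the model
    L = lambda v: np.dot(partial, v * np.ones(N))   # psi0_n = variance for each n
    fd = (L(variance + eps) - L(variance - eps)) / (2 * eps)
    assert np.allclose(fd, np.sum(partial))         # matches target[0] += np.sum(partial)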
diff --git a/GPy/kern/rbf_ARD.py b/GPy/kern/rbf_ARD.py
index 7a5a3368..79c6ff58 100644
--- a/GPy/kern/rbf_ARD.py
+++ b/GPy/kern/rbf_ARD.py
@@ -74,7 +74,7 @@ class rbf_ARD(kernpart):
         target += self.variance
 
     def dpsi0_dtheta(self,partial,Z,mu,S,target):
-        target[0] += 1.
+        target[0] += np.sum(partial)
 
     def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         pass
@@ -93,7 +93,9 @@ class rbf_ARD(kernpart):
     def dpsi1_dZ(self,partial,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         # np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target)
-        target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,0)
+        denominator = self.lengthscales2 * self._psi1_denom
+        dpsi1_dZ = -self._psi1[:,:,None] * (self._psi1_dist / denominator)
+        target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)
 
     def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """return shapes are N,M,Q"""
@@ -118,8 +120,10 @@ class rbf_ARD(kernpart):
     def dpsi2_dZ(self,partial,Z,mu,S,target):
         """Returns shape N,M,M,Q"""
         self._psi_computations(Z,mu,S)
-        dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
-        target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
+        term1 = 0.5*self._psi2_Zdist/self.lengthscales2 # M, M, Q
+        term2 = self._psi2_mudist/self._psi2_denom/self.lengthscales2 # N, M, M, Q
+        dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
+        target += (partial[None,:,:,None]*dZ).sum(0).sum(0)
 
     def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
@@ -247,5 +251,3 @@ if __name__=='__main__':
         return f,df
     print "psi2_theta_test"
     checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,))
-
-
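The tests in this file's __main__ block all follow the same pattern: each test function returns (objective, analytic gradient) and is passed to a checkgrad routine together with a starting point. For reference, a minimal standalone version of that pattern (the helper body and tolerances here are an illustrative sketch, not the repository's implementation):

    import numpy as np
    def checkgrad(f_df, x0, args=(), eps=1e-6, tol=1e-4):
        # compare the analytic gradient against central finite differences
        f0, df = f_df(x0, *args)
        df_fd = np.zeros_like(x0)
        for i in range(x0.size):
            step = np.zeros_like(x0)
            step[i] = eps
            df_fd[i] = (f_df(x0 + step, *args)[0] - f_df(x0 - step, *args)[0]) / (2. * eps)
        return np.allclose(df, df_fd, rtol=tol, atol=tol)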
diff --git a/GPy/models/BGPLVM.py b/GPy/models/BGPLVM.py
new file mode 100644
index 00000000..2c760ec3
--- /dev/null
+++ b/GPy/models/BGPLVM.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+import pylab as pb
+import sys, pdb
+from GPLVM import GPLVM
+from sparse_GP_regression import sparse_GP_regression
+from GPy.util.linalg import pdinv
+
+class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
+    """
+    Bayesian Gaussian Process Latent Variable Model
+
+    :param Y: observed data
+    :type Y: np.ndarray
+    :param Q: latent dimensionality
+    :type Q: int
+    :param init: initialisation method for the latent space
+    :type init: 'PCA'|'random'
+
+    """
+    def __init__(self, Y, Q, init='PCA', **kwargs):
+        X = self.initialise_latent(init, Q, Y)
+        S = np.ones_like(X) * 1e-2
+        sparse_GP_regression.__init__(self, X, Y, X_uncertainty=S, **kwargs)
+
+    def get_param_names(self):
+        X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
+        S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
+        return (X_names + S_names + sparse_GP_regression.get_param_names(self))
+
+    def get_param(self):
+        """
+        Horizontally stacks the parameters in order to present them to the optimizer.
+        The resulting 1-D array has this structure:
+
+        ===============================================================
+        |       mu       |        S        |    Z    | beta |  theta  |
+        ===============================================================
+
+        """
+        return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression.get_param(self)))
+
+    def set_param(self,x):
+        N, Q = self.N, self.Q
+        self.X = x[:self.X.size].reshape(N,Q).copy()
+        self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
+        sparse_GP_regression.set_param(self, x[(2*N*Q):])
+
+    def dL_dmuS(self):
+        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_uncertainty)
+        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty)
+        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_uncertainty)
+        dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
+        dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
+
+        return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))
+
+
+    def log_likelihood_gradients(self):
+        return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression.log_likelihood_gradients(self)))
+
+    def plot(self):
+        GPLVM.plot(self)
+        # passing Z without a small amount of jitter will induce the white kernel where we don't want it!
+        mu, var = sparse_GP_regression.predict(self, self.Z + np.random.randn(*self.Z.shape)*0.0001)
+        pb.plot(mu[:, 0], mu[:, 1], 'ko')
diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index 692d77d1..8caf38b1 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -37,7 +37,7 @@ class sparse_GP_regression(GP_regression):
     """
     def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False):
-        self.scale_factor = 10.0
+        self.scale_factor = 1000.0
        self.beta = beta
         if Z is None:
             self.Z = np.random.permutation(X.copy())[:M]
@@ -70,7 +70,6 @@ class sparse_GP_regression(GP_regression):
             self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
             self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
             self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
-            # raise NotImplementedError, "scale psi2 (in kern?)"
             self.psi2_beta_scaled = self.psi2*(self.beta/self.scale_factor**2)
         else:
             self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
@@ -163,10 +162,10 @@ class sparse_GP_regression(GP_regression):
         """
         The derivative of the bound wrt the inducing inputs Z
         """
-        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z,)#factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
+        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z) # factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
         if self.has_uncertain_inputs:
-            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
-            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty)
+            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
+            dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
         else:
             #re-cast computations in psi2 back to psi1:
             dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
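On the new factor of 2 in the dpsi2_dZ term: psi2[n,m,m'] is symmetric in its two inducing-point indices, so each Z_m enters the bound through both index positions, exactly as with the vertical and horizontal 'stripes' of dKmm_dZ noted in the comment above. A toy numpy illustration of the pattern, using a symmetric psi2-like matrix (the function A, shapes, and names are illustrative only):

    import numpy as np
    M, eps = 3, 1e-6
    Z = np.random.randn(M)
    A = lambda Z: np.exp(-np.subtract.outer(Z, Z)**2)   # A[m,mp] symmetric in (m,mp), like psi2
    partial = np.random.randn(M, M)
    partial = 0.5 * (partial + partial.T)               # dL/dA, symmetric like dL_dpsi2
    dA_dZ0 = -2. * np.subtract.outer(Z, Z) * A(Z)       # derivative wrt the first index only
    dL_dZ = 2. * (partial * dA_dZ0).sum(1)              # factor 2: Z_m also enters via the second index
    fd = np.zeros(M)                                    # central finite differences for comparison
    for i in range(M):
        step = np.zeros(M)
        step[i] = eps
        fd[i] = ((partial * A(Z + step)).sum() - (partial * A(Z - step)).sum()) / (2. * eps)
    assert np.allclose(dL_dZ, fd)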