BGPLVM working with rbf+white

Nicolo Fusi 2013-01-29 14:02:41 +00:00
parent 8b6e244cf1
commit 936d08723e
5 changed files with 105 additions and 32 deletions


@@ -20,26 +20,28 @@ def plot_oil(X, theta, labels, label):
     plt.plot(flow_type[:,0], flow_type[:,1], 'bx')
     plt.title(label)
 
-data = pickle.load(open('../util/datasets/oil_flow_3classes.pickle', 'r'))
+data = pickle.load(open('../../../GPy_assembla/datasets/oil_flow_3classes.pickle', 'r'))
 Y = data['DataTrn']
 N, D = Y.shape
-selected = np.random.permutation(N)[:200]
+selected = np.random.permutation(N)#[:200]
 labels = data['DataTrnLbls'][selected]
 Y = Y[selected]
 N, D = Y.shape
 Y -= Y.mean(axis=0)
-Y /= Y.std(axis=0)
-Q = 2
-m1 = GPy.models.sparse_GPLVM(Y, Q, M = 15)
-m1.constrain_positive('(rbf|bias|noise)')
-m1.constrain_bounded('white', 1e-6, 1.0)
-plot_oil(m1.X, np.array([1,1]), labels, 'PCA initialization')
+#Y /= Y.std(axis=0)
+Q = 7
+k = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q)
+m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 12)
+m.constrain_positive('(rbf|bias|S|white|noise)')
+# m.constrain_bounded('white', 1e-6, 100.0)
+# m.constrain_bounded('noise', 1e-4, 1000.0)
+plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
 # m.optimize(messages = True)
-m1.optimize('bfgs', messages = True)
-plot_oil(m1.X, np.array([1,1]), labels, 'sparse GPLVM')
+m.optimize('tnc', messages = True)
+plot_oil(m.X, m.kern.parts[0].lengthscales, labels, 'B-GPLVM')
 # pb.figure()
 # m.plot()
 # pb.title('PCA initialisation')
@@ -47,7 +49,7 @@ plot_oil(m1.X, np.array([1,1]), labels, 'sparse GPLVM')
 # m.optimize(messages = 1)
 # m.plot()
 # pb.title('After optimisation')
-m = GPy.models.GPLVM(Y, Q)
-m.constrain_positive('(white|rbf|bias|noise)')
-m.optimize()
-plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')
+# m = GPy.models.GPLVM(Y, Q)
+# m.constrain_positive('(white|rbf|bias|noise)')
+# m.optimize()
+# plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')
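
Taken together, the demo now fits the new Bayesian model instead of the sparse GPLVM. A minimal sketch of that flow, assuming the 2013-era GPy API exactly as it appears above (the pickle path is machine-specific and stands in for wherever the oil-flow data lives):

    import pickle
    import GPy

    # Load and centre the 3-class oil-flow data (std-scaling is commented out above)
    data = pickle.load(open('oil_flow_3classes.pickle', 'r'))  # hypothetical local path
    Y = data['DataTrn'] - data['DataTrn'].mean(axis=0)

    Q = 7                                                # latent dimensionality
    k = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q)          # ARD RBF plus white noise
    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 12)  # 12 inducing points
    m.constrain_positive('(rbf|bias|S|white|noise)')
    m.optimize('tnc', messages = True)                   # TNC in place of BFGS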


@@ -95,7 +95,7 @@ class rbf(kernpart):
         target += self.variance
 
     def dpsi0_dtheta(self,partial,Z,mu,S,target):
-        target[0] += 1.
+        target[0] += np.sum(partial)
 
     def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         pass
@@ -113,7 +113,9 @@ class rbf(kernpart):
     def dpsi1_dZ(self,partial,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
-        target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscale2/self._psi1_denom,0)
+        denominator = (self.lengthscale2*(self._psi1_denom))
+        dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator))
+        target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)
 
     def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         self._psi_computations(Z,mu,S)
@@ -132,13 +134,14 @@ class rbf(kernpart):
         d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
         d_length = d_length.sum(0)
         target[0] += np.sum(partial*d_var)
-        target[1] += np.sum(d_length*partial)
+        target[1] += np.sum(d_length*partial[:,:,None])
 
     def dpsi2_dZ(self,partial,Z,mu,S,target):
+        """Returns shape N,M,M,Q"""
         self._psi_computations(Z,mu,S)
-        dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
-        target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
+        term1 = 0.5*self._psi2_Zdist/self.lengthscale2 # M, M, Q
+        term2 = self._psi2_mudist/self._psi2_denom/self.lengthscale2 # N, M, M, Q
+        dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
+        target += (partial[None,:,:,None]*dZ).sum(0).sum(0)
 
     def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
@@ -175,4 +178,3 @@ class rbf(kernpart):
         self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
         self._Z, self._mu, self._S = Z, mu,S
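
For orientation, the psi statistics whose gradients change in this file are the standard kernel expectations of the variational GPLVM bound (Titsias & Lawrence, 2010), taken under q(X) = prod_n N(x_n | mu_n, S_n) and kept in the code's N,M,M layout before summing over n:

    \psi_0 = \sum_n \big\langle k(x_n, x_n) \big\rangle_{q(x_n)}, \qquad
    (\Psi_1)_{nm} = \big\langle k(x_n, z_m) \big\rangle_{q(x_n)}, \qquad
    (\Psi_2)_{nmm'} = \big\langle k(x_n, z_m)\, k(x_n, z_{m'}) \big\rangle_{q(x_n)}

Each dpsi*_d* routine is then one chain-rule factor: it contracts the incoming partial (dL/dpsi) against the analytic derivative of the corresponding expectation.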


@@ -74,7 +74,7 @@ class rbf_ARD(kernpart):
         target += self.variance
 
     def dpsi0_dtheta(self,partial,Z,mu,S,target):
-        target[0] += 1.
+        target[0] += np.sum(partial)
 
     def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         pass
@@ -93,7 +93,9 @@ class rbf_ARD(kernpart):
     def dpsi1_dZ(self,partial,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         # np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target)
-        target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,0)
+        denominator = (self.lengthscales2*(self._psi1_denom))
+        dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator))
+        target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)
 
     def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """return shapes are N,M,Q"""
@@ -118,8 +120,10 @@ class rbf_ARD(kernpart):
     def dpsi2_dZ(self,partial,Z,mu,S,target):
         """Returns shape N,M,M,Q"""
         self._psi_computations(Z,mu,S)
-        dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
-        target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
+        term1 = 0.5*self._psi2_Zdist/self.lengthscales2 # M, M, Q
+        term2 = self._psi2_mudist/self._psi2_denom/self.lengthscales2 # N, M, M, Q
+        dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
+        target += (partial[None,:,:,None]*dZ).sum(0).sum(0)
 
     def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
@@ -247,5 +251,3 @@ if __name__=='__main__':
         return f,df
     print "psi2_theta_test"
     checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,))
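
The psi2_theta_test/checkgrad calls above are the module's own numerical check on these gradients. A self-contained sketch of the same idea in plain numpy (nothing here is GPy's API; the quadratic at the bottom is just a known-gradient example):

    import numpy as np

    def fd_checkgrad(f, x, eps=1e-6, rtol=1e-4):
        """Compare an analytic gradient against central finite differences.

        f(x) must return a pair (objective, gradient) for a flat array x,
        mirroring the f,df convention used in the tests above.
        """
        _, grad = f(x)
        fd = np.empty_like(x)
        for i in range(x.size):
            e = np.zeros_like(x)
            e[i] = eps
            fd[i] = (f(x + e)[0] - f(x - e)[0]) / (2.0 * eps)
        return np.allclose(grad, fd, rtol=rtol, atol=1e-6)

    # Sanity check on f(x) = 0.5 * x.x, whose gradient is x itself
    assert fd_checkgrad(lambda x: (0.5 * np.dot(x, x), x), np.random.randn(5))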

GPy/models/BGPLVM.py (new file)

@@ -0,0 +1,68 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+import pylab as pb
+import sys, pdb
+from GPLVM import GPLVM
+from sparse_GP_regression import sparse_GP_regression
+from GPy.util.linalg import pdinv
+
+class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
+    """
+    Bayesian Gaussian Process Latent Variable Model
+
+    :param Y: observed data
+    :type Y: np.ndarray
+    :param Q: latent dimensionality
+    :type Q: int
+    :param init: initialisation method for the latent space
+    :type init: 'PCA'|'random'
+    """
+    def __init__(self, Y, Q, init='PCA', **kwargs):
+        X = self.initialise_latent(init, Q, Y)
+        S = np.ones_like(X) * 1e-2
+        sparse_GP_regression.__init__(self, X, Y, X_uncertainty = S, **kwargs)
+
+    def get_param_names(self):
+        X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
+        S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
+        return (X_names + S_names + sparse_GP_regression.get_param_names(self))
+
+    def get_param(self):
+        """
+        Horizontally stacks the parameters in order to present them to the optimizer.
+        The resulting 1-D array has this structure:
+
+        ===============================================================
+        |      mu       |       S       |     Z     | beta |  theta  |
+        ===============================================================
+        """
+        return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression.get_param(self)))
+
+    def set_param(self,x):
+        N, Q = self.N, self.Q
+        self.X = x[:self.X.size].reshape(N,Q).copy()
+        self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
+        sparse_GP_regression.set_param(self, x[(2*N*Q):])
+
+    def dL_dmuS(self):
+        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty)
+        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_uncertainty)
+        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_uncertainty)
+        dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
+        dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
+        return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))
+
+    def log_likelihood_gradients(self):
+        return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression.log_likelihood_gradients(self)))
+
+    def plot(self):
+        GPLVM.plot(self)
+        # passing Z without a small amount of jitter will induce the white kernel where we don't want it!
+        mu, var = sparse_GP_regression.predict(self, self.Z+np.random.randn(*self.Z.shape)*0.0001)
+        pb.plot(mu[:, 0], mu[:, 1], 'ko')
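
Because get_param/set_param splice the variational means and variances in front of the sparse-GP parameters, the optimizer sees a single flat vector laid out as | mu | S | Z | beta | theta |. A standalone numpy round-trip of that reshaping logic (toy sizes; rest is a stand-in for the Z/beta/theta tail):

    import numpy as np

    N, Q = 4, 2                        # toy latent sizes
    X = np.random.randn(N, Q)          # variational means mu
    S = np.ones((N, Q)) * 1e-2         # variational variances, as in __init__
    rest = np.random.randn(7)          # stand-in for Z, beta and theta

    x = np.hstack((X.flatten(), S.flatten(), rest))   # get_param
    X2 = x[:N * Q].reshape(N, Q)                      # set_param: mu block
    S2 = x[N * Q:2 * N * Q].reshape(N, Q)             # set_param: S block
    assert np.allclose(X, X2) and np.allclose(S, S2)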


@@ -37,7 +37,7 @@ class sparse_GP_regression(GP_regression):
     """
     def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False):
-        self.scale_factor = 10.0
+        self.scale_factor = 1000.0
         self.beta = beta
         if Z is None:
             self.Z = np.random.permutation(X.copy())[:M]
@@ -70,7 +70,6 @@ class sparse_GP_regression(GP_regression):
             self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
             self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
             self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
-            # raise NotImplementedError, "scale psi2 (in kern?)"
             self.psi2_beta_scaled = self.psi2*(self.beta/self.scale_factor**2)
         else:
             self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
@@ -163,10 +162,10 @@ class sparse_GP_regression(GP_regression):
         """
        The derivative of the bound wrt the inducing inputs Z
         """
-        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z,) # factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
+        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z) # factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
         if self.has_uncertain_inputs:
-            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
-            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty)
+            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
+            dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
         else:
             #re-cast computations in psi2 back to psi1:
             dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
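
The recurring "factor of two because of stripes" comments are the usual symmetric-kernel identity: each z_m enters Kmm (and, with uncertain inputs, psi2) through both its row and its column, so for a symmetric multiplier A = A^T,

    \frac{\partial}{\partial z_m} \sum_{i,j} A_{ij}\, k(z_i, z_j)
        = 2 \sum_j A_{mj}\, \frac{\partial k(z_m, z_j)}{\partial z_m},

and the code computes only the "row" half of each derivative and doubles it; dL_dKmm and dL_dpsi2 are symmetric here, so the shortcut is exact.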