massive merge of the debug branch

Nicolo Fusi · 2013-01-30 15:30:54 +00:00 · commit 8a3e10700d
13 changed files with 327 additions and 109 deletions

@@ -303,7 +303,7 @@ class model(parameterised):
         return '\n'.join(s)

-    def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance = 1e-3, *args):
+    def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance = 1e-3, return_ratio=False, *args):
         """
         Check the gradient of the model by comparing to a numerical estimate.
         If the overall gradient fails, individual components are tested.
@@ -323,12 +323,12 @@ class model(parameterised):
         gradient = self._log_likelihood_gradients_transformed()
         numerical_gradient = (f1-f2)/(2*dx)
-        ratio = (f1-f2)/(2*np.dot(dx,gradient))
+        global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
         if verbose:
-            print "Gradient ratio = ", ratio, '\n'
+            print "Gradient ratio = ", global_ratio, '\n'
             sys.stdout.flush()
-        if (np.abs(1.-ratio)<tolerance) and not np.isnan(ratio):
+        if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio):
             if verbose:
                 print 'Gradcheck passed'
         else:
@@ -380,7 +380,15 @@ class model(parameterised):
             grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4])
             print grad_string
-            print ''
-            return False
-        return True
+            if verbose:
+                print ''
+            if return_ratio:
+                return global_ratio
+            else:
+                return False
+        if return_ratio:
+            return global_ratio
+        else:
+            return True
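
A minimal sketch of the new return_ratio flag (grid_parameters.py at the bottom of this commit uses it the same way); the model construction here is only illustrative, any GPy model of this vintage with a gradient should do:

    import numpy as np
    import GPy

    X = np.linspace(0, 10, 50)[:, None]
    Y = np.sin(X) + np.random.randn(50, 1) * 0.1
    m = GPy.models.GP_regression(X, Y)                         # illustrative model choice
    m.constrain_positive('(variance|lengthscale|precision)')

    # instead of a pass/fail boolean, checkgrad can now return the
    # analytic/numerical gradient ratio itself; values near 1.0 mean agreement
    ratio = m.checkgrad(verbose=0, return_ratio=True)
    print 'gradient ratio:', ratio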

@@ -0,0 +1,55 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+import cPickle as pickle
+import numpy as np
+import pylab as pb
+import GPy
+import pylab as plt
+
+np.random.seed(1)
+
+def plot_oil(X, theta, labels, label):
+    plt.figure()
+    X = X[:,np.argsort(theta)[:2]]
+    flow_type = (X[labels[:,0]==1])
+    plt.plot(flow_type[:,0], flow_type[:,1], 'rx')
+    flow_type = (X[labels[:,1]==1])
+    plt.plot(flow_type[:,0], flow_type[:,1], 'gx')
+    flow_type = (X[labels[:,2]==1])
+    plt.plot(flow_type[:,0], flow_type[:,1], 'bx')
+    plt.title(label)
+
+data = pickle.load(open('../../../GPy_assembla/datasets/oil_flow_3classes.pickle', 'r'))
+Y = data['DataTrn']
+N, D = Y.shape
+selected = np.random.permutation(N)#[:200]
+labels = data['DataTrnLbls'][selected]
+Y = Y[selected]
+N, D = Y.shape
+Y -= Y.mean(axis=0)
+#Y /= Y.std(axis=0)
+
+Q = 10
+k = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q)
+m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 12)
+m.constrain_positive('(rbf|bias|S|white|noise)')
+# m.constrain_bounded('white', 1e-6, 100.0)
+# m.constrain_bounded('noise', 1e-4, 1000.0)
+
+plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
+
+# m.optimize(messages = True)
+m.optimize('tnc', messages = True)
+plot_oil(m.X, m.kern.parts[0].lengthscales, labels, 'B-GPLVM')
+
+# pb.figure()
+# m.plot()
+# pb.title('PCA initialisation')
+# pb.figure()
+# m.optimize(messages = 1)
+# m.plot()
+# pb.title('After optimisation')
+
+# m = GPy.models.GPLVM(Y, Q)
+# m.constrain_positive('(white|rbf|bias|noise)')
+# m.optimize()
+# plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')

@@ -9,19 +9,17 @@ np.random.seed(1)
 print "sparse GPLVM with RBF kernel"
 N = 100
-M = 4
-Q = 2
+M = 8
+Q = 1
 D = 2

 #generate GPLVM-like data
 X = np.random.rand(N, Q)
-k = GPy.kern.rbf(Q,1.,2*np.ones((1,))) + GPy.kern.white(Q, 0.00001)
+k = GPy.kern.rbf(Q, 1.0, 2.0) + GPy.kern.white(Q, 0.00001)
 K = k.K(X)
 Y = np.random.multivariate_normal(np.zeros(N),K,D).T

 m = GPy.models.sparse_GPLVM(Y, Q, M=M)
-m.constrain_positive('(rbf|bias|noise)')
-m.constrain_bounded('white', 1e-3, 0.1)
-# m.plot()
+m.constrain_positive('(rbf|bias|noise|white)')
 pb.figure()
 m.plot()

@@ -11,7 +11,7 @@ import numpy as np
 import GPy

 np.random.seed(2)
 pb.ion()
-N = 500
+N = 400
 M = 5

 ######################################
@@ -27,20 +27,13 @@ noise = GPy.kern.white(1)
 kernel = rbf + noise

 # create simple GP model
-m1 = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
-
-# contrain all parameters to be positive
-m1.constrain_positive('(variance|lengthscale|precision)')
-#m1.constrain_positive('(variance|lengthscale)')
-#m1.constrain_fixed('prec',10.)
-
-#check gradient FIXME unit test please
-m1.checkgrad()
-
-# optimize and plot
-m1.optimize('tnc', messages = 1)
-m1.plot()
-# print(m1)
+m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
+m.constrain_positive('(variance|lengthscale|precision)')
+m.checkgrad(verbose=1)
+m.optimize('tnc', messages = 1)
+m.plot()

 ######################################
 ## 2 dimensional example

@@ -79,6 +79,7 @@ class Matern52(kernpart):
         invdist = 1./np.where(dist!=0.,dist,np.inf)
         dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
         dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist)
+        dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
         target[0] += np.sum(dvar*partial)
         if self.ARD:
             dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]

@@ -6,7 +6,7 @@ import numpy as np
 from ..core.parameterised import parameterised
 from functools import partial
 from kernpart import kernpart
+import itertools

 class kern(parameterised):
     def __init__(self,D,parts=[], input_slices=None):
@@ -259,29 +259,56 @@ class kern(parameterised):
         :Z: np.ndarray of inducing inputs (M x Q)
         : mu, S: np.ndarrays of means and variances (each N x Q)
         :returns psi2: np.ndarray (N,M,M,Q) """
-        target = np.zeros((Z.shape[0],Z.shape[0]))
+        target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0]))
         slices1, slices2 = self._process_slices(slices1,slices2)
-        [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        # MASSIVE TODO: do something smart for white
+        # "crossterms"
+        psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
+        [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
+        for a,b in itertools.combinations(psi1_matrices, 2):
+            tmp = np.multiply(a,b)
+            target += tmp[:,None,:] + tmp[:,:,None]
         return target

-    def dpsi2_dtheta(self,partial,Z,mu,S,slices1=None,slices2=None):
+    def dpsi2_dtheta(self,partial,partial1,Z,mu,S,slices1=None,slices2=None):
         """Returns shape (N,M,M,Ntheta)"""
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros(self.Nparam)
-        [p.dpsi2_dtheta(partial[s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
+        [p.dpsi2_dtheta(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
+        # "crossterms"
+        # 1. get all the psi1 statistics
+        psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
+        [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
+        partial1 = np.zeros_like(partial1)
+        # 2. get all the dpsi1/dtheta gradients
+        psi1_gradients = [np.zeros(self.Nparam) for p in self.parts]
+        [p.dpsi1_dtheta(partial1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],psi1g_target[ps]) for p,ps,s1,s2,i_s,psi1g_target in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices,psi1_gradients)]
+        # 3. multiply them somehow
+        for a,b in itertools.combinations(range(len(psi1_matrices)), 2):
+            gne = (psi1_gradients[a][None]*psi1_matrices[b].sum(0)[:,None]).sum(0)
+            target += (gne[None] + gne[:, None]).sum(0)
         return target

     def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros_like(Z)
-        [p.dpsi2_dZ(partial[s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        [p.dpsi2_dZ(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
         return target

-    def dpsi2_dmuS(self,Z,mu,S,slices1=None,slices2=None):
+    def dpsi2_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None):
         """return shapes are N,M,M,Q"""
         slices1, slices2 = self._process_slices(slices1,slices2)
         target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
-        [p.dpsi2_dmuS(partial[s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        [p.dpsi2_dmuS(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
         #TODO: there are some extra terms to compute here!
         return target_mu, target_S
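
For a sum kernel k = sum_a k_a, the psi2 statistic does not reduce to the per-part psi2 terms alone: expanding the expectation under q(x_n) leaves cross terms between parts, which the "crossterms" blocks above assemble from the per-part psi1 matrices. A sketch of the identity being targeted (how the cross expectation is approximated by products of psi1 statistics, and the white-noise special case, are exactly what the TODOs flag):

    \psi_2[n,m,m'] = E_{q(x_n)}[ (\sum_a k_a(x_n, z_m)) (\sum_b k_b(x_n, z_{m'})) ]
                   = \sum_a \psi_2^{(a)}[n,m,m'] + \sum_{a \neq b} E_{q(x_n)}[ k_a(x_n, z_m) k_b(x_n, z_{m'}) ]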

@@ -26,6 +26,7 @@ class rbf(kernpart):
     :type ARD: Boolean
     :rtype: kernel object

+    .. Note: for rbf with different lengthscale on each dimension, see rbf_ARD
     """

     def __init__(self,D,variance=1.,lengthscale=None,ARD=False):
@@ -118,9 +119,9 @@ class rbf(kernpart):
         target += self.variance

     def dpsi0_dtheta(self,partial,Z,mu,S,target):
-        target[0] += 1.
+        target[0] += np.sum(partial)

-    def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
+    def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         pass

     def psi1(self,Z,mu,S,target):
@@ -136,13 +137,15 @@ class rbf(kernpart):
     def dpsi1_dZ(self,partial,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
-        target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscale2/self._psi1_denom,0)
+        denominator = (self.lengthscale2*(self._psi1_denom))
+        dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator))
+        target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)

     def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         self._psi_computations(Z,mu,S)
         tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom
-        target_mu += np.sum(partial*tmp*self._psi1_dist,1)
-        target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1)
+        target_mu += np.sum(partial.T[:, :, None]*tmp*self._psi1_dist,1)
+        target_S += np.sum(partial.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1)

     def psi2(self,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
@@ -155,20 +158,21 @@ class rbf(kernpart):
         d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
         d_length = d_length.sum(0)
         target[0] += np.sum(partial*d_var)
-        target[1:] += (d_length*partial[:,:,None]).sum(0).sum(0)
+        target[1] += np.sum(d_length*partial[:,:,None])

     def dpsi2_dZ(self,partial,Z,mu,S,target):
+        """Returns shape N,M,M,Q"""
         self._psi_computations(Z,mu,S)
-        dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
-        target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
+        term1 = 0.5*self._psi2_Zdist/self.lengthscale2 # M, M, Q
+        term2 = self._psi2_mudist/self._psi2_denom/self.lengthscale2 # N, M, M, Q
+        dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
+        target += (partial[None,:,:,None]*dZ).sum(0).sum(0)

-    def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
+    def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
         self._psi_computations(Z,mu,S)
         tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
-        target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
-        target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
+        target_mu += (partial[None,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
+        target_S += (partial[None,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)

     def _psi_computations(self,Z,mu,S):
         #here are the "statistics" for psi1 and psi2
@@ -198,3 +202,4 @@ class rbf(kernpart):
         self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
         self._Z, self._mu, self._S = Z, mu,S
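
For reference, the psi statistics these gradient methods differentiate are the standard Bayesian GP-LVM expectations of the RBF kernel with variance sigma^2 and shared lengthscale l (restated here as a sketch from Titsias and Lawrence's formulation, not taken from this commit):

    \psi_0 = \sum_n E_{q(x_n)}[k(x_n, x_n)] = N \sigma^2

    \psi_1[n,m] = \sigma^2 \prod_{q=1}^{Q} (1 + S_{nq}/\ell^2)^{-1/2} \exp\big( -\tfrac{(\mu_{nq} - z_{mq})^2}{2(\ell^2 + S_{nq})} \big)

    \psi_2[n,m,m'] = \sigma^4 \prod_{q=1}^{Q} (1 + 2 S_{nq}/\ell^2)^{-1/2} \exp\big( -\tfrac{(z_{mq} - z_{m'q})^2}{4 \ell^2} - \tfrac{(\mu_{nq} - \bar{z}_q)^2}{\ell^2 + 2 S_{nq}} \big), \quad \bar{z} = (z_m + z_{m'})/2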

GPy/models/BGPLVM.py (new file, 62 lines)

@@ -0,0 +1,62 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+import numpy as np
+import pylab as pb
+import sys, pdb
+from GPLVM import GPLVM
+from sparse_GP_regression import sparse_GP_regression
+from GPy.util.linalg import pdinv
+
+class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
+    """
+    Bayesian Gaussian Process Latent Variable Model
+
+    :param Y: observed data
+    :type Y: np.ndarray
+    :param Q: latent dimensionality
+    :type Q: int
+    :param init: initialisation method for the latent space
+    :type init: 'PCA'|'random'
+    """
+    def __init__(self, Y, Q, init='PCA', **kwargs):
+        X = self.initialise_latent(init, Q, Y)
+        S = np.ones_like(X) * 1e-2
+        sparse_GP_regression.__init__(self, X, Y, X_uncertainty = S, **kwargs)
+
+    def get_param_names(self):
+        X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
+        S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
+        return (X_names + S_names + sparse_GP_regression.get_param_names(self))
+
+    def get_param(self):
+        """
+        Horizontally stacks the parameters in order to present them to the optimizer.
+        The resulting 1-D array has this structure:
+
+        ===============================================================
+        |      mu       |       S       |    Z    |  beta  |  theta   |
+        ===============================================================
+        """
+        return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression.get_param(self)))
+
+    def set_param(self,x):
+        N, Q = self.N, self.Q
+        self.X = x[:self.X.size].reshape(N,Q).copy()
+        self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
+        sparse_GP_regression.set_param(self, x[(2*N*Q):])
+
+    def dL_dmuS(self):
+        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty)
+        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_uncertainty)
+        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_uncertainty)
+        dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
+        dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
+        return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))
+
+    def log_likelihood_gradients(self):
+        return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression.log_likelihood_gradients(self)))
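
The packing documented in get_param can be mirrored outside the class; a small standalone sketch of the | mu | S | Z | beta | theta | layout and the slicing that set_param performs (the sizes here are made up):

    import numpy as np

    N, Q, M, Ntheta = 100, 2, 12, 3
    mu = np.random.randn(N, Q)      # latent means (self.X)
    S = np.ones((N, Q)) * 1e-2      # latent variances (self.X_uncertainty)
    Z = np.random.randn(M, Q)       # inducing inputs
    beta = 100.                     # noise precision
    theta = np.ones(Ntheta)         # kernel parameters

    x = np.hstack((mu.flatten(), S.flatten(), Z.flatten(), beta, theta))

    # unpacking mirrors Bayesian_GPLVM.set_param, then sparse_GP_regression.set_param
    mu2 = x[:N*Q].reshape(N, Q)
    S2 = x[N*Q:2*N*Q].reshape(N, Q)
    rest = x[2*N*Q:]
    Z2, beta2, theta2 = rest[:M*Q].reshape(M, Q), rest[M*Q], rest[M*Q+1:]
    assert np.allclose(theta2, theta) and beta2 == beta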

@@ -54,7 +54,7 @@ class GPLVM(GP_regression):
     def plot(self):
         assert self.Y.shape[1]==2
-        pb.scatter(self.Y[:,0],self.Y[:,1],40,self.X[:,0].copy(),linewidth=0)
+        pb.scatter(self.Y[:,0],self.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet)
         Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None]
         mu, var = self.predict(Xnew)
         pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)

@@ -3,10 +3,11 @@
 from GP_regression import GP_regression
-from sparse_GP_regression import sparse_GP_regression
+from sparse_GP_regression import sparse_GP_regression, sgp_debugB, sgp_debugC, sgp_debugE
 from GPLVM import GPLVM
 from warped_GP import warpedGP
 from GP_EP import GP_EP
 from generalized_FITC import generalized_FITC
 from sparse_GPLVM import sparse_GPLVM
 from uncollapsed_sparse_GP import uncollapsed_sparse_GP
+from BGPLVM import Bayesian_GPLVM

@@ -37,6 +37,7 @@ class sparse_GP_regression(GP_regression):
     """
     def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False):
+        self.scale_factor = 1000.0
         self.beta = beta
         if Z is None:
             self.Z = np.random.permutation(X.copy())[:M]
@@ -59,83 +60,86 @@ class sparse_GP_regression(GP_regression):
         if self.has_uncertain_inputs:
             self.X_uncertainty /= np.square(self._Xstd)

-    def _set_params(self, p):
-        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
-        self.beta = p[self.M*self.Q]
-        self.kern._set_params(p[self.Z.size + 1:])
-        self.beta2 = self.beta**2
-        self._compute_kernel_matrices()
-        self._computations()
-
-    def _compute_kernel_matrices(self):
-        # kernel computations, using BGPLVM notation
+    def _computations(self):
+        # TODO find routine to multiply triangular matrices
         #TODO: slices for psi statistics (easy enough)
+        # kernel computations, using BGPLVM notation
         self.Kmm = self.kern.K(self.Z)
         if self.has_uncertain_inputs:
             self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
             self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
             self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
+            self.psi2_beta_scaled = (self.psi2*(self.beta/self.scale_factor**2)).sum(0)
         else:
             self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
             self.psi1 = self.kern.K(self.Z,self.X)
-            self.psi2 = np.dot(self.psi1,self.psi1.T)
+            #self.psi2 = np.dot(self.psi1,self.psi1.T)
+            #self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:]
+            tmp = self.psi1/(self.scale_factor/np.sqrt(self.beta))
+            self.psi2_beta_scaled = np.dot(tmp,tmp.T)

+        sf = self.scale_factor
+        sf2 = sf**2
+        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)#+np.eye(self.M)*1e-3)
+        self.V = (self.beta/self.scale_factor)*self.Y
+        self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
+        self.B = np.eye(self.M)/sf2 + self.A
+        self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)

-    def _computations(self):
-        # TODO find routine to multiply triangular matrices
-        self.V = self.beta*self.Y
         self.psi1V = np.dot(self.psi1, self.V)
         self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
-        self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
-        self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T)
-        self.B = np.eye(self.M) + self.A
-        self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
-        self.LLambdai = np.dot(self.LBi, self.Lmi)
-        self.trace_K = self.psi0 - np.trace(self.A)/self.beta
-        self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi)
-        self.C = mdot(self.LLambdai, self.psi1V)
-        self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T)
+        self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
+        self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)

         # Compute dL_dpsi
         self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
-        self.dL_dpsi1 = mdot(self.LLambdai.T,self.C,self.V.T)
-        self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G)
+        self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
+        self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi[None,:,:] # dB
+        self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C[None,:,:] # dC
+        self.dL_dpsi2 += - 0.5 * self.beta * self.E[None,:,:] # dD

         # Compute dL_dKmm
-        self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB
-        self.dL_dKmm += -0.5 * self.D * (- self.LBL_inv - 2.*self.beta*mdot(self.LBL_inv, self.psi2, self.Kmmi) + self.Kmmi) # dC
-        self.dL_dKmm += np.dot(np.dot(self.G,self.beta*self.psi2) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE
-
-    def _get_params(self):
-        return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()])
-
-    def _get_param_names(self):
-        return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed()
+        self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
+        self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
+        self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD

     def log_likelihood(self):
-        """
-        Compute the (lower bound on the) log marginal likelihood
-        """
-        A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
-        B = -0.5*self.beta*self.D*self.trace_K
-        C = -0.5*self.D * self.B_logdet
-        D = -0.5*self.beta*self.trYYT
-        E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
-        return A+B+C+D+E
+        """ Compute the (lower bound on the) log marginal likelihood """
+        sf2 = self.scale_factor**2
+        A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) -0.5*self.beta*self.trYYT
+        B = -0.5*self.D*(self.beta*self.psi0-np.trace(self.A)*sf2)
+        C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
+        D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
+        return A+B+C+D
+
+    def set_param(self, p):
+        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
+        self.beta = p[self.M*self.Q]
+        self.kern.set_param(p[self.Z.size + 1:])
+        self._computations()
+
+    def get_param(self):
+        return np.hstack([self.Z.flatten(),self.beta,self.kern.extract_param()])
+
+    def get_param_names(self):
+        return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern.extract_param_names()

     def dL_dbeta(self):
         """
         Compute the gradient of the log likelihood wrt beta.
         """
         #TODO: support heteroscedastic noise
-        dA_dbeta = 0.5 * self.N*self.D/self.beta
-        dB_dbeta = - 0.5 * self.D * self.trace_K
+        sf2 = self.scale_factor**2
+        dA_dbeta = 0.5 * self.N*self.D/self.beta - 0.5 * self.trYYT
+        dB_dbeta = - 0.5 * self.D * (self.psi0 - np.trace(self.A)/self.beta*sf2)
         dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
-        dD_dbeta = - 0.5 * self.trYYT
-        tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
-        dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
-        return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta + dE_dbeta)
+        dD_dbeta = np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/self.beta
+        return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta)

     def dL_dtheta(self):
         """
@@ -145,10 +149,10 @@ class sparse_GP_regression(GP_regression):
         if self.has_uncertain_inputs:
             dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
             dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
-            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
+            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
         else:
             #re-cast computations in psi2 back to psi1:
-            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
+            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
             dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
             dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
@@ -158,31 +162,31 @@ class sparse_GP_regression(GP_regression):
         """
         The derivative of the bound wrt the inducing inputs Z
         """
-        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z,)#factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
+        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
         if self.has_uncertain_inputs:
-            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
-            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty)
+            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
+            dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
         else:
             #re-cast computations in psi2 back to psi1:
-            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
+            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
             dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
         return dL_dZ

-    def _log_likelihood_gradients(self):
+    def log_likelihood_gradients(self):
         return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])

     def _raw_predict(self, Xnew, slices, full_cov=False):
         """Internal helper function for making predictions, does not account for normalisation"""
         Kx = self.kern.K(self.Z, Xnew)
-        mu = mdot(Kx.T, self.LBL_inv, self.psi1V)
+        mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
         if full_cov:
             Kxx = self.kern.K(Xnew)
-            var = Kxx - mdot(Kx.T, (self.Kmmi - self.LBL_inv), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case.
+            var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case.
         else:
             Kxx = self.kern.Kdiag(Xnew)
-            var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.LBL_inv, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case.
+            var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case.
         return mu,var
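
The scale_factor threaded through these computations is not explained by the commit; a plausible reading (an assumption, stated as such) is numerical conditioning: the matrix factorised is now B/sf^2 rather than B = I + Lm^-1 (beta psi2) Lm^-T, and the bound is kept unchanged by the M*log(sf^2) correction in log_likelihood. A standalone check of that determinant identity:

    import numpy as np

    M, sf = 5, 1000.
    G = np.random.randn(M, 2 * M)
    A = G.dot(G.T) * 1e4                       # a large, badly scaled positive-definite matrix

    B = np.eye(M) + A                          # unscaled B (the pre-commit form)
    B_scaled = np.eye(M) / sf**2 + A / sf**2   # B/sf**2 (the form built in _computations)

    logdet = np.linalg.slogdet(B)[1]
    logdet_scaled = np.linalg.slogdet(B_scaled)[1] + M * np.log(sf**2)
    assert np.allclose(logdet, logdet_scaled)  # identical contribution to the bound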

@@ -63,8 +63,8 @@ def jitchol(A,maxtries=5):
         raise linalg.LinAlgError, "not pd: negative diagonal elements"
     jitter = diagA.mean()*1e-6
     for i in range(1,maxtries+1):
-        print 'Warning: adding jitter of '+str(jitter)
         try:
+            print 'Warning: adding jitter of '+str(jitter)
            return linalg.cholesky(A+np.eye(A.shape[0])*jitter, lower = True)
         except:
             jitter *= 10
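
The warning now prints from inside the try, immediately before each factorisation attempt. A standalone illustration of the pattern jitchol implements (the covariance here is contrived to be nearly singular; not from the commit):

    import numpy as np
    from scipy import linalg

    x = np.linspace(0, 1, 50)[:, None]
    K = np.exp(-0.5 * (x - x.T)**2 / 0.5**2)   # smooth RBF Gram matrix, typically not numerically pd

    jitter = np.diag(K).mean() * 1e-6
    for i in range(1, 6):
        try:
            L = linalg.cholesky(K + np.eye(K.shape[0]) * jitter, lower=True)
            break
        except linalg.LinAlgError:
            jitter *= 10                       # grow the diagonal boost tenfold per retry, as jitchol does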

grid_parameters.py (new file, 64 lines)

@@ -0,0 +1,64 @@
+import numpy as np
+import pylab as pb
+pb.ion()
+import sys
+import GPy
+pb.close('all')
+
+N = 200
+M = 15
+resolution=5
+
+X = np.linspace(0,12,N)[:,None]
+Z = np.linspace(0,12,M)[:,None] # inducing points (fixed for now)
+Y = np.sin(X) + np.random.randn(*X.shape)/np.sqrt(50.)
+
+#k = GPy.kern.rbf(1)
+k = GPy.kern.Matern32(1) + GPy.kern.white(1)
+
+models = [GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
+         ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
+         ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
+         ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)]
+models[0].scale_factor = 1.
+models[1].scale_factor = 10.
+models[2].scale_factor = 100.
+models[3].scale_factor = 1000.
+#GPy.models.sgp_debugB(X,Y,Z=Z,kernel=k),
+#GPy.models.sgp_debugC(X,Y,Z=Z,kernel=k)]#,
+#GPy.models.sgp_debugE(X,Y,Z=Z,kernel=k)]
+
+[m.constrain_fixed('white',0.1) for m in models]
+
+#xx,yy = np.mgrid[1.5:4:0+resolution*1j,-2:2:0+resolution*1j]
+xx,yy = np.mgrid[3:16:0+resolution*1j,-2:1:0+resolution*1j]
+
+lls = []
+cgs = []
+grads = []
+count = 0
+for l,v in zip(xx.flatten(),yy.flatten()):
+    count += 1
+    print count, 'of', resolution**2
+    sys.stdout.flush()
+    [m.set('lengthscale',l) for m in models]
+    [m.set('_variance',10.**v) for m in models]
+    lls.append([m.log_likelihood() for m in models])
+    grads.append([m.log_likelihood_gradients() for m in models])
+    cgs.append([m.checkgrad(verbose=0,return_ratio=True) for m in models])
+
+lls = np.array(zip(*lls)).reshape(-1,resolution,resolution)
+cgs = np.array(zip(*cgs)).reshape(-1,resolution,resolution)
+
+for ll,cg in zip(lls,cgs):
+    pb.figure()
+    pb.contourf(xx,yy,ll,100,cmap=pb.cm.gray)
+    pb.colorbar()
+    try:
+        pb.contour(xx,yy,np.exp(ll),colors='k')
+    except:
+        pass
+    pb.scatter(xx.flatten(),yy.flatten(),20,np.log(np.abs(cg.flatten())),cmap=pb.cm.jet,linewidth=0)
+    pb.colorbar()