Merge branch 'bgplvm' into SGD

This commit is contained in:
Nicolo Fusi 2013-01-30 11:18:31 +00:00
commit c71ee37064
16 changed files with 398 additions and 147 deletions

View file

@ -181,7 +181,7 @@ class model(parameterised):
else: else:
raise e raise e
if len(self.optimization_runs): if len(self.optimization_runs):
i = np.argmax([o.f_opt for o in self.optimization_runs]) i = np.argmin([o.f_opt for o in self.optimization_runs])
self.expand_param(self.optimization_runs[i].x_opt) self.expand_param(self.optimization_runs[i].x_opt)
else: else:
self.expand_param(initial_parameters) self.expand_param(initial_parameters)
@ -286,7 +286,7 @@ class model(parameterised):
return '\n'.join(s) return '\n'.join(s)
def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance = 1e-3, *args): def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance = 1e-3, return_ratio=False, *args):
""" """
Check the gradient of the model by comparing to a numerical estimate. Check the gradient of the model by comparing to a numerical estimate.
If the overall gradient fails, invividual components are tested. If the overall gradient fails, invividual components are tested.
@ -306,12 +306,12 @@ class model(parameterised):
gradient = self.extract_gradients() gradient = self.extract_gradients()
numerical_gradient = (f1-f2)/(2*dx) numerical_gradient = (f1-f2)/(2*dx)
ratio = (f1-f2)/(2*np.dot(dx,gradient)) global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
if verbose: if verbose:
print "Gradient ratio = ", ratio, '\n' print "Gradient ratio = ", global_ratio, '\n'
sys.stdout.flush() sys.stdout.flush()
if (np.abs(1.-ratio)<tolerance) and not np.isnan(ratio): if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio):
if verbose: if verbose:
print 'Gradcheck passed' print 'Gradcheck passed'
else: else:
@ -363,7 +363,15 @@ class model(parameterised):
grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4]) grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4])
print grad_string print grad_string
print '' if verbose:
print ''
return False
return True if return_ratio:
return global_ratio
else:
return False
if return_ratio:
return global_ratio
else:
return True

View file

@ -20,26 +20,28 @@ def plot_oil(X, theta, labels, label):
plt.plot(flow_type[:,0], flow_type[:,1], 'bx') plt.plot(flow_type[:,0], flow_type[:,1], 'bx')
plt.title(label) plt.title(label)
data = pickle.load(open('../util/datasets/oil_flow_3classes.pickle', 'r')) data = pickle.load(open('../../../GPy_assembla/datasets/oil_flow_3classes.pickle', 'r'))
Y = data['DataTrn'] Y = data['DataTrn']
N, D = Y.shape N, D = Y.shape
selected = np.random.permutation(N)[:200] selected = np.random.permutation(N)#[:200]
labels = data['DataTrnLbls'][selected] labels = data['DataTrnLbls'][selected]
Y = Y[selected] Y = Y[selected]
N, D = Y.shape N, D = Y.shape
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
Y /= Y.std(axis=0) #Y /= Y.std(axis=0)
Q = 2 Q = 10
m1 = GPy.models.sparse_GPLVM(Y, Q, M = 15) k = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q)
m1.constrain_positive('(rbf|bias|noise)') m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 12)
m1.constrain_bounded('white', 1e-6, 1.0) m.constrain_positive('(rbf|bias|S|white|noise)')
# m.constrain_bounded('white', 1e-6, 100.0)
# m.constrain_bounded('noise', 1e-4, 1000.0)
plot_oil(m1.X, np.array([1,1]), labels, 'PCA initialization') plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
# m.optimize(messages = True) # m.optimize(messages = True)
m1.optimize('bfgs', messages = True) m.optimize('tnc', messages = True)
plot_oil(m1.X, np.array([1,1]), labels, 'sparse GPLVM') plot_oil(m.X, m.kern.parts[0].lengthscales, labels, 'B-GPLVM')
# pb.figure() # pb.figure()
# m.plot() # m.plot()
# pb.title('PCA initialisation') # pb.title('PCA initialisation')
@ -47,7 +49,7 @@ plot_oil(m1.X, np.array([1,1]), labels, 'sparse GPLVM')
# m.optimize(messages = 1) # m.optimize(messages = 1)
# m.plot() # m.plot()
# pb.title('After optimisation') # pb.title('After optimisation')
m = GPy.models.GPLVM(Y, Q) # m = GPy.models.GPLVM(Y, Q)
m.constrain_positive('(white|rbf|bias|noise)') # m.constrain_positive('(white|rbf|bias|noise)')
m.optimize() # m.optimize()
plot_oil(m.X, np.array([1,1]), labels, 'GPLVM') # plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')

View file

@ -9,7 +9,7 @@ np.random.seed(1)
print "sparse GPLVM with RBF kernel" print "sparse GPLVM with RBF kernel"
N = 100 N = 100
M = 4 M = 8
Q = 1 Q = 1
D = 2 D = 2
#generate GPLVM-like data #generate GPLVM-like data
@ -19,9 +19,7 @@ K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,D).T
m = GPy.models.sparse_GPLVM(Y, Q, M=M) m = GPy.models.sparse_GPLVM(Y, Q, M=M)
m.constrain_positive('(rbf|bias|noise)') m.constrain_positive('(rbf|bias|noise|white)')
m.constrain_bounded('white', 1e-3, 0.1)
# m.plot()
pb.figure() pb.figure()
m.plot() m.plot()

View file

@ -11,7 +11,7 @@ import numpy as np
import GPy import GPy
np.random.seed(2) np.random.seed(2)
pb.ion() pb.ion()
N = 500 N = 400
M = 5 M = 5
###################################### ######################################
@ -27,20 +27,13 @@ noise = GPy.kern.white(1)
kernel = rbf + noise kernel = rbf + noise
# create simple GP model # create simple GP model
m1 = GPy.models.sparse_GP_regression(X, Y, kernel, M=M) m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
# contrain all parameters to be positive m.constrain_positive('(variance|lengthscale|precision)')
m1.constrain_positive('(variance|lengthscale|precision)')
#m1.constrain_positive('(variance|lengthscale)')
#m1.constrain_fixed('prec',10.)
m.checkgrad(verbose=1)
#check gradient FIXME unit test please m.optimize('tnc', messages = 1)
m1.checkgrad() m.plot()
# optimize and plot
m1.optimize('tnc', messages = 1)
m1.plot()
# print(m1)
###################################### ######################################
## 2 dimensional example ## 2 dimensional example

View file

@ -78,14 +78,14 @@ class Matern32(kernpart):
"""derivative of the diagonal of the covariance matrix with respect to the parameters.""" """derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(partial) target[0] += np.sum(partial)
def dK_dX(self,X,X2,target): def dK_dX(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None] dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf) ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
dK_dX += - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2)) dK_dX = - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0) target += np.sum(dK_dX*partial.T[:,:,None],0)
def dKdiag_dX(self,X,target): def dKdiag_dX(self,X,target):
pass pass

View file

@ -33,7 +33,6 @@ class Matern52(kernpart):
self.Nparam = self.D + 1 self.Nparam = self.D + 1
self.name = 'Mat52' self.name = 'Mat52'
self.set_param(np.hstack((variance,lengthscales))) self.set_param(np.hstack((variance,lengthscales)))
def get_param(self): def get_param(self):
"""return the value of the parameters.""" """return the value of the parameters."""
@ -77,12 +76,12 @@ class Matern52(kernpart):
"""derivative of the diagonal of the covariance matrix with respect to the parameters.""" """derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(partial) target[0] += np.sum(partial)
def dK_dX(self,X,X2,target): def dK_dX(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None] dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf) ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
dK_dX += - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2)) dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0) target += np.sum(dK_dX*partial.T[:,:,None],0)
def dKdiag_dX(self,X,target): def dKdiag_dX(self,X,target):
@ -97,11 +96,11 @@ class Matern52(kernpart):
:param F1: vector of derivatives of F :param F1: vector of derivatives of F
:type F1: np.array :type F1: np.array
:param F2: vector of second derivatives of F :param F2: vector of second derivatives of F
:type F2: np.array :type F2: np.array
:param F3: vector of third derivatives of F :param F3: vector of third derivatives of F
:type F3: np.array :type F3: np.array
:param lower,upper: boundaries of the input domain :param lower,upper: boundaries of the input domain
:type lower,upper: floats :type lower,upper: floats
""" """
assert self.D == 1 assert self.D == 1
def L(x,i): def L(x,i):

View file

@ -77,7 +77,7 @@ class exponential(kernpart):
#NB: derivative of diagonal elements wrt lengthscale is 0 #NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(partial) target[0] += np.sum(partial)
def dK_dX(self,X,X2,target): def dK_dX(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None] dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]

View file

@ -6,7 +6,7 @@ import numpy as np
from ..core.parameterised import parameterised from ..core.parameterised import parameterised
from functools import partial from functools import partial
from kernpart import kernpart from kernpart import kernpart
import itertools
class kern(parameterised): class kern(parameterised):
def __init__(self,D,parts=[], input_slices=None): def __init__(self,D,parts=[], input_slices=None):
@ -259,29 +259,54 @@ class kern(parameterised):
:Z: np.ndarray of inducing inputs (M x Q) :Z: np.ndarray of inducing inputs (M x Q)
: mu, S: np.ndarrays of means and variacnes (each N x Q) : mu, S: np.ndarrays of means and variacnes (each N x Q)
:returns psi2: np.ndarray (N,M,M,Q) """ :returns psi2: np.ndarray (N,M,M,Q) """
target = np.zeros((Z.shape[0],Z.shape[0])) target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0]))
slices1, slices2 = self._process_slices(slices1,slices2) slices1, slices2 = self._process_slices(slices1,slices2)
[p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
# MASSIVE TODO: do something smart for white
# "crossterms"
psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
[p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
for a,b in itertools.combinations(psi1_matrices, 2):
tmp = np.multiply(a,b)
target += tmp[:,None,:] + tmp[:, :,None]
return target return target
def dpsi2_dtheta(self,partial,Z,mu,S,slices1=None,slices2=None): def dpsi2_dtheta(self,partial,partial1,Z,mu,S,slices1=None,slices2=None):
"""Returns shape (N,M,M,Ntheta)""" """Returns shape (N,M,M,Ntheta)"""
slices1, slices2 = self._process_slices(slices1,slices2) slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros(self.Nparam) target = np.zeros(self.Nparam)
[p.dpsi2_dtheta(partial[s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)] [p.dpsi2_dtheta(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
# "crossterms"
# 1. get all the psi1 statistics
psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
[p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
# 2. get all the dpsi1/dtheta gradients
psi1_gradients = [np.zeros(self.Nparam) for p in self.parts]
[p.dpsi1_dtheta(partial1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],psi1g_target[ps]) for p,ps,s1,s2,i_s,psi1g_target in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices,psi1_gradients)]
# 3. multiply them somehow
for a,b in itertools.combinations(range(len(psi1_matrices)), 2):
gne = (psi1_gradients[a][None]*psi1_matrices[b].sum(0)[:,None]).sum(0)
target += 0#(gne[None] + gne[:, None]).sum(0)
return target return target
def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None): def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
slices1, slices2 = self._process_slices(slices1,slices2) slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros_like(Z) target = np.zeros_like(Z)
[p.dpsi2_dZ(partial[s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] [p.dpsi2_dZ(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
return target return target
def dpsi2_dmuS(self,Z,mu,S,slices1=None,slices2=None): def dpsi2_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None):
"""return shapes are N,M,M,Q""" """return shapes are N,M,M,Q"""
slices1, slices2 = self._process_slices(slices1,slices2) slices1, slices2 = self._process_slices(slices1,slices2)
target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1])) target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
[p.dpsi2_dmuS(partial[s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] [p.dpsi2_dmuS(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
#TODO: there are some extra terms to compute here! #TODO: there are some extra terms to compute here!
return target_mu, target_S return target_mu, target_S

View file

@ -95,9 +95,9 @@ class rbf(kernpart):
target += self.variance target += self.variance
def dpsi0_dtheta(self,partial,Z,mu,S,target): def dpsi0_dtheta(self,partial,Z,mu,S,target):
target[0] += 1. target[0] += np.sum(partial)
def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
pass pass
def psi1(self,Z,mu,S,target): def psi1(self,Z,mu,S,target):
@ -113,13 +113,15 @@ class rbf(kernpart):
def dpsi1_dZ(self,partial,Z,mu,S,target): def dpsi1_dZ(self,partial,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscale2/self._psi1_denom,0) denominator = (self.lengthscale2*(self._psi1_denom))
dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator))
target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom
target_mu += np.sum(partial*tmp*self._psi1_dist,1) target_mu += np.sum(partial.T[:, :, None]*tmp*self._psi1_dist,1)
target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1) target_S += np.sum(partial.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1)
def psi2(self,Z,mu,S,target): def psi2(self,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
@ -132,20 +134,21 @@ class rbf(kernpart):
d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom) d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
d_length = d_length.sum(0) d_length = d_length.sum(0)
target[0] += np.sum(partial*d_var) target[0] += np.sum(partial*d_var)
target[1] += np.sum(d_length*partial) target[1] += np.sum(d_length*partial[:,:,None])
def dpsi2_dZ(self,partial,Z,mu,S,target): def dpsi2_dZ(self,partial,Z,mu,S,target):
"""Returns shape N,M,M,Q"""
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom) term1 = 0.5*self._psi2_Zdist/self.lengthscale2 # M, M, Q
target += np.sum(partial[None,:,:,None]*dZ,0).sum(1) term2 = self._psi2_mudist/self._psi2_denom/self.lengthscale2 # N, M, M, Q
dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
target += (partial[None,:,:,None]*dZ).sum(0).sum(0)
def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
"""Think N,M,M,Q """ """Think N,M,M,Q """
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1) target_mu += (partial[None,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) target_S += (partial[None,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
def _psi_computations(self,Z,mu,S): def _psi_computations(self,Z,mu,S):
#here are the "statistics" for psi1 and psi2 #here are the "statistics" for psi1 and psi2
@ -175,4 +178,3 @@ class rbf(kernpart):
self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
self._Z, self._mu, self._S = Z, mu,S self._Z, self._mu, self._S = Z, mu,S

View file

@ -74,9 +74,9 @@ class rbf_ARD(kernpart):
target += self.variance target += self.variance
def dpsi0_dtheta(self,partial,Z,mu,S,target): def dpsi0_dtheta(self,partial,Z,mu,S,target):
target[0] += 1. target[0] += np.sum(partial)
def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
pass pass
def psi1(self,Z,mu,S,target): def psi1(self,Z,mu,S,target):
@ -92,41 +92,45 @@ class rbf_ARD(kernpart):
def dpsi1_dZ(self,partial,Z,mu,S,target): def dpsi1_dZ(self,partial,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target) # np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target)
target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,0) denominator = (self.lengthscales2*(self._psi1_denom))
dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator))
target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
"""return shapes are N,M,Q""" """return shapes are N,M,Q"""
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = self._psi1[:,:,None]/self.lengthscales2/self._psi1_denom tmp = self._psi1[:,:,None]/self.lengthscales2/self._psi1_denom
target_mu += np.sum(partial*tmp*self._psi1_dist,1) target_mu += np.sum(partial.T[:, :, None]*tmp*self._psi1_dist,1)
target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1) target_S += np.sum(partial.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1)
def psi2(self,Z,mu,S,target): def psi2(self,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
target += self._psi2.sum(0) #TODO: psi2 should be NxMxM (for het. noise) target += self._psi2
def dpsi2_dtheta(self,Z,mu,S,target): def dpsi2_dtheta(self,partial,Z,mu,S,target):
"""Shape N,M,M,Ntheta""" """Shape N,M,M,Ntheta"""
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
d_var = np.sum(2.*self._psi2/self.variance,0) d_var = 2.*self._psi2/self.variance
d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscales2)/(self.lengthscales*self._psi2_denom) d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscales2)/(self.lengthscales*self._psi2_denom)
d_length = d_length.sum(0) # d_length = d_length.sum(0)
target[0] += np.sum(partial*d_var) target[0] += np.sum(partial*d_var)
target[1:] += (d_length*partial[:,:,None]).sum(0).sum(0) target[1:] += (d_length*partial[:,:,:,None]).sum(0).sum(0).sum(0)
def dpsi2_dZ(self,Z,mu,S,target): def dpsi2_dZ(self,partial,Z,mu,S,target):
"""Returns shape N,M,M,Q""" """Returns shape N,M,M,Q"""
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom) term1 = 0.5*self._psi2_Zdist/self.lengthscales2 # M, M, Q
target += np.sum(partial[None,:,:,None]*dZ,0).sum(1) term2 = self._psi2_mudist/self._psi2_denom/self.lengthscales2 # N, M, M, Q
dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
target += (partial[:,:,:,None]*dZ).sum(0).sum(0)
def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
"""Think N,M,M,Q """ """Think N,M,M,Q """
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = self._psi2[:,:,:,None]/self.lengthscales2/self._psi2_denom tmp = self._psi2[:,:,:,None]/self.lengthscales2/self._psi2_denom
target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1) target_mu += (partial[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) target_S += (partial[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
def _K_computations(self,X,X2): def _K_computations(self,X,X2):
if not (np.all(X==self._X) and np.all(X2==self._X2)): if not (np.all(X==self._X) and np.all(X2==self._X2)):
@ -247,5 +251,3 @@ if __name__=='__main__':
return f,df return f,df
print "psi2_theta_test" print "psi2_theta_test"
checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,)) checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,))

62
GPy/models/BGPLVM.py Normal file
View file

@ -0,0 +1,62 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import sys, pdb
from GPLVM import GPLVM
from sparse_GP_regression import sparse_GP_regression
from GPy.util.linalg import pdinv
class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
"""
Bayesian Gaussian Process Latent Variable Model
:param Y: observed data
:type Y: np.ndarray
:param Q: latent dimensionality
:type Q: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
"""
def __init__(self, Y, Q, init='PCA', **kwargs):
X = self.initialise_latent(init, Q, Y)
S = np.ones_like(X) * 1e-2#
sparse_GP_regression.__init__(self, X, Y, X_uncertainty = S, **kwargs)
def get_param_names(self):
X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
return (X_names + S_names + sparse_GP_regression.get_param_names(self))
def get_param(self):
"""
Horizontally stacks the parameters in order to present them to the optimizer.
The resulting 1-D array has this structure:
===============================================================
| mu | S | Z | beta | theta |
===============================================================
"""
return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression.get_param(self)))
def set_param(self,x):
N, Q = self.N, self.Q
self.X = x[:self.X.size].reshape(N,Q).copy()
self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
sparse_GP_regression.set_param(self, x[(2*N*Q):])
def dL_dmuS(self):
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_uncertainty)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_uncertainty)
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))
def log_likelihood_gradients(self):
return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression.log_likelihood_gradients(self)))

View file

@ -54,7 +54,7 @@ class GPLVM(GP_regression):
def plot(self): def plot(self):
assert self.Y.shape[1]==2 assert self.Y.shape[1]==2
pb.scatter(self.Y[:,0],self.Y[:,1],40,self.X[:,0].copy(),linewidth=0) pb.scatter(self.Y[:,0],self.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet)
Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None] Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None]
mu, var = self.predict(Xnew) mu, var = self.predict(Xnew)
pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5) pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)

View file

@ -3,10 +3,11 @@
from GP_regression import GP_regression from GP_regression import GP_regression
from sparse_GP_regression import sparse_GP_regression from sparse_GP_regression import sparse_GP_regression, sgp_debugB, sgp_debugC, sgp_debugE
from GPLVM import GPLVM from GPLVM import GPLVM
from warped_GP import warpedGP from warped_GP import warpedGP
from GP_EP import GP_EP from GP_EP import GP_EP
from generalized_FITC import generalized_FITC from generalized_FITC import generalized_FITC
from sparse_GPLVM import sparse_GPLVM from sparse_GPLVM import sparse_GPLVM
from uncollapsed_sparse_GP import uncollapsed_sparse_GP from uncollapsed_sparse_GP import uncollapsed_sparse_GP
from BGPLVM import Bayesian_GPLVM

View file

@ -37,6 +37,7 @@ class sparse_GP_regression(GP_regression):
""" """
def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False): def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False):
self.scale_factor = 1000.0
self.beta = beta self.beta = beta
if Z is None: if Z is None:
self.Z = np.random.permutation(X.copy())[:M] self.Z = np.random.permutation(X.copy())[:M]
@ -59,52 +60,67 @@ class sparse_GP_regression(GP_regression):
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.X_uncertainty /= np.square(self._Xstd) self.X_uncertainty /= np.square(self._Xstd)
def set_param(self, p): def _computations(self):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) # TODO find routine to multiply triangular matrices
self.beta = p[self.M*self.Q]
self.kern.set_param(p[self.Z.size + 1:])
self.beta2 = self.beta**2
self._compute_kernel_matrices()
self._computations()
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
#TODO: slices for psi statistics (easy enough) #TODO: slices for psi statistics (easy enough)
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z) self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum() self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty) self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
self.psi2_beta_scaled = (self.psi2*(self.beta/self.scale_factor**2)).sum(0)
else: else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum() self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
self.psi1 = self.kern.K(self.Z,self.X) self.psi1 = self.kern.K(self.Z,self.X)
self.psi2 = np.dot(self.psi1,self.psi1.T) #self.psi2 = np.dot(self.psi1,self.psi1.T)
#self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:]
tmp = self.psi1/(self.scale_factor/np.sqrt(self.beta))
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
sf = self.scale_factor
sf2 = sf**2
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)#+np.eye(self.M)*1e-3)
self.V = (self.beta/self.scale_factor)*self.Y
self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T)
self.B = np.eye(self.M)/sf2 + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
def _computations(self):
# TODO find routine to multiply triangular matrices
self.V = self.beta*self.Y
self.psi1V = np.dot(self.psi1, self.V) self.psi1V = np.dot(self.psi1, self.V)
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T) self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
self.B = np.eye(self.M) + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.LLambdai = np.dot(self.LBi, self.Lmi)
self.trace_K = self.psi0 - np.trace(self.A)/self.beta
self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.C = mdot(self.LLambdai, self.psi1V)
self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T)
# Compute dL_dpsi # Compute dL_dpsi
self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N) self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
self.dL_dpsi1 = mdot(self.LLambdai.T,self.C,self.V.T) self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G) self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 += - 0.5 * self.beta * self.E[None,:,:] # dD
# Compute dL_dKmm # Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
self.dL_dKmm += -0.5 * self.D * (- self.LBL_inv - 2.*self.beta*mdot(self.LBL_inv, self.psi2, self.Kmmi) + self.Kmmi) # dC self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
self.dL_dKmm += np.dot(np.dot(self.G,self.beta*self.psi2) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) -0.5*self.beta*self.trYYT
B = -0.5*self.D*(self.beta*self.psi0-np.trace(self.A)*sf2)
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
return A+B+C+D
def set_param(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
self.beta = p[self.M*self.Q]
self.kern.set_param(p[self.Z.size + 1:])
self._computations()
def get_param(self): def get_param(self):
return np.hstack([self.Z.flatten(),self.beta,self.kern.extract_param()]) return np.hstack([self.Z.flatten(),self.beta,self.kern.extract_param()])
@ -112,30 +128,18 @@ class sparse_GP_regression(GP_regression):
def get_param_names(self): def get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern.extract_param_names() return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern.extract_param_names()
def log_likelihood(self):
"""
Compute the (lower bound on the) log marginal likelihood
"""
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
B = -0.5*self.beta*self.D*self.trace_K
C = -0.5*self.D * self.B_logdet
D = -0.5*self.beta*self.trYYT
E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
return A+B+C+D+E
def dL_dbeta(self): def dL_dbeta(self):
""" """
Compute the gradient of the log likelihood wrt beta. Compute the gradient of the log likelihood wrt beta.
""" """
#TODO: suport heteroscedatic noise #TODO: suport heteroscedatic noise
dA_dbeta = 0.5 * self.N*self.D/self.beta sf2 = self.scale_factor**2
dB_dbeta = - 0.5 * self.D * self.trace_K dA_dbeta = 0.5 * self.N*self.D/self.beta - 0.5 * self.trYYT
dB_dbeta = - 0.5 * self.D * (self.psi0 - np.trace(self.A)/self.beta*sf2)
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = - 0.5 * self.trYYT dD_dbeta = np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/self.beta
tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta + dE_dbeta) return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta)
def dL_dtheta(self): def dL_dtheta(self):
""" """
@ -145,10 +149,10 @@ class sparse_GP_regression(GP_regression):
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty) dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty) dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
else: else:
#re-cast computations in psi2 back to psi1: #re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1) dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X) dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X) dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
@ -158,13 +162,13 @@ class sparse_GP_regression(GP_regression):
""" """
The derivative of the bound wrt the inducing inputs Z The derivative of the bound wrt the inducing inputs Z
""" """
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z,)#factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty) dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
else: else:
#re-cast computations in psi2 back to psi1: #re-cast computations in psi2 back to psi1:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1) dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X) dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
return dL_dZ return dL_dZ
@ -175,14 +179,14 @@ class sparse_GP_regression(GP_regression):
"""Internal helper function for making predictions, does not account for normalisation""" """Internal helper function for making predictions, does not account for normalisation"""
Kx = self.kern.K(self.Z, Xnew) Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.LBL_inv, self.psi1V) mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov: if full_cov:
Kxx = self.kern.K(Xnew) Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.LBL_inv), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case. var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case.
else: else:
Kxx = self.kern.Kdiag(Xnew) Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.LBL_inv, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case. var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case.
return mu,var return mu,var
@ -197,3 +201,94 @@ class sparse_GP_regression(GP_regression):
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten())) pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
if self.Q==2: if self.Q==2:
pb.plot(self.Z[:,0],self.Z[:,1],'wo') pb.plot(self.Z[:,0],self.Z[:,1],'wo')
class sgp_debugB(sparse_GP_regression):
def _computations(self):
sparse_GP_regression._computations(self)
# Compute dL_dpsi
self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
self.dL_dpsi1 = np.zeros_like(self.psi1)
self.dL_dpsi2 = - 0.5 * self.beta * (self.D*( - self.Kmmi))
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB
def log_likelihood(self):
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
B = -0.5*self.beta*self.D*self.trace_K
C = -0.5*self.D * self.B_logdet
D = -0.5*self.beta*self.trYYT
E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
return B
def dL_dbeta(self):
dA_dbeta = 0.5 * self.N*self.D/self.beta
dB_dbeta = - 0.5 * self.D * self.trace_K
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = - 0.5 * self.trYYT
tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
return np.squeeze(dB_dbeta)
class sgp_debugC(sparse_GP_regression):
def _computations(self):
sparse_GP_regression._computations(self)
# Compute dL_dpsi
self.dL_dpsi0 = np.zeros(self.N)
self.dL_dpsi1 = np.zeros_like(self.psi1)
self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv))
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * (- self.LBL_inv - 2.*self.beta*mdot(self.LBL_inv, self.psi2, self.Kmmi) + self.Kmmi) # dC
def log_likelihood(self):
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
B = -0.5*self.beta*self.D*self.trace_K
C = -0.5*self.D * self.B_logdet
D = -0.5*self.beta*self.trYYT
E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
return C
def dL_dbeta(self):
dA_dbeta = 0.5 * self.N*self.D/self.beta
dB_dbeta = - 0.5 * self.D * self.trace_K
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = - 0.5 * self.trYYT
tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
return np.squeeze(dC_dbeta)
class sgp_debugE(sparse_GP_regression):
def _computations(self):
sparse_GP_regression._computations(self)
# Compute dL_dpsi
self.dL_dpsi0 = np.zeros(self.N)
self.dL_dpsi1 = np.zeros_like(self.psi1)
self.dL_dpsi2 = - 0.5 * self.beta * (self.G)
# Compute dL_dKmm
tmp = mdot(self.beta*self.psi2, self.Kmmi, self.psi1VVpsi1)
self.dL_dKmm = -0.5*mdot(self.Kmmi,tmp + tmp.T + self.psi1VVpsi1,self.Kmmi)
#self.dL_dKmm = np.dot(np.dot(self.G,self.beta*self.psi2) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE
def log_likelihood(self):
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
B = -0.5*self.beta*self.D*self.trace_K
C = -0.5*self.D * self.B_logdet
D = -0.5*self.beta*self.trYYT
E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
return E
def dL_dbeta(self):
dA_dbeta = 0.5 * self.N*self.D/self.beta
dB_dbeta = - 0.5 * self.D * self.trace_K
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = - 0.5 * self.trYYT
tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
return np.squeeze(dE_dbeta)

View file

@ -63,8 +63,8 @@ def jitchol(A,maxtries=5):
raise linalg.LinAlgError, "not pd: negative diagonal elements" raise linalg.LinAlgError, "not pd: negative diagonal elements"
jitter= diagA.mean()*1e-6 jitter= diagA.mean()*1e-6
for i in range(1,maxtries+1): for i in range(1,maxtries+1):
print 'Warning: adding jitter of '+str(jitter)
try: try:
print 'Warning: adding jitter of '+str(jitter)
return linalg.cholesky(A+np.eye(A.shape[0])*jitter, lower = True) return linalg.cholesky(A+np.eye(A.shape[0])*jitter, lower = True)
except: except:
jitter *= 10 jitter *= 10

64
grid_parameters.py Normal file
View file

@ -0,0 +1,64 @@
import numpy as np
import pylab as pb
pb.ion()
import sys
import GPy
pb.close('all')
N = 200
M = 15
resolution=5
X = np.linspace(0,12,N)[:,None]
Z = np.linspace(0,12,M)[:,None] # inducing points (fixed for now)
Y = np.sin(X) + np.random.randn(*X.shape)/np.sqrt(50.)
#k = GPy.kern.rbf(1)
k = GPy.kern.Matern32(1) + GPy.kern.white(1)
models = [GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)]
models[0].scale_factor = 1.
models[1].scale_factor = 10.
models[2].scale_factor = 100.
models[3].scale_factor = 1000.
#GPy.models.sgp_debugB(X,Y,Z=Z,kernel=k),
#GPy.models.sgp_debugC(X,Y,Z=Z,kernel=k)]#,
#GPy.models.sgp_debugE(X,Y,Z=Z,kernel=k)]
[m.constrain_fixed('white',0.1) for m in models]
#xx,yy = np.mgrid[1.5:4:0+resolution*1j,-2:2:0+resolution*1j]
xx,yy = np.mgrid[3:16:0+resolution*1j,-2:1:0+resolution*1j]
lls = []
cgs = []
grads = []
count = 0
for l,v in zip(xx.flatten(),yy.flatten()):
count += 1
print count, 'of', resolution**2
sys.stdout.flush()
[m.set('lengthscale',l) for m in models]
[m.set('_variance',10.**v) for m in models]
lls.append([m.log_likelihood() for m in models])
grads.append([m.log_likelihood_gradients() for m in models])
cgs.append([m.checkgrad(verbose=0,return_ratio=True) for m in models])
lls = np.array(zip(*lls)).reshape(-1,resolution,resolution)
cgs = np.array(zip(*cgs)).reshape(-1,resolution,resolution)
for ll,cg in zip(lls,cgs):
pb.figure()
pb.contourf(xx,yy,ll,100,cmap=pb.cm.gray)
pb.colorbar()
try:
pb.contour(xx,yy,np.exp(ll),colors='k')
except:
pass
pb.scatter(xx.flatten(),yy.flatten(),20,np.log(np.abs(cg.flatten())),cmap=pb.cm.jet,linewidth=0)
pb.colorbar()