trying to fix bugs in kerns

Nicolo Fusi 2013-02-12 17:45:44 +00:00
parent 7fbc6935d9
commit 03c1f77c08
6 changed files with 58 additions and 37 deletions

View file

@@ -7,17 +7,17 @@ import GPy
 np.random.seed(123344)
 N = 10
-M = 5
-Q = 3
-D = 4
+M = 3
+Q = 4
+D = 5
 #generate GPLVM-like data
 X = np.random.rand(N, Q)
 k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
 K = k.K(X)
 Y = np.random.multivariate_normal(np.zeros(N),K,D).T
-# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
-k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q, 0.00001)
+k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
+# k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q, 0.00001)
 m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
 m.constrain_positive('(rbf|bias|noise|white|S)')
 # m.constrain_fixed('S', 1)

View file

@@ -7,7 +7,7 @@ import numpy as np
 import pylab as pb
 import GPy
 import pylab as plt
-np.random.seed(1)
+np.random.seed(3)
 def plot_oil(X, theta, labels, label):
     plt.figure()
@@ -24,25 +24,27 @@ data = pickle.load(open('../../../GPy_assembla/datasets/oil_flow_3classes.pickle
 Y = data['DataTrn']
 N, D = Y.shape
-selected = np.random.permutation(N)#[:200]
+selected = np.random.permutation(N)[:350]
 labels = data['DataTrnLbls'][selected]
 Y = Y[selected]
 N, D = Y.shape
 Y -= Y.mean(axis=0)
-#Y /= Y.std(axis=0)
-Q = 10
-k = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q)
-m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 12)
-m.constrain_positive('(rbf|bias|S|white|noise)')
-# m.constrain_bounded('white', 1e-6, 100.0)
-# m.constrain_bounded('noise', 1e-4, 1000.0)
-plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
-# m.optimize(messages = True)
-m.optimize('tnc', messages = True)
-plot_oil(m.X, m.kern.parts[0].lengthscales, labels, 'B-GPLVM')
-# pb.figure()
+# Y /= Y.std(axis=0)
+Q = 5
+k = GPy.kern.linear(Q, ARD = False) + GPy.kern.white(Q)
+m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 20)
+m.constrain_positive('(rbf|bias|S|linear|white|noise)')
+# m.unconstrain('noise')
+# m.constrain_fixed('noise_precision', 50.0)
+# m.unconstrain('white')
+# m.constrain_bounded('white', 1e-6, 10.0)
+# plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
+m.optimize(messages = True)
+# m.optimize('tnc', messages = True)
+plot_oil(m.X, m.kern.parts[0].lengthscale, labels, 'B-GPLVM')
+# # pb.figure()
 # m.plot()
 # pb.title('PCA initialisation')
 # pb.figure()

View file

@@ -73,9 +73,9 @@ class bias(kernpart):
     def dpsi1_dmuS(self, partial, Z, mu, S, target_mu, target_S):
         pass
     def dpsi2_dtheta(self, partial, Z, mu, S, target):
-        target += 2.*self.variance*partial.sum()
+        target += np.sum(2.*self.variance*partial)
     def dpsi2_dZ(self, partial, Z, mu, S, target):
         pass
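
This change is algebraically neutral for a scalar bias variance: np.sum(2.*self.variance*partial) and 2.*self.variance*partial.sum() reduce to the same scalar, with the scaling simply folded inside the reduction. A quick standalone check (the shape of partial here is illustrative):

    import numpy as np

    variance = 0.5                       # scalar kernel variance (illustrative)
    partial = np.random.randn(4, 3, 3)   # dL/dpsi2 partials (illustrative shape)
    old = 2. * variance * partial.sum()
    new = np.sum(2. * variance * partial)
    assert np.allclose(old, new)         # both forms give the same scalar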

View file

@@ -6,7 +6,7 @@ import numpy as np
 from ..core.parameterised import parameterised
 from kernpart import kernpart
 import itertools
 from product_orthogonal import product_orthogonal
 class kern(parameterised):
     def __init__(self,D,parts=[], input_slices=None):
@@ -323,15 +323,22 @@ class kern(parameterised):
         slices1, slices2 = self._process_slices(slices1,slices2)
         [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
-        # "crossterms"
+        # MASSIVE TODO: do something smart for white
+        # "crossterms". Here we are recomputing psi1 for white (we don't need to), but it's
+        # not really expensive, since it's just a matrix of zeroes.
         # psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
         # [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
+        crossterms = 0.0
+        # for 3 kernels this returns something like
+        # [(0,1), (0,2), (1,2)]
+        # in theory, we should also account for (1,0), (2,0) and so on, but
+        # the transpose deals exactly with that
         # for a,b in itertools.combinations(psi1_matrices, 2):
         #     tmp = np.multiply(a,b)
-        #     target += tmp[:,None,:] + tmp[:, :,None]
-        return target
+        #     crossterms += tmp[:,None,:] + tmp[:, :,None]
+        return target + crossterms
     def dpsi2_dtheta(self,partial,partial1,Z,mu,S,slices1=None,slices2=None):
         """Returns shape (N,M,M,Ntheta)"""
@@ -339,22 +346,26 @@ class kern(parameterised):
         target = np.zeros(self.Nparam)
         [p.dpsi2_dtheta(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
         # # "crossterms"
         # # 1. get all the psi1 statistics
         # psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
         # [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
+        # partial1 = np.zeros_like(partial1)
+        # partial1 = np.ones_like(partial1)
         # # 2. get all the dpsi1/dtheta gradients
         # psi1_gradients = [np.zeros(self.Nparam) for p in self.parts]
         # [p.dpsi1_dtheta(partial1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],psi1g_target[ps]) for p,ps,s1,s2,i_s,psi1g_target in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices,psi1_gradients)]
         # # 3. multiply them somehow
         # for a,b in itertools.combinations(range(len(psi1_matrices)), 2):
-        #     gne = (psi1_gradients[a][None]*psi1_matrices[b].sum(0)[:,None]).sum(0)
-        #     target += (gne[None] + gne[:, None]).sum(0)
+        #     tmp = (psi1_gradients[a][None, None] * psi1_matrices[b][:,:, None])
+        #     # target += (tmp[None] + tmp[:,None]).sum(0).sum(0).sum(0)
+        #     # gne = (psi1_gradients[a].sum()*psi1_matrices[b].sum())
+        #     # target += gne
+        #     # target += (gne[None] + gne[:, None]).sum(0)
+        #     target += (partial.sum(0)[:,:,None] * (tmp[:, None] + tmp[:,:,None]).sum(0)).sum(0).sum(0)
         return target
     def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
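
The new comments describe how the psi2 "crossterms" between kernel parts would be assembled once the commented block is re-enabled (in this commit crossterms stays 0.0). Below is a minimal standalone sketch of the mechanics those comments describe, with psi1_matrices as a stand-in for the per-part psi1 statistics:

    import itertools
    import numpy as np

    N, M = 4, 3
    psi1_matrices = [np.random.rand(N, M) for _ in range(3)]  # one psi1 per part

    crossterms = 0.0
    # combinations over three parts yields [(0, 1), (0, 2), (1, 2)]; the mirrored
    # pairs (1, 0), (2, 0), (2, 1) are covered because the second broadcast term
    # below is the transpose of the first over the trailing axes
    for a, b in itertools.combinations(psi1_matrices, 2):
        tmp = np.multiply(a, b)                                       # (N, M)
        crossterms = crossterms + tmp[:, None, :] + tmp[:, :, None]   # (N, M, M)

    # the accumulated crossterms are symmetric in the trailing (M, M) axes
    assert np.allclose(crossterms, np.swapaxes(crossterms, 1, 2))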

View file

@@ -102,8 +102,8 @@ class linear(kernpart):
         target += tmp.sum()
     def dpsi0_dmuS(self,partial, Z,mu,S,target_mu,target_S):
-        target_mu += partial[:, None] * (2.0*mu*self.variances) * mu.shape[0]
-        target_S += partial[:, None] * self.variances * mu.shape[0]
+        target_mu += np.sum(partial[:, None],0) * (2.0*mu*self.variances)
+        target_S += np.sum(partial[:, None] * self.variances, 0)
     def dpsi0_dZ(self,Z,mu,S,target):
         pass
@@ -140,7 +140,6 @@ class linear(kernpart):
         else:
             target += tmp.sum()
     def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
         self._psi_computations(Z,mu,S)
@@ -174,6 +173,6 @@ class linear(kernpart):
             #Z has changed, compute Z specific stuff
             self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
             self._Z = Z
-        if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
+        if not (np.all(mu==self._mu) and np.all(S==self._S)):
             self.mu2_S = np.square(mu)+S
-            self._Z, self._mu, self._S = Z, mu,S
+            self._mu, self._S = mu, S
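
The last hunk untangles the two caches in the linear kernpart: by the time the old condition ran, self._Z had already been refreshed in the Z branch above, so np.all(Z==self._Z) was always true and only cluttered the (mu, S) check. A minimal sketch of the resulting pattern, with the class and attribute names illustrative:

    import numpy as np

    class PsiCache(object):
        # cache Z-specific and (mu, S)-specific quantities independently
        def __init__(self):
            self._Z = self._mu = self._S = None
        def update(self, Z, mu, S):
            if not np.all(Z == self._Z):
                # Z changed: recompute Z-specific stuff only
                self.ZZ = Z[:, None, :] * Z[None, :, :]  # M,M,Q
                self._Z = Z
            if not (np.all(mu == self._mu) and np.all(S == self._S)):
                # mu or S changed: recompute their stuff, without the
                # now-vacuous comparison of Z against the just-updated _Z
                self.mu2_S = np.square(mu) + S
                self._mu, self._S = mu, S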

View file

@@ -58,8 +58,17 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
         dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
         dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
-        return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))
+        dKL_dS = (1. - (1./self.X_uncertainty))*0.5
+        dKL_dmu = self.X
+        return np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
+    def KL_divergence(self):
+        var_mean = np.square(self.X).sum()
+        var_S = np.sum(self.X_uncertainty - np.log(self.X_uncertainty))
+        return 0.5*(var_mean + var_S) - 0.5*self.Q*self.N
+    def log_likelihood(self):
+        return sparse_GP_regression.log_likelihood(self) - self.KL_divergence()
     def _log_likelihood_gradients(self):
         return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression._log_likelihood_gradients(self)))
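
The new KL_divergence and the dKL_dmu/dKL_dS corrections match the closed form for the KL divergence between the factorised variational posterior q(X) = N(mu, diag(S)) and the standard-normal prior on X. For reference (a standard identity, stated here rather than taken from the commit), in LaTeX:

    \mathrm{KL}(q \,\|\, p) = \frac{1}{2} \sum_{n,j} \left( \mu_{nj}^2 + S_{nj} - \log S_{nj} - 1 \right),
    \qquad
    \frac{\partial \mathrm{KL}}{\partial \mu_{nj}} = \mu_{nj},
    \qquad
    \frac{\partial \mathrm{KL}}{\partial S_{nj}} = \frac{1}{2}\left(1 - \frac{1}{S_{nj}}\right).

The sum splits into var_mean = sum(mu^2), var_S = sum(S - log S) and the constant -0.5*Q*N, and the gradients are exactly dKL_dmu = self.X and dKL_dS = (1. - 1./self.X_uncertainty)*0.5 as implemented above.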