From 03c1f77c0808cb647c636a2e03b4378e47221db1 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Tue, 12 Feb 2013 17:45:44 +0000
Subject: [PATCH] trying to fix bugs in kerns

---
 GPy/examples/BGPLVM_demo.py   | 10 +++++-----
 GPy/examples/oil_flow_demo.py | 30 ++++++++++++++++--------------
 GPy/kern/bias.py              |  4 ++--
 GPy/kern/kern.py              | 29 ++++++++++++++++++++---------
 GPy/kern/linear.py            |  9 ++++-----
 GPy/models/BGPLVM.py          | 13 +++++++++++--
 6 files changed, 58 insertions(+), 37 deletions(-)

diff --git a/GPy/examples/BGPLVM_demo.py b/GPy/examples/BGPLVM_demo.py
index 760bf4b4..583a66ad 100644
--- a/GPy/examples/BGPLVM_demo.py
+++ b/GPy/examples/BGPLVM_demo.py
@@ -7,17 +7,17 @@ import GPy
 np.random.seed(123344)
 N = 10
-M = 5
-Q = 3
-D = 4
+M = 3
+Q = 4
+D = 5
 
 #generate GPLVM-like data
 X = np.random.rand(N, Q)
 k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
 K = k.K(X)
 Y = np.random.multivariate_normal(np.zeros(N),K,D).T
 
-# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
-k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q, 0.00001)
+k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
+# k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q, 0.00001)
 m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
 m.constrain_positive('(rbf|bias|noise|white|S)')
 # m.constrain_fixed('S', 1)
diff --git a/GPy/examples/oil_flow_demo.py b/GPy/examples/oil_flow_demo.py
index 4bcdfa63..91fff4e4 100644
--- a/GPy/examples/oil_flow_demo.py
+++ b/GPy/examples/oil_flow_demo.py
@@ -7,7 +7,7 @@ import numpy as np
 import pylab as pb
 import GPy
 import pylab as plt
-np.random.seed(1)
+np.random.seed(3)
 
 def plot_oil(X, theta, labels, label):
     plt.figure()
@@ -24,25 +24,27 @@ data = pickle.load(open('../../../GPy_assembla/datasets/oil_flow_3classes.pickle
 Y = data['DataTrn']
 N, D = Y.shape
 
-selected = np.random.permutation(N)#[:200]
+selected = np.random.permutation(N)[:350]
 labels = data['DataTrnLbls'][selected]
 Y = Y[selected]
 N, D = Y.shape
 Y -= Y.mean(axis=0)
-#Y /= Y.std(axis=0)
+# Y /= Y.std(axis=0)
 
-Q = 10
-k = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q)
-m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 12)
-m.constrain_positive('(rbf|bias|S|white|noise)')
-# m.constrain_bounded('white', 1e-6, 100.0)
-# m.constrain_bounded('noise', 1e-4, 1000.0)
+Q = 5
+k = GPy.kern.linear(Q, ARD = False) + GPy.kern.white(Q)
+m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 20)
+m.constrain_positive('(rbf|bias|S|linear|white|noise)')
 
-plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
-# m.optimize(messages = True)
-m.optimize('tnc', messages = True)
-plot_oil(m.X, m.kern.parts[0].lengthscales, labels, 'B-GPLVM')
-# pb.figure()
+# m.unconstrain('noise')
+# m.constrain_fixed('noise_precision', 50.0)
+# m.unconstrain('white')
+# m.constrain_bounded('white', 1e-6, 10.0)
+# plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
+m.optimize(messages = True)
+# m.optimize('tnc', messages = True)
+plot_oil(m.X, m.kern.parts[0].lengthscale, labels, 'B-GPLVM')
+# # pb.figure()
 # m.plot()
 # pb.title('PCA initialisation')
 # pb.figure()
diff --git a/GPy/kern/bias.py b/GPy/kern/bias.py
index 0a6872a5..d97f2f00 100644
--- a/GPy/kern/bias.py
+++ b/GPy/kern/bias.py
@@ -73,9 +73,9 @@ class bias(kernpart):
 
     def dpsi1_dmuS(self, partial, Z, mu, S, target_mu, target_S):
         pass
-
+
     def dpsi2_dtheta(self, partial, Z, mu, S, target):
-        target += 2.*self.variance*partial.sum()
+        target += np.sum(2.*self.variance*partial)
 
     def dpsi2_dZ(self, partial, Z, mu, S, target):
         pass
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index e259d505..d8881579 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -6,7 +6,7 @@ import numpy as np
 from ..core.parameterised import parameterised
 from kernpart import kernpart
 import itertools
-from product_orthogonal import product_orthogonal
+from product_orthogonal import product_orthogonal
 
 class kern(parameterised):
     def __init__(self,D,parts=[], input_slices=None):
@@ -323,15 +323,22 @@ class kern(parameterised):
 
         slices1, slices2 = self._process_slices(slices1,slices2)
         [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
-        # MASSIVE TODO: do something smart for white
-        # "crossterms"
+
+        # "crossterms". Here we are recomputing psi1 for white (we don't need to), but it's
+        # not really expensive, since it's just a matrix of zeroes.
         # psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
         # [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
+
+        crossterms = 0.0
+        # for 3 kernels this returns something like
+        # [(0,1), (0,2), (1,2)]
+        # in theory, we should also account for (1,0), (2,0) and so on, but
+        # the transpose deals exactly with that
         # for a,b in itertools.combinations(psi1_matrices, 2):
         #     tmp = np.multiply(a,b)
-        #     target += tmp[:,None,:] + tmp[:, :,None]
+        #     crossterms += tmp[:,None,:] + tmp[:, :,None]
 
-        return target
+        return target + crossterms
 
     def dpsi2_dtheta(self,partial,partial1,Z,mu,S,slices1=None,slices2=None):
         """Returns shape (N,M,M,Ntheta)"""
@@ -339,22 +346,26 @@ class kern(parameterised):
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros(self.Nparam)
         [p.dpsi2_dtheta(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
-
         # # "crossterms"
         # # 1. get all the psi1 statistics
         # psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
         # [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
-        # partial1 = np.zeros_like(partial1)
+        # partial1 = np.ones_like(partial1)
         # # 2. get all the dpsi1/dtheta gradients
         # psi1_gradients = [np.zeros(self.Nparam) for p in self.parts]
         # [p.dpsi1_dtheta(partial1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],psi1g_target[ps]) for p,ps,s1,s2,i_s,psi1g_target in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices,psi1_gradients)]
+
         # # 3. multiply them somehow
        # for a,b in itertools.combinations(range(len(psi1_matrices)), 2):
-        #     gne = (psi1_gradients[a][None]*psi1_matrices[b].sum(0)[:,None]).sum(0)
-        #     target += (gne[None] + gne[:, None]).sum(0)
+        #     tmp = (psi1_gradients[a][None, None] * psi1_matrices[b][:,:, None])
+        #     # target += (tmp[None] + tmp[:,None]).sum(0).sum(0).sum(0)
+        #     # gne = (psi1_gradients[a].sum()*psi1_matrices[b].sum())
+        #     # target += gne
+        #     #target += (gne[None] + gne[:, None]).sum(0)
+        #     target += (partial.sum(0)[:,:,None] * (tmp[:, None] + tmp[:,:,None]).sum(0)).sum(0).sum(0)
 
         return target
 
     def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py
index 5272e4f6..8fb2c867 100644
--- a/GPy/kern/linear.py
+++ b/GPy/kern/linear.py
@@ -102,8 +102,8 @@ class linear(kernpart):
         target += tmp.sum()
 
     def dpsi0_dmuS(self,partial, Z,mu,S,target_mu,target_S):
-        target_mu += partial[:, None] * (2.0*mu*self.variances) * mu.shape[0]
-        target_S += partial[:, None] * self.variances * mu.shape[0]
+        target_mu += np.sum(partial[:, None],0) * (2.0*mu*self.variances)
+        target_S += np.sum(partial[:, None] * self.variances, 0)
 
     def dpsi0_dZ(self,Z,mu,S,target):
         pass
@@ -140,7 +140,6 @@ class linear(kernpart):
         else:
             target += tmp.sum()
 
-
     def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
         self._psi_computations(Z,mu,S)
@@ -174,6 +173,6 @@ class linear(kernpart):
             #Z has changed, compute Z specific stuff
             self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
             self._Z = Z
-        if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
+        if not (np.all(mu==self._mu) and np.all(S==self._S)):
             self.mu2_S = np.square(mu)+S
-            self._Z, self._mu, self._S = Z, mu,S
+            self._mu, self._S = mu, S
diff --git a/GPy/models/BGPLVM.py b/GPy/models/BGPLVM.py
index a7b6ef1d..b42e3b98 100644
--- a/GPy/models/BGPLVM.py
+++ b/GPy/models/BGPLVM.py
@@ -58,8 +58,17 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
         dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
         dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
 
-        return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))
+        dKL_dS = (1. - (1./self.X_uncertainty))*0.5
+        dKL_dmu = self.X
+        return np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
+
+    def KL_divergence(self):
+        var_mean = np.square(self.X).sum()
+        var_S = np.sum(self.X_uncertainty - np.log(self.X_uncertainty))
+        return 0.5*(var_mean + var_S) - 0.5*self.Q*self.N
+
+    def log_likelihood(self):
+        return sparse_GP_regression.log_likelihood(self) - self.KL_divergence()
 
     def _log_likelihood_gradients(self):
         return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression._log_likelihood_gradients(self)))
-
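
Note on the disabled "crossterms" block in GPy/kern/kern.py above: for a sum of kernel parts, psi2 picks up cross terms between every pair of parts, built from the per-part psi1 matrices. The standalone sketch below (plain numpy with made-up sizes; psi1_matrices here are random stand-ins, not GPy calls) shows the shape bookkeeping the commented-out loop would perform, and why visiting each unordered pair once via itertools.combinations is enough: the sum tmp[:,:,None] + tmp[:,None,:] is symmetric in the two inducing-point indices, which is what the comment about the transpose refers to.

    import itertools
    import numpy as np

    N, M = 4, 3                                   # data points, inducing points
    # random stand-ins for the per-part psi1 statistics (shape N x M each)
    psi1_matrices = [np.random.rand(N, M) for _ in range(3)]

    crossterms = np.zeros((N, M, M))
    # combinations yields each unordered pair once: (0,1), (0,2), (1,2)
    for a, b in itertools.combinations(psi1_matrices, 2):
        tmp = np.multiply(a, b)                   # (N, M)
        # out[n,m,m'] = tmp[n,m] + tmp[n,m'] is symmetric in (m, m'),
        # so the reversed pairs (1,0), (2,0), (2,1) need no extra pass
        crossterms += tmp[:, :, None] + tmp[:, None, :]   # (N, M, M)

    assert np.allclose(crossterms, crossterms.transpose(0, 2, 1))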
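
Note on the KL term added to GPy/models/BGPLVM.py above: for a diagonal Gaussian posterior q(X) = N(mu, S) against the N(0, I) prior, KL(q||p) = 0.5 * sum(mu^2 + S - log S - 1), and summing the constant over the N x Q latent means gives the 0.5*Q*N returned by KL_divergence(). Its gradients, dKL/dmu = mu and dKL/dS = 0.5*(1 - 1/S), are the dKL_dmu and dKL_dS subtracted in dL_dmuS(). Below is a standalone finite-difference check of those formulas (plain numpy, hypothetical variable names, no GPy imports).

    import numpy as np

    def kl(mu, S):
        # KL(N(mu, diag(S)) || N(0, I)) = 0.5 * sum(mu^2 + S - log S - 1)
        return 0.5 * (np.square(mu).sum() + np.sum(S - np.log(S))) - 0.5 * mu.size

    def kl_grads(mu, S):
        # mirrors dKL_dmu = self.X and dKL_dS = (1. - (1./self.X_uncertainty))*0.5
        return mu, 0.5 * (1. - 1. / S)

    rng = np.random.RandomState(0)
    mu, S = rng.randn(10, 2), rng.rand(10, 2) + 0.5   # N = 10, Q = 2
    gmu, gS = kl_grads(mu, S)

    # central differences on one element of mu and one element of S
    eps = 1e-6
    dmu = np.zeros_like(mu); dmu[3, 1] = eps
    dS = np.zeros_like(S); dS[3, 1] = eps
    assert np.allclose(gmu[3, 1], (kl(mu + dmu, S) - kl(mu - dmu, S)) / (2 * eps))
    assert np.allclose(gS[3, 1], (kl(mu, S + dS) - kl(mu, S - dS)) / (2 * eps))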