From 0ac023eaded81ac54cfd8bf0442569561bcbe790 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 11 Jan 2013 17:19:42 +0000
Subject: [PATCH 01/19] parameter gridding with checkgrad to aid debugging

---
 grid_parameters.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 grid_parameters.py

diff --git a/grid_parameters.py b/grid_parameters.py
new file mode 100644
index 00000000..8c4c1e0a
--- /dev/null
+++ b/grid_parameters.py
@@ -0,0 +1,51 @@
+import numpy as np
+import pylab as pb
+pb.ion()
+import sys
+import GPy
+
+pb.close('all')
+
+N = 1000
+M = 10
+resolution=5
+
+X = np.linspace(0,12,N)[:,None]
+Z = np.linspace(0,12,M)[:,None] # inducing points (fixed for now)
+Y = np.sin(X) + np.random.randn(*X.shape)/np.sqrt(50.)
+k = GPy.kern.rbf(1)
+
+
+m = GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)
+m.constrain_fixed('iip')
+#m.constrain_fixed('white',1e-6)
+m.constrain_fixed('precision',50)
+m.ensure_default_constraints()
+
+
+xx,yy = np.mgrid[1.5:4:0+resolution*1j,-2:2:0+resolution*1j]
+
+lls = []
+cgs = []
+for l,v in zip(xx.flatten(),yy.flatten()):
+    m.set('lengthscale',l)
+    m.set('rbf_variance',10.**v)
+    lls.append(m.log_likelihood())
+    cgs.append(m.checkgrad())
+    #m.plot()
+
+lls = np.array(lls).reshape(resolution,resolution)
+cgs = np.array(cgs,dtype=np.float64).reshape(resolution,resolution)
+
+pb.contourf(xx,yy,lls,np.linspace(-500,560,100),linewidths=2,cmap=pb.cm.jet)
+pb.colorbar()
+pb.scatter(xx.flatten(),yy.flatten(),10,cgs.flatten(),linewidth=0,cmap=pb.cm.gray)
+pb.figure()
+#pb.imshow(lls,origin='upper',cmap=pb.cm.jet,extent=[xx[0,0],xx[-1,0],yy[0].min(),yy[0].max()],vmin=-500)
+pb.scatter(xx.flatten(),yy.flatten(),10,lls.flatten(),linewidth=0,cmap=pb.cm.jet)
+pb.colorbar()
+pb.figure()
+#pb.imshow(cgs,origin='upper',cmap=pb.cm.jet,extent=[xx[0,0],xx[-1,0],yy[0].min(),yy[0].max()])
+pb.scatter(xx.flatten(),yy.flatten(),10,cgs.flatten(),linewidth=0,cmap=pb.cm.jet)
+pb.colorbar()

From 1a135ca9f7d98f63a3183895f27021308da4d9be Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 11 Jan 2013 17:54:10 +0000
Subject: [PATCH 02/19] some code to debug the sparse GP gradients with
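
The grid script drives m.checkgrad() at each (lengthscale, variance) grid
point, so regions of failing gradients show up visually. checkgrad compares
the analytic gradient against a central-difference estimate; a minimal
sketch of that estimate (illustrative only, the helper below is not part of
GPy):

    import numpy as np

    def numerical_grad(f, x, step=1e-6):
        # central differences: the numerical estimate that checkgrad
        # compares against the analytic gradient, one parameter at a time
        g = np.zeros_like(x)
        for i in range(x.size):
            dx = np.zeros_like(x)
            dx[i] = step
            g[i] = (f(x + dx) - f(x - dx)) / (2. * step)
        return g

A ratio of analytic to numerical gradient close to 1 then indicates
agreement at that grid point.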
--- GPy/core/model.py | 5 +- GPy/models/__init__.py | 2 +- GPy/models/sparse_GP_regression.py | 124 +++++++++++++++++++++++++++++ grid_parameters.py | 37 ++++----- 4 files changed, 144 insertions(+), 24 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 46cf6ac9..183cf0f0 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -363,7 +363,8 @@ class model(parameterised): grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4]) print grad_string - print '' - + if verbose: + print '' + return False return True diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index ab7ff5b4..3b8b31c8 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -3,7 +3,7 @@ from GP_regression import GP_regression -from sparse_GP_regression import sparse_GP_regression +from sparse_GP_regression import sparse_GP_regression, sgp_debugB, sgp_debugC, sgp_debugE from GPLVM import GPLVM from warped_GP import warpedGP from GP_EP import GP_EP diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py index fe5f7cc1..45376cf2 100644 --- a/GPy/models/sparse_GP_regression.py +++ b/GPy/models/sparse_GP_regression.py @@ -197,3 +197,127 @@ class sparse_GP_regression(GP_regression): pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten())) if self.Q==2: pb.plot(self.Z[:,0],self.Z[:,1],'wo') + +class sgp_debugB(sparse_GP_regression): + def _computations(self): + self.V = self.beta*self.Y + self.psi1V = np.dot(self.psi1, self.V) + self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) + self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) + self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T) + self.B = np.eye(self.M) + self.A + self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) + self.LLambdai = np.dot(self.LBi, self.Lmi) + self.trace_K = self.psi0 - np.trace(self.A)/self.beta + self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi) + self.C = mdot(self.LLambdai, self.psi1V) + self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T) + + # Compute dL_dpsi + self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N) + self.dL_dpsi1 = np.zeros_like(self.psi1) + self.dL_dpsi2 = - 0.5 * self.beta * (self.D*( - self.Kmmi)) + + # Compute dL_dKmm + self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB + + def log_likelihood(self): + A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) + B = -0.5*self.beta*self.D*self.trace_K + C = -0.5*self.D * self.B_logdet + D = -0.5*self.beta*self.trYYT + E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv) + return B + + def dL_dbeta(self): + dA_dbeta = 0.5 * self.N*self.D/self.beta + dB_dbeta = - 0.5 * self.D * self.trace_K + dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta + dD_dbeta = - 0.5 * self.trYYT + tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V) + dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta + return np.squeeze(dB_dbeta) + + +class sgp_debugC(sparse_GP_regression): + def _computations(self): + self.V = self.beta*self.Y + self.psi1V = np.dot(self.psi1, self.V) + self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) + self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) + self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T) + self.B = np.eye(self.M) + self.A + self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) + self.LLambdai = np.dot(self.LBi, 
self.Lmi) + self.trace_K = self.psi0 - np.trace(self.A)/self.beta + self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi) + self.C = mdot(self.LLambdai, self.psi1V) + self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T) + + # Compute dL_dpsi + self.dL_dpsi0 = np.zeros(self.N) + self.dL_dpsi1 = np.zeros_like(self.psi1) + self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv)) + + # Compute dL_dKmm + self.dL_dKmm = -0.5 * self.D * (- self.LBL_inv - 2.*self.beta*mdot(self.LBL_inv, self.psi2, self.Kmmi) + self.Kmmi) # dC + + def log_likelihood(self): + A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) + B = -0.5*self.beta*self.D*self.trace_K + C = -0.5*self.D * self.B_logdet + D = -0.5*self.beta*self.trYYT + E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv) + return C + + def dL_dbeta(self): + dA_dbeta = 0.5 * self.N*self.D/self.beta + dB_dbeta = - 0.5 * self.D * self.trace_K + dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta + dD_dbeta = - 0.5 * self.trYYT + tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V) + dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta + return np.squeeze(dC_dbeta) + + +class sgp_debugE(sparse_GP_regression): + def _computations(self): + self.V = self.beta*self.Y + self.psi1V = np.dot(self.psi1, self.V) + self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) + self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) + self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T) + self.B = np.eye(self.M) + self.A + self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) + self.LLambdai = np.dot(self.LBi, self.Lmi) + self.trace_K = self.psi0 - np.trace(self.A)/self.beta + self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi) + self.C = mdot(self.LLambdai, self.psi1V) + self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T) + + # Compute dL_dpsi + self.dL_dpsi0 = np.zeros(self.N) + self.dL_dpsi1 = np.zeros_like(self.psi1) + self.dL_dpsi2 = - 0.5 * self.beta * (self.G) + + # Compute dL_dKmm + self.dL_dKmm = np.dot(np.dot(self.G,self.beta*self.psi2) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE + + def log_likelihood(self): + A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) + B = -0.5*self.beta*self.D*self.trace_K + C = -0.5*self.D * self.B_logdet + D = -0.5*self.beta*self.trYYT + E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv) + return E + + def dL_dbeta(self): + dA_dbeta = 0.5 * self.N*self.D/self.beta + dB_dbeta = - 0.5 * self.D * self.trace_K + dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta + dD_dbeta = - 0.5 * self.trYYT + tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V) + dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta + return np.squeeze(dE_dbeta) + + diff --git a/grid_parameters.py b/grid_parameters.py index 8c4c1e0a..421eef10 100644 --- a/grid_parameters.py +++ b/grid_parameters.py @@ -16,11 +16,14 @@ Y = np.sin(X) + np.random.randn(*X.shape)/np.sqrt(50.) 
k = GPy.kern.rbf(1) -m = GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k) -m.constrain_fixed('iip') +models = [GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k), + GPy.models.sgp_debugB(X,Y,Z=Z,kernel=k), + GPy.models.sgp_debugC(X,Y,Z=Z,kernel=k), + GPy.models.sgp_debugE(X,Y,Z=Z,kernel=k)] +#[m.constrain_fixed('iip') for m in models] #m.constrain_fixed('white',1e-6) -m.constrain_fixed('precision',50) -m.ensure_default_constraints() +#[m.constrain_fixed('precision',50) for m in models] +#[m.ensure_default_constraints() for m in models] xx,yy = np.mgrid[1.5:4:0+resolution*1j,-2:2:0+resolution*1j] @@ -28,24 +31,16 @@ xx,yy = np.mgrid[1.5:4:0+resolution*1j,-2:2:0+resolution*1j] lls = [] cgs = [] for l,v in zip(xx.flatten(),yy.flatten()): - m.set('lengthscale',l) - m.set('rbf_variance',10.**v) - lls.append(m.log_likelihood()) - cgs.append(m.checkgrad()) - #m.plot() + [m.set('lengthscale',l) for m in models] + [m.set('rbf_variance',10.**v) for m in models] + lls.append(models[0].log_likelihood()) + cgs.append([m.checkgrad(verbose=0) for m in models]) lls = np.array(lls).reshape(resolution,resolution) -cgs = np.array(cgs,dtype=np.float64).reshape(resolution,resolution) +cgs = np.array(zip(*cgs),dtype=np.float64).reshape(-1,resolution,resolution) -pb.contourf(xx,yy,lls,np.linspace(-500,560,100),linewidths=2,cmap=pb.cm.jet) -pb.colorbar() -pb.scatter(xx.flatten(),yy.flatten(),10,cgs.flatten(),linewidth=0,cmap=pb.cm.gray) -pb.figure() -#pb.imshow(lls,origin='upper',cmap=pb.cm.jet,extent=[xx[0,0],xx[-1,0],yy[0].min(),yy[0].max()],vmin=-500) -pb.scatter(xx.flatten(),yy.flatten(),10,lls.flatten(),linewidth=0,cmap=pb.cm.jet) -pb.colorbar() -pb.figure() -#pb.imshow(cgs,origin='upper',cmap=pb.cm.jet,extent=[xx[0,0],xx[-1,0],yy[0].min(),yy[0].max()]) -pb.scatter(xx.flatten(),yy.flatten(),10,cgs.flatten(),linewidth=0,cmap=pb.cm.jet) -pb.colorbar() +for cg in cgs: + pb.figure() + pb.contourf(xx,yy,lls,cmap=pb.cm.jet) + pb.scatter(xx.flatten(),yy.flatten(),12,cg.flatten(),cmap=pb.cm.gray) From c949da2d67ec2bc0d13138415bce7a6a539a82c5 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Fri, 11 Jan 2013 17:58:19 +0000 Subject: [PATCH 03/19] allowed the gradchecker to return the gradient ratio Just to help with debugging. --- GPy/core/model.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 183cf0f0..cd37ea33 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -286,7 +286,7 @@ class model(parameterised): return '\n'.join(s) - def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance = 1e-3, *args): + def checkgrad(self, verbose=False, include_priors=False, step=1e-6, tolerance = 1e-3, return_ratio=False, *args): """ Check the gradient of the model by comparing to a numerical estimate. If the overall gradient fails, invividual components are tested. 
@@ -306,12 +306,12 @@ class model(parameterised):
         gradient = self.extract_gradients()
 
         numerical_gradient = (f1-f2)/(2*dx)
-        ratio = (f1-f2)/(2*np.dot(dx,gradient))
+        global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
 
         if verbose:
-            print "Gradient ratio = ", ratio, '\n'
+            print "Gradient ratio = ", global_ratio, '\n'
             sys.stdout.flush()
-        if (np.abs(1.-ratio) Date: Fri, 11 Jan 2013 18:04:22 +0000
Subject: [PATCH 04/19] plotting changes

---
 grid_parameters.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/grid_parameters.py b/grid_parameters.py
index 421eef10..302210cf 100644
--- a/grid_parameters.py
+++ b/grid_parameters.py
@@ -34,7 +34,7 @@ for l,v in zip(xx.flatten(),yy.flatten()):
     [m.set('lengthscale',l) for m in models]
     [m.set('rbf_variance',10.**v) for m in models]
     lls.append(models[0].log_likelihood())
-    cgs.append([m.checkgrad(verbose=0) for m in models])
+    cgs.append([m.checkgrad(verbose=0,return_ratio=True) for m in models])
 
 lls = np.array(lls).reshape(resolution,resolution)
 cgs = np.array(zip(*cgs),dtype=np.float64).reshape(-1,resolution,resolution)
@@ -42,5 +42,7 @@ cgs = np.array(zip(*cgs),dtype=np.float64).reshape(-1,resolution,resolution)
 for cg in cgs:
     pb.figure()
     pb.contourf(xx,yy,lls,cmap=pb.cm.jet)
-    pb.scatter(xx.flatten(),yy.flatten(),12,cg.flatten(),cmap=pb.cm.gray)
+    pb.colorbar()
+    pb.scatter(xx.flatten(),yy.flatten(),20,np.log(np.abs(cg.flatten())),cmap=pb.cm.gray,linewidth=0)
+    pb.colorbar()

From 0ff5cb8447c0e697cd6e58e17855d60f645991bc Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Tue, 15 Jan 2013 09:33:25 +0000
Subject: [PATCH 05/19] changed the colours of plotting in the grid_parameters
 debug script

---
 grid_parameters.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/grid_parameters.py b/grid_parameters.py
index 302210cf..e15aab6e 100644
--- a/grid_parameters.py
+++ b/grid_parameters.py
@@ -8,7 +8,7 @@ pb.close('all')
 
 N = 1000
 M = 10
-resolution=5
+resolution=3
 
 X = np.linspace(0,12,N)[:,None]
 Z = np.linspace(0,12,M)[:,None] # inducing points (fixed for now)
 Y = np.sin(X) + np.random.randn(*X.shape)/np.sqrt(50.)
@@ -41,8 +41,8 @@ cgs = np.array(zip(*cgs),dtype=np.float64).reshape(-1,resolution,resolution)
 
 for cg in cgs:
     pb.figure()
-    pb.contourf(xx,yy,lls,cmap=pb.cm.jet)
+    pb.contourf(xx,yy,lls,50,cmap=pb.cm.gray)
     pb.colorbar()
-    pb.scatter(xx.flatten(),yy.flatten(),20,np.log(np.abs(cg.flatten())),cmap=pb.cm.gray,linewidth=0)
+    pb.scatter(xx.flatten(),yy.flatten(),20,np.log(np.abs(cg.flatten())),cmap=pb.cm.jet,linewidth=0)
     pb.colorbar()

From 4241a7c24f1edbf57c3ac18b80dca4562a1d6eb5 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Tue, 15 Jan 2013 09:34:47 +0000
Subject: [PATCH 06/19] simplified the debug classes

---
 GPy/models/sparse_GP_regression.py | 42 ++++--------------------------
 1 file changed, 5 insertions(+), 37 deletions(-)

diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index 45376cf2..561aaca4 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -37,6 +37,7 @@ class sparse_GP_regression(GP_regression):
     """
 
     def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False):
+        self.stabilit_multiplier = 1.
self.beta = beta if Z is None: self.Z = np.random.permutation(X.copy())[:M] @@ -88,7 +89,7 @@ class sparse_GP_regression(GP_regression): self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T) - self.B = np.eye(self.M) + self.A + self.B = np.eye(self.M)*self.stabilit_multiplier + self.A self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) self.LLambdai = np.dot(self.LBi, self.Lmi) self.trace_K = self.psi0 - np.trace(self.A)/self.beta @@ -200,18 +201,7 @@ class sparse_GP_regression(GP_regression): class sgp_debugB(sparse_GP_regression): def _computations(self): - self.V = self.beta*self.Y - self.psi1V = np.dot(self.psi1, self.V) - self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) - self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) - self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T) - self.B = np.eye(self.M) + self.A - self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) - self.LLambdai = np.dot(self.LBi, self.Lmi) - self.trace_K = self.psi0 - np.trace(self.A)/self.beta - self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi) - self.C = mdot(self.LLambdai, self.psi1V) - self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T) + sparse_GP_regression._computations(self) # Compute dL_dpsi self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N) @@ -241,18 +231,7 @@ class sgp_debugB(sparse_GP_regression): class sgp_debugC(sparse_GP_regression): def _computations(self): - self.V = self.beta*self.Y - self.psi1V = np.dot(self.psi1, self.V) - self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) - self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) - self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T) - self.B = np.eye(self.M) + self.A - self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) - self.LLambdai = np.dot(self.LBi, self.Lmi) - self.trace_K = self.psi0 - np.trace(self.A)/self.beta - self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi) - self.C = mdot(self.LLambdai, self.psi1V) - self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T) + sparse_GP_regression._computations(self) # Compute dL_dpsi self.dL_dpsi0 = np.zeros(self.N) @@ -282,18 +261,7 @@ class sgp_debugC(sparse_GP_regression): class sgp_debugE(sparse_GP_regression): def _computations(self): - self.V = self.beta*self.Y - self.psi1V = np.dot(self.psi1, self.V) - self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) - self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) - self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T) - self.B = np.eye(self.M) + self.A - self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) - self.LLambdai = np.dot(self.LBi, self.Lmi) - self.trace_K = self.psi0 - np.trace(self.A)/self.beta - self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi) - self.C = mdot(self.LLambdai, self.psi1V) - self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T) + sparse_GP_regression._computations(self) # Compute dL_dpsi self.dL_dpsi0 = np.zeros(self.N) From a6300fab1038ca64a971dc3a57e3a319feb00fb9 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 16 Jan 2013 11:21:06 +0000 Subject: [PATCH 07/19] fixed up dK_dX in the exponential and Matern kerns --- GPy/kern/Matern32.py | 6 +++--- GPy/kern/Matern52.py | 11 +++++------ GPy/kern/exponential.py | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/GPy/kern/Matern32.py b/GPy/kern/Matern32.py index 8223b37a..377b687f 100644 --- a/GPy/kern/Matern32.py +++ 
b/GPy/kern/Matern32.py
@@ -78,14 +78,14 @@ class Matern32(kernpart):
         """derivative of the diagonal of the covariance matrix with respect to the parameters."""
         target[0] += np.sum(partial)
 
-    def dK_dX(self,X,X2,target):
+    def dK_dX(self,partial,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X
         dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
         ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
-        dK_dX += - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
+        dK_dX = - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
         target += np.sum(dK_dX*partial.T[:,:,None],0)
-
+
     def dKdiag_dX(self,X,target):
         pass
diff --git a/GPy/kern/Matern52.py b/GPy/kern/Matern52.py
index 65059a5b..172c0117 100644
--- a/GPy/kern/Matern52.py
+++ b/GPy/kern/Matern52.py
@@ -33,7 +33,6 @@ class Matern52(kernpart):
         self.Nparam = self.D + 1
         self.name = 'Mat52'
         self.set_param(np.hstack((variance,lengthscales)))
-
     def get_param(self):
         """return the value of the parameters."""
@@ -77,12 +76,12 @@ class Matern52(kernpart):
         """derivative of the diagonal of the covariance matrix with respect to the parameters."""
         target[0] += np.sum(partial)
 
-    def dK_dX(self,X,X2,target):
+    def dK_dX(self,partial,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X
         dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
         ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
-        dK_dX += - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
+        dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
         target += np.sum(dK_dX*partial.T[:,:,None],0)
 
     def dKdiag_dX(self,X,target):
@@ -97,11 +96,11 @@ class Matern52(kernpart):
         :param F1: vector of derivatives of F
         :type F1: np.array
         :param F2: vector of second derivatives of F
-        :type F2: np.array
+        :type F2: np.array
         :param F3: vector of third derivatives of F
-        :type F3: np.array
+        :type F3: np.array
         :param lower,upper: boundaries of the input domain
-        :type lower,upper: floats
+        :type lower,upper: floats
         """
         assert self.D == 1
         def L(x,i):
diff --git a/GPy/kern/exponential.py b/GPy/kern/exponential.py
index ba97881e..8ad5b813 100644
--- a/GPy/kern/exponential.py
+++ b/GPy/kern/exponential.py
@@ -77,7 +77,7 @@ class exponential(kernpart):
         #NB: derivative of diagonal elements wrt lengthscale is 0
         target[0] += np.sum(partial)
 
-    def dK_dX(self,X,X2,target):
+    def dK_dX(self,partial,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X
         dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]

From 9f05744374f33439d6433df98c1836278689dc20 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Wed, 16 Jan 2013 11:21:55 +0000
Subject: [PATCH 08/19] general changes to debugging code

---
 GPy/models/sparse_GP_regression.py |  4 ++-
 GPy/util/linalg.py                 |  2 +-
 grid_parameters.py                 | 41 ++++++++++++++++++------------
 3 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index 561aaca4..6313f4f7 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -269,7 +269,9 @@ class sgp_debugE(sparse_GP_regression):
         self.dL_dpsi2 = - 0.5 * self.beta * (self.G)
 
         # Compute dL_dKmm
-        self.dL_dKmm = np.dot(np.dot(self.G,self.beta*self.psi2) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE
+        tmp = mdot(self.beta*self.psi2, self.Kmmi, self.psi1VVpsi1)
+        self.dL_dKmm = -0.5*mdot(self.Kmmi,tmp + tmp.T + self.psi1VVpsi1,self.Kmmi)
+        #self.dL_dKmm = np.dot(np.dot(self.G,self.beta*self.psi2) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE
 
     def log_likelihood(self):
         A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 092589a9..7414eb29 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -63,8 +63,8 @@ def jitchol(A,maxtries=5):
             raise linalg.LinAlgError, "not pd: negative diagonal elements"
         jitter= diagA.mean()*1e-6
         for i in range(1,maxtries+1):
+            print 'Warning: adding jitter of '+str(jitter)
             try:
-                print 'Warning: adding jitter of '+str(jitter)
                 return linalg.cholesky(A+np.eye(A.shape[0])*jitter, lower = True)
             except:
                 jitter *= 10
diff --git a/grid_parameters.py b/grid_parameters.py
index e15aab6e..011204ee 100644
--- a/grid_parameters.py
+++ b/grid_parameters.py
@@ -8,41 +8,50 @@ pb.close('all')
 
 N = 1000
 M = 10
-resolution=3
+resolution=5
 
 X = np.linspace(0,12,N)[:,None]
 Z = np.linspace(0,12,M)[:,None] # inducing points (fixed for now)
 Y = np.sin(X) + np.random.randn(*X.shape)/np.sqrt(50.)
-k = GPy.kern.rbf(1)
-
+#k = GPy.kern.rbf(1)
+k = GPy.kern.Matern32(1) + GPy.kern.white(1)
 models = [GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k),
           GPy.models.sgp_debugB(X,Y,Z=Z,kernel=k),
-          GPy.models.sgp_debugC(X,Y,Z=Z,kernel=k),
-          GPy.models.sgp_debugE(X,Y,Z=Z,kernel=k)]
-#[m.constrain_fixed('iip') for m in models]
-#m.constrain_fixed('white',1e-6)
-#[m.constrain_fixed('precision',50) for m in models]
-#[m.ensure_default_constraints() for m in models]
+          GPy.models.sgp_debugC(X,Y,Z=Z,kernel=k)]#,
+          #GPy.models.sgp_debugE(X,Y,Z=Z,kernel=k)]
+[m.constrain_fixed('white',0.001) for m in models]
 
-xx,yy = np.mgrid[1.5:4:0+resolution*1j,-2:2:0+resolution*1j]
+#xx,yy = np.mgrid[1.5:4:0+resolution*1j,-2:2:0+resolution*1j]
+xx,yy = np.mgrid[3:16:0+resolution*1j,-2:2:0+resolution*1j]
 
 lls = []
 cgs = []
+grads = []
+count = 0
 for l,v in zip(xx.flatten(),yy.flatten()):
+    count += 1
+    print count, 'of', resolution**2
+    sys.stdout.flush()
+
     [m.set('lengthscale',l) for m in models]
-    [m.set('rbf_variance',10.**v) for m in models]
-    lls.append(models[0].log_likelihood())
+    [m.set('_variance',10.**v) for m in models]
+    lls.append([m.log_likelihood() for m in models])
+    grads.append([m.log_likelihood_gradients() for m in models])
     cgs.append([m.checkgrad(verbose=0,return_ratio=True) for m in models])
 
-lls = np.array(lls).reshape(resolution,resolution)
-cgs = np.array(zip(*cgs),dtype=np.float64).reshape(-1,resolution,resolution)
+lls = np.array(zip(*lls)).reshape(-1,resolution,resolution)
+cgs = np.array(zip(*cgs)).reshape(-1,resolution,resolution)
 
-for cg in cgs:
+for ll,cg in zip(lls,cgs):
     pb.figure()
-    pb.contourf(xx,yy,lls,50,cmap=pb.cm.gray)
+    pb.contourf(xx,yy,ll,100,cmap=pb.cm.gray)
     pb.colorbar()
+    try:
+        pb.contour(xx,yy,np.exp(ll),colors='k')
+    except:
+        pass
     pb.scatter(xx.flatten(),yy.flatten(),20,np.log(np.abs(cg.flatten())),cmap=pb.cm.jet,linewidth=0)
     pb.colorbar()

From 126b45673b23640a1cae96b92ff69feae42202cc Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 18 Jan 2013 15:09:48 +0000
Subject: [PATCH 09/19] Scale factor added to sparse_GP_regression and
 sparse_GP_demo amended to be less annoying (m1)

---
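A note on the scale factor (a reading aid, stated in terms of the
quantities defined in the diff below): with s = scale_factor, the code
builds

    B = I/s^2 + Lm^{-1} (beta/s^2) psi1 psi1^T Lm^{-T}
      = (1/s^2) (I + Lm^{-1} beta psi2 Lm^{-T}),

so the unscaled log-determinant is recovered as

    log|I + Lm^{-1} beta psi2 Lm^{-T}| = log|B| + M log(s^2),

which is exactly the self.B_logdet + self.M*np.log(sf2) correction in
log_likelihood(). Working with B rather than the unscaled matrix keeps its
entries at a moderate magnitude before the Cholesky factorisation, which
is what the factor is for.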
GPy/examples/sparse_GP_regression_demo.py | 21 +++--- GPy/models/sparse_GP_regression.py | 92 ++++++++++++----------- 2 files changed, 61 insertions(+), 52 deletions(-) diff --git a/GPy/examples/sparse_GP_regression_demo.py b/GPy/examples/sparse_GP_regression_demo.py index 6424b8f2..d67a3831 100644 --- a/GPy/examples/sparse_GP_regression_demo.py +++ b/GPy/examples/sparse_GP_regression_demo.py @@ -11,8 +11,8 @@ import numpy as np import GPy np.random.seed(2) pb.ion() -N = 500 -M = 5 +N = 1200 +M = 20 ###################################### ## 1 dimensional example @@ -27,20 +27,21 @@ noise = GPy.kern.white(1) kernel = rbf + noise # create simple GP model -m1 = GPy.models.sparse_GP_regression(X, Y, kernel, M=M) +m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M) # contrain all parameters to be positive -m1.constrain_positive('(variance|lengthscale|precision)') -#m1.constrain_positive('(variance|lengthscale)') -#m1.constrain_fixed('prec',10.) +m.constrain_positive('(variance|lengthscale|precision)') +#m.constrain_positive('(variance|lengthscale)') +#m.constrain_fixed('prec',10.) #check gradient FIXME unit test please -m1.checkgrad() +m.checkgrad(verbose=1) +stop # optimize and plot -m1.optimize('tnc', messages = 1) -m1.plot() -# print(m1) +m.optimize('tnc', messages = 1) +m.plot() +# print(m) ###################################### ## 2 dimensional example diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py index 6313f4f7..6d1b1f44 100644 --- a/GPy/models/sparse_GP_regression.py +++ b/GPy/models/sparse_GP_regression.py @@ -37,7 +37,7 @@ class sparse_GP_regression(GP_regression): """ def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False): - self.stabilit_multiplier = 1. 
+ self.scale_factor = 1e1 self.beta = beta if Z is None: self.Z = np.random.permutation(X.copy())[:M] @@ -60,14 +60,6 @@ class sparse_GP_regression(GP_regression): if self.has_uncertain_inputs: self.X_uncertainty /= np.square(self._Xstd) - def set_param(self, p): - self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) - self.beta = p[self.M*self.Q] - self.kern.set_param(p[self.Z.size + 1:]) - self.beta2 = self.beta**2 - self._compute_kernel_matrices() - self._computations() - def _compute_kernel_matrices(self): # kernel computations, using BGPLVM notation #TODO: slices for psi statistics (easy enough) @@ -77,35 +69,62 @@ class sparse_GP_regression(GP_regression): self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum() self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty) + raise NotImplementedError, "scale psi2 (in kern?)" else: self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum() self.psi1 = self.kern.K(self.Z,self.X) - self.psi2 = np.dot(self.psi1,self.psi1.T) + #self.psi2 = np.dot(self.psi1,self.psi1.T) + #self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:] + self.psi1_scaled = self.psi1/self.scale_factor + #self.psi2_scaled = psi1_scaled.T[:,:,None]*psi1_scaled.T[:,None,:] + self.psi2_scaled = np.dot(self.psi1_scaled,self.psi1_scaled.T) def _computations(self): # TODO find routine to multiply triangular matrices - self.V = self.beta*self.Y + sf = self.scale_factor + sf2 = sf**2 + + self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) + + self.V = (self.beta/self.scale_factor)*self.Y + self.A = mdot(self.Lmi, self.beta*self.psi2_scaled, self.Lmi.T) + self.B = np.eye(self.M)/sf2 + self.A + + self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) + self.psi1V = np.dot(self.psi1, self.V) self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T) - self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) - self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T) - self.B = np.eye(self.M)*self.stabilit_multiplier + self.A - self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) - self.LLambdai = np.dot(self.LBi, self.Lmi) - self.trace_K = self.psi0 - np.trace(self.A)/self.beta - self.LBL_inv = mdot(self.Lmi.T, self.Bi, self.Lmi) - self.C = mdot(self.LLambdai, self.psi1V) - self.G = mdot(self.LBL_inv, self.psi1VVpsi1, self.LBL_inv.T) + self.C = mdot(self.Lmi.T, self.Bi, self.Lmi) + self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T) # Compute dL_dpsi self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N) - self.dL_dpsi1 = mdot(self.LLambdai.T,self.C,self.V.T) - self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv - self.Kmmi) + self.G) + self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T + self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi # dB + self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C # dC + self.dL_dpsi2 += - 0.5 * self.beta * self.E # dD # Compute dL_dKmm - self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB - self.dL_dKmm += -0.5 * self.D * (- self.LBL_inv - 2.*self.beta*mdot(self.LBL_inv, self.psi2, self.Kmmi) + self.Kmmi) # dC - self.dL_dKmm += np.dot(np.dot(self.G,self.beta*self.psi2) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE + self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB + self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*self.beta*mdot(self.C, self.psi2_scaled, self.Kmmi) + self.Kmmi) # dC + self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.beta*self.psi2_scaled) - 
np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD + + def log_likelihood(self): + """ Compute the (lower bound on the) log marginal likelihood """ + sf2 = self.scale_factor**2 + A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) -0.5*self.beta*self.trYYT + B = -0.5*self.D*(self.beta*self.psi0-np.trace(self.A)*sf2) + C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) + D = +0.5*np.sum(self.psi1VVpsi1 * self.C) + return A+B+C+D + + def set_param(self, p): + self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) + self.beta = p[self.M*self.Q] + self.kern.set_param(p[self.Z.size + 1:]) + self.beta2 = self.beta**2 + self._compute_kernel_matrices() + self._computations() def get_param(self): return np.hstack([self.Z.flatten(),self.beta,self.kern.extract_param()]) @@ -113,30 +132,19 @@ class sparse_GP_regression(GP_regression): def get_param_names(self): return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern.extract_param_names() - def log_likelihood(self): - """ - Compute the (lower bound on the) log marginal likelihood - """ - A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta)) - B = -0.5*self.beta*self.D*self.trace_K - C = -0.5*self.D * self.B_logdet - D = -0.5*self.beta*self.trYYT - E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv) - return A+B+C+D+E - def dL_dbeta(self): """ Compute the gradient of the log likelihood wrt beta. """ #TODO: suport heteroscedatic noise - dA_dbeta = 0.5 * self.N*self.D/self.beta - dB_dbeta = - 0.5 * self.D * self.trace_K + sf2 = self.scale_factor**2 + dA_dbeta = 0.5 * self.N*self.D/self.beta - 0.5 * self.trYYT + dB_dbeta = - 0.5 * self.D * self.psi0 - np.trace(self.A)/self.beta*sf2 dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta - dD_dbeta = - 0.5 * self.trYYT - tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V) - dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta + tmp = mdot(self.Bi, self.Lmi, self.psi1V) + dD_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta - return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta + dE_dbeta) + return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta) def dL_dtheta(self): """ From 194d3ebda7f22bf19de1b866bd113992bcc91dce Mon Sep 17 00:00:00 2001 From: James Hensman Date: Fri, 18 Jan 2013 17:38:50 +0000 Subject: [PATCH 10/19] fixed optimize_restarts --- GPy/core/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index cd37ea33..d10ae84a 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -181,7 +181,7 @@ class model(parameterised): else: raise e if len(self.optimization_runs): - i = np.argmax([o.f_opt for o in self.optimization_runs]) + i = np.argmin([o.f_opt for o in self.optimization_runs]) self.expand_param(self.optimization_runs[i].x_opt) else: self.expand_param(initial_parameters) From 0fb8ab91bb2ea616b8878a4fc7ecd27b522e1588 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Fri, 18 Jan 2013 17:47:50 +0000 Subject: [PATCH 11/19] various hackday stuff, including scale factor in sparse GP --- GPy/examples/sparse_GPLVM_demo.py | 6 ++--- GPy/examples/sparse_GP_regression_demo.py | 2 +- GPy/models/GPLVM.py | 2 +- GPy/models/sparse_GP_regression.py | 33 ++++++++++------------- grid_parameters.py | 21 ++++++++++----- 5 files changed, 32 insertions(+), 32 deletions(-) diff --git a/GPy/examples/sparse_GPLVM_demo.py b/GPy/examples/sparse_GPLVM_demo.py index 
6ca6c941..5df72b8d 100644 --- a/GPy/examples/sparse_GPLVM_demo.py +++ b/GPy/examples/sparse_GPLVM_demo.py @@ -9,7 +9,7 @@ np.random.seed(1) print "sparse GPLVM with RBF kernel" N = 100 -M = 4 +M = 8 Q = 1 D = 2 #generate GPLVM-like data @@ -19,9 +19,7 @@ K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T m = GPy.models.sparse_GPLVM(Y, Q, M=M) -m.constrain_positive('(rbf|bias|noise)') -m.constrain_bounded('white', 1e-3, 0.1) -# m.plot() +m.constrain_positive('(rbf|bias|noise|white)') pb.figure() m.plot() diff --git a/GPy/examples/sparse_GP_regression_demo.py b/GPy/examples/sparse_GP_regression_demo.py index d67a3831..2a73e149 100644 --- a/GPy/examples/sparse_GP_regression_demo.py +++ b/GPy/examples/sparse_GP_regression_demo.py @@ -12,7 +12,7 @@ import GPy np.random.seed(2) pb.ion() N = 1200 -M = 20 +M = 5 ###################################### ## 1 dimensional example diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index 44147b73..89df21c0 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -54,7 +54,7 @@ class GPLVM(GP_regression): def plot(self): assert self.Y.shape[1]==2 - pb.scatter(self.Y[:,0],self.Y[:,1],40,self.X[:,0].copy(),linewidth=0) + pb.scatter(self.Y[:,0],self.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet) Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None] mu, var = self.predict(Xnew) pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5) diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py index 6d1b1f44..2dbd7c72 100644 --- a/GPy/models/sparse_GP_regression.py +++ b/GPy/models/sparse_GP_regression.py @@ -60,10 +60,11 @@ class sparse_GP_regression(GP_regression): if self.has_uncertain_inputs: self.X_uncertainty /= np.square(self._Xstd) - def _compute_kernel_matrices(self): - # kernel computations, using BGPLVM notation + def _computations(self): + # TODO find routine to multiply triangular matrices #TODO: slices for psi statistics (easy enough) + # kernel computations, using BGPLVM notation self.Kmm = self.kern.K(self.Z) if self.has_uncertain_inputs: self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum() @@ -75,19 +76,16 @@ class sparse_GP_regression(GP_regression): self.psi1 = self.kern.K(self.Z,self.X) #self.psi2 = np.dot(self.psi1,self.psi1.T) #self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:] - self.psi1_scaled = self.psi1/self.scale_factor - #self.psi2_scaled = psi1_scaled.T[:,:,None]*psi1_scaled.T[:,None,:] - self.psi2_scaled = np.dot(self.psi1_scaled,self.psi1_scaled.T) + tmp = self.psi1/(self.scale_factor/np.sqrt(self.beta)) + self.psi2_beta_scaled = np.dot(tmp,tmp.T) - def _computations(self): - # TODO find routine to multiply triangular matrices sf = self.scale_factor sf2 = sf**2 - self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) + self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)#+np.eye(self.M)*1e-3) self.V = (self.beta/self.scale_factor)*self.Y - self.A = mdot(self.Lmi, self.beta*self.psi2_scaled, self.Lmi.T) + self.A = mdot(self.Lmi, self.psi2_beta_scaled, self.Lmi.T) self.B = np.eye(self.M)/sf2 + self.A self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) @@ -106,8 +104,8 @@ class sparse_GP_regression(GP_regression): # Compute dL_dKmm self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB - self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*self.beta*mdot(self.C, self.psi2_scaled, self.Kmmi) + self.Kmmi) # dC - self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.beta*self.psi2_scaled) - np.dot(self.C, self.psi1VVpsi1), 
self.Kmmi) + 0.5*self.E # dD + self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC + self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD def log_likelihood(self): """ Compute the (lower bound on the) log marginal likelihood """ @@ -122,8 +120,6 @@ class sparse_GP_regression(GP_regression): self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) self.beta = p[self.M*self.Q] self.kern.set_param(p[self.Z.size + 1:]) - self.beta2 = self.beta**2 - self._compute_kernel_matrices() self._computations() def get_param(self): @@ -139,10 +135,9 @@ class sparse_GP_regression(GP_regression): #TODO: suport heteroscedatic noise sf2 = self.scale_factor**2 dA_dbeta = 0.5 * self.N*self.D/self.beta - 0.5 * self.trYYT - dB_dbeta = - 0.5 * self.D * self.psi0 - np.trace(self.A)/self.beta*sf2 + dB_dbeta = - 0.5 * self.D * (self.psi0 - np.trace(self.A)/self.beta*sf2) dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta - tmp = mdot(self.Bi, self.Lmi, self.psi1V) - dD_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta + dD_dbeta = np.sum((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) * self.psi1VVpsi1 )/self.beta return np.squeeze(dA_dbeta + dB_dbeta + dC_dbeta + dD_dbeta) @@ -184,14 +179,14 @@ class sparse_GP_regression(GP_regression): """Internal helper function for making predictions, does not account for normalisation""" Kx = self.kern.K(self.Z, Xnew) - mu = mdot(Kx.T, self.LBL_inv, self.psi1V) + mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V) if full_cov: Kxx = self.kern.K(Xnew) - var = Kxx - mdot(Kx.T, (self.Kmmi - self.LBL_inv), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case. + var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) + np.eye(Xnew.shape[0])/self.beta # TODO: This beta doesn't belong here in the EP case. else: Kxx = self.kern.Kdiag(Xnew) - var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.LBL_inv, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case. + var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + 1./self.beta # TODO: This beta doesn't belong here in the EP case. return mu,var diff --git a/grid_parameters.py b/grid_parameters.py index 011204ee..64d82755 100644 --- a/grid_parameters.py +++ b/grid_parameters.py @@ -6,8 +6,8 @@ import GPy pb.close('all') -N = 1000 -M = 10 +N = 200 +M = 15 resolution=5 X = np.linspace(0,12,N)[:,None] @@ -16,15 +16,22 @@ Y = np.sin(X) + np.random.randn(*X.shape)/np.sqrt(50.) #k = GPy.kern.rbf(1) k = GPy.kern.Matern32(1) + GPy.kern.white(1) -models = [GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k), - GPy.models.sgp_debugB(X,Y,Z=Z,kernel=k), - GPy.models.sgp_debugC(X,Y,Z=Z,kernel=k)]#, +models = [GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k) + ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k) + ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k) + ,GPy.models.sparse_GP_regression(X,Y,Z=Z,kernel=k)] +models[0].scale_factor = 1. +models[1].scale_factor = 10. +models[2].scale_factor = 100. +models[3].scale_factor = 1000. 
+          #GPy.models.sgp_debugB(X,Y,Z=Z,kernel=k),
           #GPy.models.sgp_debugC(X,Y,Z=Z,kernel=k)]#,
           #GPy.models.sgp_debugE(X,Y,Z=Z,kernel=k)]
-[m.constrain_fixed('white',0.001) for m in models]
+[m.constrain_fixed('white',0.1) for m in models]
 
 #xx,yy = np.mgrid[1.5:4:0+resolution*1j,-2:2:0+resolution*1j]
-xx,yy = np.mgrid[3:16:0+resolution*1j,-2:2:0+resolution*1j]
+xx,yy = np.mgrid[3:16:0+resolution*1j,-2:1:0+resolution*1j]
 
 lls = []
 cgs = []

From eb3061a9f014636a6a7eccd76e2605788817258a Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 18 Jan 2013 18:05:14 +0000
Subject: [PATCH 12/19] minor changes to the sparse regression demo

---
 GPy/examples/sparse_GP_regression_demo.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/GPy/examples/sparse_GP_regression_demo.py b/GPy/examples/sparse_GP_regression_demo.py
index 2a73e149..808d943f 100644
--- a/GPy/examples/sparse_GP_regression_demo.py
+++ b/GPy/examples/sparse_GP_regression_demo.py
@@ -11,7 +11,7 @@ import numpy as np
 import GPy
 np.random.seed(2)
 pb.ion()
-N = 1200
+N = 400
 M = 5
 
 ######################################
 ## 1 dimensional example
@@ -29,19 +29,11 @@ kernel = rbf + noise
 
 # create simple GP model
 m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
 
-# contrain all parameters to be positive
 m.constrain_positive('(variance|lengthscale|precision)')
-#m.constrain_positive('(variance|lengthscale)')
-#m.constrain_fixed('prec',10.)
-
-#check gradient FIXME unit test please
 m.checkgrad(verbose=1)
-stop
 
-# optimize and plot
 m.optimize('tnc', messages = 1)
 m.plot()
-# print(m)
 
 ######################################
 ## 2 dimensional example

From 8b6e244cf1d62799e931c2cbb804aa0056f9aa69 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Mon, 28 Jan 2013 15:55:40 +0000
Subject: [PATCH 13/19] decent gradients for most parameters

---
 GPy/kern/kern.py                   |  2 +-
 GPy/kern/rbf.py                    | 12 ++++++------
 GPy/kern/rbf_ARD.py                | 18 +++++++++---------
 GPy/models/__init__.py             |  1 +
 GPy/models/sparse_GP_regression.py |  7 +++----
 5 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 3ba7d97b..0d5e80f0 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -277,7 +277,7 @@ class kern(parameterised):
         [p.dpsi2_dZ(partial[s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
         return target
 
-    def dpsi2_dmuS(self,Z,mu,S,slices1=None,slices2=None):
+    def dpsi2_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None):
         """return shapes are N,M,M,Q"""
         slices1, slices2 = self._process_slices(slices1,slices2)
         target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py
index 9e2bb509..0dcd3ea1 100644
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@@ -97,7 +97,7 @@ class rbf(kernpart):
 
     def dpsi0_dtheta(self,partial,Z,mu,S,target):
         target[0] += 1.
- def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): + def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S): pass def psi1(self,Z,mu,S,target): @@ -118,8 +118,8 @@ class rbf(kernpart): def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): self._psi_computations(Z,mu,S) tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom - target_mu += np.sum(partial*tmp*self._psi1_dist,1) - target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1) + target_mu += np.sum(partial.T[:, :, None]*tmp*self._psi1_dist,1) + target_S += np.sum(partial.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1) def psi2(self,Z,mu,S,target): self._psi_computations(Z,mu,S) @@ -140,12 +140,12 @@ class rbf(kernpart): dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom) target += np.sum(partial[None,:,:,None]*dZ,0).sum(1) - def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): + def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S): """Think N,M,M,Q """ self._psi_computations(Z,mu,S) tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom - target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1) - target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) + target_mu += (partial[None,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1) + target_S += (partial[None,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) def _psi_computations(self,Z,mu,S): #here are the "statistics" for psi1 and psi2 diff --git a/GPy/kern/rbf_ARD.py b/GPy/kern/rbf_ARD.py index 732be590..7a5a3368 100644 --- a/GPy/kern/rbf_ARD.py +++ b/GPy/kern/rbf_ARD.py @@ -76,7 +76,7 @@ class rbf_ARD(kernpart): def dpsi0_dtheta(self,partial,Z,mu,S,target): target[0] += 1. - def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): + def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S): pass def psi1(self,Z,mu,S,target): @@ -92,21 +92,21 @@ class rbf_ARD(kernpart): def dpsi1_dZ(self,partial,Z,mu,S,target): self._psi_computations(Z,mu,S) - np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target) + # np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target) target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,0) def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): """return shapes are N,M,Q""" self._psi_computations(Z,mu,S) tmp = self._psi1[:,:,None]/self.lengthscales2/self._psi1_denom - target_mu += np.sum(partial*tmp*self._psi1_dist,1) - target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1) + target_mu += np.sum(partial.T[:, :, None]*tmp*self._psi1_dist,1) + target_S += np.sum(partial.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1) def psi2(self,Z,mu,S,target): self._psi_computations(Z,mu,S) target += self._psi2.sum(0) #TODO: psi2 should be NxMxM (for het. 
noise) - def dpsi2_dtheta(self,Z,mu,S,target): + def dpsi2_dtheta(self,partial,Z,mu,S,target): """Shape N,M,M,Ntheta""" self._psi_computations(Z,mu,S) d_var = np.sum(2.*self._psi2/self.variance,0) @@ -115,18 +115,18 @@ class rbf_ARD(kernpart): target[0] += np.sum(partial*d_var) target[1:] += (d_length*partial[:,:,None]).sum(0).sum(0) - def dpsi2_dZ(self,Z,mu,S,target): + def dpsi2_dZ(self,partial,Z,mu,S,target): """Returns shape N,M,M,Q""" self._psi_computations(Z,mu,S) dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom) target += np.sum(partial[None,:,:,None]*dZ,0).sum(1) - def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): + def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S): """Think N,M,M,Q """ self._psi_computations(Z,mu,S) tmp = self._psi2[:,:,:,None]/self.lengthscales2/self._psi2_denom - target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1) - target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) + target_mu += (partial[None,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1) + target_S += (partial[None,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) def _K_computations(self,X,X2): if not (np.all(X==self._X) and np.all(X2==self._X2)): diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index 3b8b31c8..ce7a0d0a 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -10,3 +10,4 @@ from GP_EP import GP_EP from generalized_FITC import generalized_FITC from sparse_GPLVM import sparse_GPLVM from uncollapsed_sparse_GP import uncollapsed_sparse_GP +from BGPLVM import Bayesian_GPLVM diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py index 2dbd7c72..692d77d1 100644 --- a/GPy/models/sparse_GP_regression.py +++ b/GPy/models/sparse_GP_regression.py @@ -37,7 +37,7 @@ class sparse_GP_regression(GP_regression): """ def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False): - self.scale_factor = 1e1 + self.scale_factor = 10.0 self.beta = beta if Z is None: self.Z = np.random.permutation(X.copy())[:M] @@ -70,7 +70,8 @@ class sparse_GP_regression(GP_regression): self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum() self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty) - raise NotImplementedError, "scale psi2 (in kern?)" + # raise NotImplementedError, "scale psi2 (in kern?)" + self.psi2_beta_scaled = self.psi2*(self.beta/self.scale_factor**2) else: self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum() self.psi1 = self.kern.K(self.Z,self.X) @@ -292,5 +293,3 @@ class sgp_debugE(sparse_GP_regression): tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V) dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta return np.squeeze(dE_dbeta) - - From 936d08723eb9f84cf034330eca5f207908ace004 Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Tue, 29 Jan 2013 14:02:41 +0000 Subject: [PATCH 14/19] BGPLVM working with rbf+white --- GPy/examples/oil_flow_demo.py | 30 +++++++------ GPy/kern/rbf.py | 16 ++++--- GPy/kern/rbf_ARD.py | 14 +++--- GPy/models/BGPLVM.py | 68 ++++++++++++++++++++++++++++++ GPy/models/sparse_GP_regression.py | 9 ++-- 5 files changed, 105 insertions(+), 32 deletions(-) create mode 100644 GPy/models/BGPLVM.py diff --git a/GPy/examples/oil_flow_demo.py b/GPy/examples/oil_flow_demo.py index eee74461..f977e18d 100644 --- a/GPy/examples/oil_flow_demo.py +++ 
b/GPy/examples/oil_flow_demo.py @@ -20,26 +20,28 @@ def plot_oil(X, theta, labels, label): plt.plot(flow_type[:,0], flow_type[:,1], 'bx') plt.title(label) -data = pickle.load(open('../util/datasets/oil_flow_3classes.pickle', 'r')) +data = pickle.load(open('../../../GPy_assembla/datasets/oil_flow_3classes.pickle', 'r')) Y = data['DataTrn'] N, D = Y.shape -selected = np.random.permutation(N)[:200] +selected = np.random.permutation(N)#[:200] labels = data['DataTrnLbls'][selected] Y = Y[selected] N, D = Y.shape Y -= Y.mean(axis=0) -Y /= Y.std(axis=0) +#Y /= Y.std(axis=0) -Q = 2 -m1 = GPy.models.sparse_GPLVM(Y, Q, M = 15) -m1.constrain_positive('(rbf|bias|noise)') -m1.constrain_bounded('white', 1e-6, 1.0) +Q = 7 +k = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q) +m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 12) +m.constrain_positive('(rbf|bias|S|white|noise)') +# m.constrain_bounded('white', 1e-6, 100.0) +# m.constrain_bounded('noise', 1e-4, 1000.0) -plot_oil(m1.X, np.array([1,1]), labels, 'PCA initialization') +plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization') # m.optimize(messages = True) -m1.optimize('bfgs', messages = True) -plot_oil(m1.X, np.array([1,1]), labels, 'sparse GPLVM') +m.optimize('tnc', messages = True) +plot_oil(m.X, m.kern.parts[0].lengthscales, labels, 'B-GPLVM') # pb.figure() # m.plot() # pb.title('PCA initialisation') @@ -47,7 +49,7 @@ plot_oil(m1.X, np.array([1,1]), labels, 'sparse GPLVM') # m.optimize(messages = 1) # m.plot() # pb.title('After optimisation') -m = GPy.models.GPLVM(Y, Q) -m.constrain_positive('(white|rbf|bias|noise)') -m.optimize() -plot_oil(m.X, np.array([1,1]), labels, 'GPLVM') +# m = GPy.models.GPLVM(Y, Q) +# m.constrain_positive('(white|rbf|bias|noise)') +# m.optimize() +# plot_oil(m.X, np.array([1,1]), labels, 'GPLVM') diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 0dcd3ea1..c22e68c7 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -95,7 +95,7 @@ class rbf(kernpart): target += self.variance def dpsi0_dtheta(self,partial,Z,mu,S,target): - target[0] += 1. 
+        target[0] += np.sum(partial)
 
     def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         pass
@@ -113,7 +113,9 @@ class rbf(kernpart):
     def dpsi1_dZ(self,partial,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
-        target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscale2/self._psi1_denom,0)
+        denominator = (self.lengthscale2*(self._psi1_denom))
+        dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator))
+        target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)
 
     def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         self._psi_computations(Z,mu,S)
@@ -132,13 +134,14 @@ class rbf(kernpart):
         d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
         d_length = d_length.sum(0)
         target[0] += np.sum(partial*d_var)
-        target[1] += np.sum(d_length*partial)
+        target[1] += np.sum(d_length*partial[:,:,None])
 
     def dpsi2_dZ(self,partial,Z,mu,S,target):
-        """Returns shape N,M,M,Q"""
         self._psi_computations(Z,mu,S)
-        dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
-        target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
+        term1 = 0.5*self._psi2_Zdist/self.lengthscale2 # M, M, Q
+        term2 = self._psi2_mudist/self._psi2_denom/self.lengthscale2 # N, M, M, Q
+        dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
+        target += (partial[None,:,:,None]*dZ).sum(0).sum(0)
 
     def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
@@ -175,4 +178,3 @@ class rbf(kernpart):
         self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
         self._Z, self._mu, self._S = Z, mu,S
-
diff --git a/GPy/kern/rbf_ARD.py b/GPy/kern/rbf_ARD.py
index 7a5a3368..79c6ff58 100644
--- a/GPy/kern/rbf_ARD.py
+++ b/GPy/kern/rbf_ARD.py
@@ -74,7 +74,7 @@ class rbf_ARD(kernpart):
         target += self.variance
 
     def dpsi0_dtheta(self,partial,Z,mu,S,target):
-        target[0] += 1.
+        target[0] += np.sum(partial)
 
     def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         pass
@@ -93,7 +93,9 @@ class rbf_ARD(kernpart):
     def dpsi1_dZ(self,partial,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         # np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target)
-        target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,0)
+        denominator = (self.lengthscales2*(self._psi1_denom))
+        dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator))
+        target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)
 
     def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """return shapes are N,M,Q"""
@@ -118,8 +120,10 @@ class rbf_ARD(kernpart):
     def dpsi2_dZ(self,partial,Z,mu,S,target):
         """Returns shape N,M,M,Q"""
         self._psi_computations(Z,mu,S)
-        dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
-        target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
+        term1 = 0.5*self._psi2_Zdist/self.lengthscales2 # M, M, Q
+        term2 = self._psi2_mudist/self._psi2_denom/self.lengthscales2 # N, M, M, Q
+        dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
+        target += (partial[None,:,:,None]*dZ).sum(0).sum(0)
 
     def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
@@ -247,5 +251,3 @@ if __name__=='__main__':
         return f,df
     print "psi2_theta_test"
     checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,))
-
-
diff --git a/GPy/models/BGPLVM.py b/GPy/models/BGPLVM.py
new file mode 100644
index 00000000..2c760ec3
--- /dev/null
+++ b/GPy/models/BGPLVM.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+import pylab as pb
+import sys, pdb
+from GPLVM import GPLVM
+from sparse_GP_regression import sparse_GP_regression
+from GPy.util.linalg import pdinv
+
+class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
+    """
+    Bayesian Gaussian Process Latent Variable Model
+
+    :param Y: observed data
+    :type Y: np.ndarray
+    :param Q: latent dimensionality
+    :type Q: int
+    :param init: initialisation method for the latent space
+    :type init: 'PCA'|'random'
+
+    """
+    def __init__(self, Y, Q, init='PCA', **kwargs):
+        X = self.initialise_latent(init, Q, Y)
+        S = np.ones_like(X) * 1e-2
+        sparse_GP_regression.__init__(self, X, Y, X_uncertainty = S, **kwargs)
+
+    def get_param_names(self):
+        X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
+        S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
+        return (X_names + S_names + sparse_GP_regression.get_param_names(self))
+
+    def get_param(self):
+        """
+        Horizontally stacks the parameters in order to present them to the optimizer.
+        The resulting 1-D array has this structure:
+
+        ===============================================================
+        | mu | S | Z | beta | theta |
+        ===============================================================
+
+        """
+        return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression.get_param(self)))
+
+    def set_param(self,x):
+        N, Q = self.N, self.Q
+        self.X = x[:self.X.size].reshape(N,Q).copy()
+        self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
+        sparse_GP_regression.set_param(self, x[(2*N*Q):])
+
+    def dL_dmuS(self):
+        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty)
+        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_uncertainty)
+        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_uncertainty)
+        dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
+        dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
+
+        return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))
+
+
+    def log_likelihood_gradients(self):
+        return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression.log_likelihood_gradients(self)))
+
+    def plot(self):
+        GPLVM.plot(self)
+        #passing Z without a small amount of jitter will induce the white kernel where we don't want it!
+        mu, var = sparse_GP_regression.predict(self, self.Z+np.random.randn(*self.Z.shape)*0.0001)
+        pb.plot(mu[:, 0] , mu[:, 1], 'ko')
diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index 692d77d1..8caf38b1 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -37,7 +37,7 @@ class sparse_GP_regression(GP_regression):
     """
 
     def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False):
-        self.scale_factor = 10.0
+        self.scale_factor = 1000.0
         self.beta = beta
         if Z is None:
             self.Z = np.random.permutation(X.copy())[:M]
@@ -70,7 +70,6 @@ class sparse_GP_regression(GP_regression):
             self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
             self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
             self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
-            # raise NotImplementedError, "scale psi2 (in kern?)"
             self.psi2_beta_scaled = self.psi2*(self.beta/self.scale_factor**2)
         else:
             self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
@@ -163,10 +162,10 @@ class sparse_GP_regression(GP_regression):
         """
         The derivative of the bound wrt the inducing inputs Z
         """
-        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z,)#factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
+        dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two because of vertical and horizontal 'stripes' in dKmm_dZ
         if self.has_uncertain_inputs:
-            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
-            dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty)
+            dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_uncertainty)
+            dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
        else:
            #re-cast computations in psi2 back to psi1:
            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)

From ff1b64022e41cacd77bda9845cea53e612acf477 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Tue, 29 Jan 2013 15:57:03 +0000
Subject: [PATCH 15/19] BGPLVM working

---
 GPy/examples/oil_flow_demo.py | 2 +-
 GPy/models/BGPLVM.py | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)
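The mu/S packing that BGPLVM.py's get_param and set_param agree on above is easy to break by a single offset when either method is edited. A minimal standalone round-trip check of the same hstack/reshape arithmetic; the toy sizes and the `tail` array standing in for the | Z | beta | theta | block are assumptions for illustration, not GPy code:

    import numpy as np

    N, Q, M = 5, 2, 3                        # assumed toy sizes
    mu = np.random.randn(N, Q)               # plays the role of self.X
    S = np.ones((N, Q)) * 1e-2               # plays the role of self.X_uncertainty
    tail = np.random.randn(M * Q + 1 + 2)    # stands in for | Z | beta | theta |

    # pack as get_param does
    x = np.hstack((mu.flatten(), S.flatten(), tail))

    # unpack as set_param does
    mu2 = x[:N * Q].reshape(N, Q).copy()
    S2 = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
    tail2 = x[(2 * N * Q):]

    assert np.allclose(mu, mu2) and np.allclose(S, S2) and np.allclose(tail, tail2)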
diff --git a/GPy/examples/oil_flow_demo.py b/GPy/examples/oil_flow_demo.py
index f977e18d..4bcdfa63 100644
--- a/GPy/examples/oil_flow_demo.py
+++ b/GPy/examples/oil_flow_demo.py
@@ -31,7 +31,7 @@ N, D = Y.shape
 Y -= Y.mean(axis=0)
 #Y /= Y.std(axis=0)
 
-Q = 7
+Q = 10
 k = GPy.kern.rbf_ARD(Q) + GPy.kern.white(Q)
 m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 12)
 m.constrain_positive('(rbf|bias|S|white|noise)')
diff --git a/GPy/models/BGPLVM.py b/GPy/models/BGPLVM.py
index 2c760ec3..3fc257e9 100644
--- a/GPy/models/BGPLVM.py
+++ b/GPy/models/BGPLVM.py
@@ -57,12 +57,6 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
 
         return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))
 
-
     def log_likelihood_gradients(self):
         return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression.log_likelihood_gradients(self)))
 
-    def plot(self):
-        GPLVM.plot(self)
-        #passing Z without a small amount of jitter will induce the white kernel where we don't want it!
-        mu, var = sparse_GP_regression.predict(self, self.Z+np.random.randn(*self.Z.shape)*0.0001)
-        pb.plot(mu[:, 0] , mu[:, 1], 'ko')

From 10c774e84ecc4f2bd234393b85e1464703f24305 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Tue, 29 Jan 2013 16:10:12 +0000
Subject: [PATCH 16/19] new shape for psi2

---
 GPy/kern/kern.py | 10 +++++-----
 GPy/kern/rbf_ARD.py | 16 ++++++++--------
 GPy/models/sparse_GP_regression.py | 12 ++++++------
 3 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 0d5e80f0..6201df35 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -259,29 +259,29 @@ class kern(parameterised):
         :Z: np.ndarray of inducing inputs (M x Q)
         :mu, S: np.ndarrays of means and variances (each N x Q)
         :returns psi2: np.ndarray (N,M,M)
         """
-        target = np.zeros((Z.shape[0],Z.shape[0]))
+        target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0]))
         slices1, slices2 = self._process_slices(slices1,slices2)
-        [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
         return target
 
     def dpsi2_dtheta(self,partial,Z,mu,S,slices1=None,slices2=None):
         """Returns shape (N,M,M,Ntheta)"""
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros(self.Nparam)
-        [p.dpsi2_dtheta(partial[s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
+        [p.dpsi2_dtheta(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
         return target
 
     def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros_like(Z)
-        [p.dpsi2_dZ(partial[s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        [p.dpsi2_dZ(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
         return target
 
     def dpsi2_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None):
         """return shapes are N,M,M,Q"""
         slices1, slices2 = self._process_slices(slices1,slices2)
         target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
-        [p.dpsi2_dmuS(partial[s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+        [p.dpsi2_dmuS(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
         #TODO: there are some extra terms to compute here!
         return target_mu, target_S
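What the new psi2 shape buys: keeping the N axis (per-datapoint (M, M) slabs instead of a single pre-summed (M, M) matrix) lets a per-datapoint noise precision multiply psi2 before the sum over data, which the old shape could not express. A toy sketch of the contract, with assumed shapes and random values rather than real kernel expectations, not GPy code:

    import numpy as np

    N, M = 4, 3
    psi2 = np.random.rand(N, M, M)           # per-datapoint slabs: the new shape
    psi2 = psi2 + psi2.transpose(0, 2, 1)    # keep each slab symmetric

    # the old pre-summed matrix is recovered by one sum over the data axis,
    # as sparse_GP_regression below now does when building psi2_beta_scaled
    psi2_old = psi2.sum(0)                   # (M, M)

    # heteroscedastic noise: a per-datapoint beta can be applied before the sum
    beta = np.random.rand(N)
    psi2_beta = (beta[:, None, None] * psi2).sum(0)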
diff --git a/GPy/kern/rbf_ARD.py b/GPy/kern/rbf_ARD.py
index 79c6ff58..a29a2bc9 100644
--- a/GPy/kern/rbf_ARD.py
+++ b/GPy/kern/rbf_ARD.py
@@ -106,31 +106,31 @@ class rbf_ARD(kernpart):
     def psi2(self,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
-        target += self._psi2.sum(0) #TODO: psi2 should be NxMxM (for het. noise)
+        target += self._psi2
 
     def dpsi2_dtheta(self,partial,Z,mu,S,target):
         """Shape N,M,M,Ntheta"""
         self._psi_computations(Z,mu,S)
-        d_var = np.sum(2.*self._psi2/self.variance,0)
+        d_var = 2.*self._psi2/self.variance
         d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscales2)/(self.lengthscales*self._psi2_denom)
-        d_length = d_length.sum(0)
+        # d_length = d_length.sum(0)
         target[0] += np.sum(partial*d_var)
-        target[1:] += (d_length*partial[:,:,None]).sum(0).sum(0)
+        target[1:] += (d_length*partial[:,:,:,None]).sum(0).sum(0).sum(0)
 
     def dpsi2_dZ(self,partial,Z,mu,S,target):
         """Returns shape N,M,M,Q"""
         self._psi_computations(Z,mu,S)
         term1 = 0.5*self._psi2_Zdist/self.lengthscales2 # M, M, Q
         term2 = self._psi2_mudist/self._psi2_denom/self.lengthscales2 # N, M, M, Q
-        dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
-        target += (partial[None,:,:,None]*dZ).sum(0).sum(0)
+        dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
+        target += (partial[:,:,:,None]*dZ).sum(0).sum(0)
 
     def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
         self._psi_computations(Z,mu,S)
         tmp = self._psi2[:,:,:,None]/self.lengthscales2/self._psi2_denom
-        target_mu += (partial[None,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
-        target_S += (partial[None,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
+        target_mu += (partial[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
+        target_S += (partial[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
 
     def _K_computations(self,X,X2):
         if not (np.all(X==self._X) and np.all(X2==self._X2)):
diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index 8caf38b1..8ea99116 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -70,7 +70,7 @@ class sparse_GP_regression(GP_regression):
             self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
             self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
             self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
-            self.psi2_beta_scaled = self.psi2*(self.beta/self.scale_factor**2)
+            self.psi2_beta_scaled = (self.psi2*(self.beta/self.scale_factor**2)).sum(0)
         else:
             self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
             self.psi1 = self.kern.K(self.Z,self.X)
@@ -98,9 +98,9 @@ class sparse_GP_regression(GP_regression):
         # Compute dL_dpsi
         self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
         self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
-        self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi # dB
-        self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C # dC
-        self.dL_dpsi2 += - 0.5 * self.beta * self.E # dD
+        self.dL_dpsi2 = 0.5 * self.beta * self.D * self.Kmmi[None,:,:] # dB
+        self.dL_dpsi2 += - 0.5 * self.beta/sf2 * self.D * self.C[None,:,:] # dC
+        self.dL_dpsi2 += - 0.5 * self.beta * self.E[None,:,:] # dD
 
         # Compute dL_dKmm
         self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
@@ -152,7 +152,7 @@ class sparse_GP_regression(GP_regression):
             dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
         else:
             #re-cast computations in psi2 back to psi1:
-            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
+            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
             dL_dtheta += self.kern.dK_dtheta(dL_dpsi1,self.Z,self.X)
 
         dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
@@ -168,7 +168,7 @@ class sparse_GP_regression(GP_regression):
             dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # 'stripes'
         else:
             #re-cast computations in psi2 back to psi1:
-            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2,self.psi1)
+            dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)
             dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
 
         return dL_dZ

From 6959a905dc13380200cb0e05edd9ffd1384ab170 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Tue, 29 Jan 2013 17:23:51 +0000
Subject: [PATCH 17/19] broken commit, working on cross terms for psi statistics

---
 GPy/kern/kern.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 6201df35..7da4926a 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -6,7 +6,7 @@ import numpy as np
 from ..core.parameterised import parameterised
 from functools import partial
 from kernpart import kernpart
-
+import itertools
 
 class kern(parameterised):
     def __init__(self,D,parts=[], input_slices=None):
@@ -262,6 +262,15 @@ class kern(parameterised):
         target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0]))
         slices1, slices2 = self._process_slices(slices1,slices2)
         [p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+
+        # MASSIVE TODO: do something smart for white
+        # "crossterms"
+        psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
+        [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
+        for a,b in itertools.combinations(psi1_matrices, 2):
+            tmp = np.multiply(a,b)
+            target += tmp[:,None,:] + tmp[:, :,None]
+
         return target
 
     def dpsi2_dtheta(self,partial,Z,mu,S,slices1=None,slices2=None):
@@ -269,12 +278,27 @@ class kern(parameterised):
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros(self.Nparam)
         [p.dpsi2_dtheta(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
+
+
+        # "crossterms"
+        # 1. get all the psi1 statistics
+        psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
+        [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
+        # 2. get all the dpsi1/dtheta gradients
+        psi1_gradients = [np.zeros(self.Nparam) for p in self.parts]
+        [p.dpsi1_dtheta(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,ps,s1,s2,i_s in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices)]
+        # 3. multiply them somehow
+        for a,b in itertools.combinations(range(len(psi1_matrices)), 2):
+            psi2_cross = np.multiply(psi1, psi1_grad) # some newaxis of this
+            target += psi2_cross[:,None,:] + psi2_cross[:, :,None]
+
         return target
 
     def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros_like(Z)
         [p.dpsi2_dZ(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
+
         return target
 
     def dpsi2_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None):

From ece4e2442c3c5492fc514a3007d38de7a2837bf5 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Wed, 30 Jan 2013 11:18:22 +0000
Subject: [PATCH 18/19] working on cross terms

---
 GPy/kern/kern.py | 9 +++++----
 GPy/models/sparse_GP_regression.py | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 7da4926a..89c02bc2 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -273,7 +273,7 @@ class kern(parameterised):
 
         return target
 
-    def dpsi2_dtheta(self,partial,Z,mu,S,slices1=None,slices2=None):
+    def dpsi2_dtheta(self,partial,partial1,Z,mu,S,slices1=None,slices2=None):
         """Returns shape (N,M,M,Ntheta)"""
         slices1, slices2 = self._process_slices(slices1,slices2)
         target = np.zeros(self.Nparam)
@@ -286,12 +286,13 @@ class kern(parameterised):
         [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
         # 2. get all the dpsi1/dtheta gradients
         psi1_gradients = [np.zeros(self.Nparam) for p in self.parts]
-        [p.dpsi1_dtheta(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,ps,s1,s2,i_s in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices)]
+        [p.dpsi1_dtheta(partial1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],psi1g_target[ps]) for p,ps,s1,s2,i_s,psi1g_target in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices,psi1_gradients)]
+
         # 3. multiply them somehow
         for a,b in itertools.combinations(range(len(psi1_matrices)), 2):
-            psi2_cross = np.multiply(psi1, psi1_grad) # some newaxis of this
-            target += psi2_cross[:,None,:] + psi2_cross[:, :,None]
+            gne = (psi1_gradients[a][None]*psi1_matrices[b].sum(0)[:,None]).sum(0)
+            target += 0#(gne[None] + gne[:, None]).sum(0)
 
         return target
 
     def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index 8ea99116..e0a8a35d 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -149,7 +149,7 @@ class sparse_GP_regression(GP_regression):
         if self.has_uncertain_inputs:
             dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z,self.X,self.X_uncertainty)
             dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T,self.Z,self.X, self.X_uncertainty)
-            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
+            dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2,self.dL_dpsi1.T, self.Z,self.X, self.X_uncertainty) # for multiple_beta, dL_dpsi2 will be a different shape
         else:
             #re-cast computations in psi2 back to psi1:
             dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(self.dL_dpsi2.sum(0),self.psi1)

From ce3e2e41f4d98aafc37dc02dfdf4800ccf0128b4 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Wed, 30 Jan 2013 15:14:54 +0000
Subject: [PATCH 19/19] trying to get psi2 cross terms to work

---
 GPy/kern/kern.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 89c02bc2..393554bf 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -284,6 +284,8 @@ class kern(parameterised):
         # 1. get all the psi1 statistics
         psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
         [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
+        partial1 = np.zeros_like(partial1)
+
         # 2. get all the dpsi1/dtheta gradients
         psi1_gradients = [np.zeros(self.Nparam) for p in self.parts]
         [p.dpsi1_dtheta(partial1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],psi1g_target[ps]) for p,ps,s1,s2,i_s,psi1g_target in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices,psi1_gradients)]
@@ -292,7 +294,7 @@ class kern(parameterised):
 
         for a,b in itertools.combinations(range(len(psi1_matrices)), 2):
             gne = (psi1_gradients[a][None]*psi1_matrices[b].sum(0)[:,None]).sum(0)
-            target += 0#(gne[None] + gne[:, None]).sum(0)
+            target += (gne[None] + gne[:, None]).sum(0)
 
         return target
 
     def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
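For orientation on the last three commits: the cross terms being chased arise because psi2 for a sum kernel k = k_a + k_b is not just psi2_a + psi2_b. When the parts' expectations factorise (for example, parts acting on independent input slices), the missing piece is a symmetrised outer product of the parts' psi1 statistics over the two inducing-point axes; white noise still needs special-casing, per the MASSIVE TODO above. A standalone sketch of that algebra with assumed toy shapes and random values, not the GPy implementation:

    import itertools
    import numpy as np

    N, M = 4, 3
    psi1_parts = [np.random.rand(N, M) for _ in range(2)]     # one psi1 per kern part
    psi2_parts = [np.random.rand(N, M, M) for _ in range(2)]  # one psi2 per kern part

    psi2 = sum(psi2_parts)
    for a, b in itertools.combinations(psi1_parts, 2):
        # cross[n, m, m'] = psi1_a[n, m]*psi1_b[n, m'] + psi1_b[n, m]*psi1_a[n, m']
        psi2 += a[:, :, None] * b[:, None, :] + b[:, :, None] * a[:, None, :]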