demo changed, bgplvm still broken

This commit is contained in:
Max Zwiessele 2013-04-18 17:59:01 +01:00
parent 865e9df255
commit 10703e4774
3 changed files with 113 additions and 92 deletions

View file

@ -170,26 +170,30 @@ def bgplvm_simulation(burnin='scg', plot_sim=False, max_f_eval=12):
from GPy import kern from GPy import kern
reload(mrd); reload(kern) reload(mrd); reload(kern)
Y = Ylist[1] Y = Ylist[1]
k = kern.linear(Q, ARD=True) + kern.bias(Q, .0001) + kern.white(Q, .1) k = kern.linear(Q, ARD=True) + kern.white(Q, .00001) # + kern.bias(Q)
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k) m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k)
m.set('noise', Y.var() / 100.) # m.set('noise',)
# m.auto_scale_factor = True # m.auto_scale_factor = True
# m.scale_factor = 1. # m.scale_factor = 1.
m.ensure_default_constraints() m.ensure_default_constraints()
if burnin: if burnin:
print "initializing beta" print "initializing beta"
cstr = "noise" cstr = "noise"
m.unconstrain(cstr); m.constrain_fixed(cstr) m.unconstrain(cstr); m.constrain_fixed(cstr, Y.var() / 100.)
m.optimize(burnin, messages=1, max_f_eval=max_f_eval) m.optimize(burnin, messages=1, max_f_eval=max_f_eval)
print "releasing beta" print "releasing beta"
cstr = "noise" cstr = "noise"
m.unconstrain(cstr); m.constrain_positive(cstr) m.unconstrain(cstr); m.constrain_positive(cstr)
true_X = np.hstack((slist[1], slist[3], 0. * np.ones((N, Q - 2))))
m.set('X_\d', true_X)
m.constrain_fixed("X_\d")
# # cstr = 'variance' # # cstr = 'variance'
# # m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.) # # m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.)

View file

@ -82,7 +82,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
self._set_params(self.oldps[-1], save_old=False) self._set_params(self.oldps[-1], save_old=False)
def dKL_dmuS(self): def dKL_dmuS(self):
dKL_dS = (1. - (1. / self.X_variance)) * 0.5 dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
dKL_dmu = self.X dKL_dmu = self.X
return dKL_dmu, dKL_dS return dKL_dmu, dKL_dS
@ -101,13 +101,26 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N
def log_likelihood(self): def log_likelihood(self):
return sparse_GP.log_likelihood(self) - self.KL_divergence() ll = sparse_GP.log_likelihood(self)
kl = self.KL_divergence()
return ll + kl
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
dKL_dmu, dKL_dS = self.dKL_dmuS() dKL_dmu, dKL_dS = self.dKL_dmuS()
dL_dmu, dL_dS = self.dL_dmuS() dL_dmu, dL_dS = self.dL_dmuS()
# TODO: find way to make faster # TODO: find way to make faster
dbound_dmuS = np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
d_dmu = (dL_dmu + dKL_dmu).flatten()
d_dS = (dL_dS + dKL_dS).flatten()
# TEST KL: ====================
# d_dmu = (dKL_dmu).flatten()
# d_dS = (dKL_dS).flatten()
# ========================
# TEST L: ====================
# d_dmu = (dL_dmu).flatten()
# d_dS = (dL_dS).flatten()
# ========================
dbound_dmuS = np.hstack((d_dmu, d_dS))
return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self))) return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
def plot_latent(self, which_indices=None, *args, **kwargs): def plot_latent(self, which_indices=None, *args, **kwargs):

View file

@ -6,8 +6,8 @@ import numpy as np
import pylab as pb import pylab as pb
from .. import kern from .. import kern
from ..core import model from ..core import model
from ..util.linalg import pdinv,mdot from ..util.linalg import pdinv, mdot
from ..util.plot import gpplot,x_frame1D,x_frame2D, Tango from ..util.plot import gpplot, x_frame1D, x_frame2D, Tango
from ..likelihoods import EP from ..likelihoods import EP
class GP(model): class GP(model):
@ -35,25 +35,25 @@ class GP(model):
# parse arguments # parse arguments
self.Xslices = Xslices self.Xslices = Xslices
self.X = X self.X = X
assert len(self.X.shape)==2 assert len(self.X.shape) == 2
self.N, self.Q = self.X.shape self.N, self.Q = self.X.shape
assert isinstance(kernel, kern.kern) assert isinstance(kernel, kern.kern)
self.kern = kernel self.kern = kernel
#here's some simple normalization for the inputs # here's some simple normalization for the inputs
if normalize_X: if normalize_X:
self._Xmean = X.mean(0)[None,:] self._Xmean = X.mean(0)[None, :]
self._Xstd = X.std(0)[None,:] self._Xstd = X.std(0)[None, :]
self.X = (X.copy() - self._Xmean) / self._Xstd self.X = (X.copy() - self._Xmean) / self._Xstd
if hasattr(self,'Z'): if hasattr(self, 'Z'):
self.Z = (self.Z - self._Xmean) / self._Xstd self.Z = (self.Z - self._Xmean) / self._Xstd
else: else:
self._Xmean = np.zeros((1,self.X.shape[1])) self._Xmean = np.zeros((1, self.X.shape[1]))
self._Xstd = np.ones((1,self.X.shape[1])) self._Xstd = np.ones((1, self.X.shape[1]))
self.likelihood = likelihood self.likelihood = likelihood
#assert self.X.shape[0] == self.likelihood.Y.shape[0] # assert self.X.shape[0] == self.likelihood.Y.shape[0]
#self.N, self.D = self.likelihood.Y.shape # self.N, self.D = self.likelihood.Y.shape
assert self.X.shape[0] == self.likelihood.data.shape[0] assert self.X.shape[0] == self.likelihood.data.shape[0]
self.N, self.D = self.likelihood.data.shape self.N, self.D = self.likelihood.data.shape
@ -65,24 +65,24 @@ class GP(model):
""" """
return np.zeros_like(self.Z) return np.zeros_like(self.Z)
def _set_params(self,p): def _set_params(self, p):
self.kern._set_params_transformed(p[:self.kern.Nparam]) self.kern._set_params_transformed(p[:self.kern.Nparam])
#self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas # self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas
self.K = self.kern.K(self.X,slices1=self.Xslices,slices2=self.Xslices) self.K = self.kern.K(self.X, slices1=self.Xslices, slices2=self.Xslices)
self.K += self.likelihood.covariance_matrix self.K += self.likelihood.covariance_matrix
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K) self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
#the gradient of the likelihood wrt the covariance matrix # the gradient of the likelihood wrt the covariance matrix
if self.likelihood.YYT is None: if self.likelihood.YYT is None:
alpha = np.dot(self.Ki,self.likelihood.Y) alpha = np.dot(self.Ki, self.likelihood.Y)
self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki) self.dL_dK = 0.5 * (np.dot(alpha, alpha.T) - self.D * self.Ki)
else: else:
tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki) tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
self.dL_dK = 0.5*(tmp - self.D*self.Ki) self.dL_dK = 0.5 * (tmp - self.D * self.Ki)
def _get_params(self): def _get_params(self):
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params())) return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
@ -98,16 +98,16 @@ class GP(model):
this function does nothing this function does nothing
""" """
self.likelihood.fit_full(self.kern.K(self.X)) self.likelihood.fit_full(self.kern.K(self.X))
self._set_params(self._get_params()) # update the GP self._set_params(self._get_params()) # update the GP
def _model_fit_term(self): def _model_fit_term(self):
""" """
Computes the model fit using YYT if it's available Computes the model fit using YYT if it's available
""" """
if self.likelihood.YYT is None: if self.likelihood.YYT is None:
return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y))) return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
else: else:
return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT)) return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))
def log_likelihood(self): def log_likelihood(self):
""" """
@ -117,7 +117,7 @@ class GP(model):
model for a new variable Y* = v_tilde/tau_tilde, with a covariance model for a new variable Y* = v_tilde/tau_tilde, with a covariance
matrix K* = K + diag(1./tau_tilde) plus a normalization term. matrix K* = K + diag(1./tau_tilde) plus a normalization term.
""" """
return -0.5*self.D*self.K_logdet + self._model_fit_term() + self.likelihood.Z return -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
@ -128,27 +128,27 @@ class GP(model):
For the likelihood parameters, pass in alpha = K^-1 y For the likelihood parameters, pass in alpha = K^-1 y
""" """
return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK,X=self.X,slices1=self.Xslices,slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X, slices1=self.Xslices, slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self,_Xnew,slices=None, full_cov=False): def _raw_predict(self, _Xnew, slices=None, full_cov=False):
""" """
Internal helper function for making predictions, does not account Internal helper function for making predictions, does not account
for normalization or likelihood for normalization or likelihood
""" """
Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices) Kx = self.kern.K(self.X, _Xnew, slices1=self.Xslices, slices2=slices)
mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y) mu = np.dot(np.dot(Kx.T, self.Ki), self.likelihood.Y)
KiKx = np.dot(self.Ki,Kx) KiKx = np.dot(self.Ki, Kx)
if full_cov: if full_cov:
Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices) Kxx = self.kern.K(_Xnew, slices1=slices, slices2=slices)
var = Kxx - np.dot(KiKx.T,Kx) var = Kxx - np.dot(KiKx.T, Kx)
else: else:
Kxx = self.kern.Kdiag(_Xnew, slices=slices) Kxx = self.kern.Kdiag(_Xnew, slices=slices)
var = Kxx - np.sum(np.multiply(KiKx,Kx),0) var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
var = var[:,None] var = var[:, None]
return mu, var return mu, var
def predict(self,Xnew, slices=None, full_cov=False): def predict(self, Xnew, slices=None, full_cov=False):
""" """
Predict the function(s) at the new point(s) Xnew. Predict the function(s) at the new point(s) Xnew.
@ -174,11 +174,11 @@ class GP(model):
This is to allow for different normalizations of the output dimensions. This is to allow for different normalizations of the output dimensions.
""" """
#normalize X values # normalize X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
mu, var = self._raw_predict(Xnew, slices, full_cov) mu, var = self._raw_predict(Xnew, slices, full_cov)
#now push through likelihood TODO # now push through likelihood TODO
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov) mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
return mean, var, _025pm, _975pm return mean, var, _025pm, _975pm
@ -204,86 +204,90 @@ class GP(model):
Can plot only part of the data and part of the posterior functions using which_data and which_functions Can plot only part of the data and part of the posterior functions using which_data and which_functions
Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood
""" """
if which_functions=='all': if which_functions == 'all':
which_functions = [True]*self.kern.Nparts which_functions = [True] * self.kern.Nparts
if which_data=='all': if which_data == 'all':
which_data = slice(None) which_data = slice(None)
if self.X.shape[1] == 1: if self.X.shape[1] == 1:
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits) Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
if samples == 0: if samples == 0:
m,v = self._raw_predict(Xnew, slices=which_functions) m, v = self._raw_predict(Xnew, slices=which_functions)
gpplot(Xnew,m,m-2*np.sqrt(v),m+2*np.sqrt(v)) gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v))
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5) pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
else: else:
m,v = self._raw_predict(Xnew, slices=which_functions,full_cov=True) m, v = self._raw_predict(Xnew, slices=which_functions, full_cov=True)
Ysim = np.random.multivariate_normal(m.flatten(),v,samples) Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
gpplot(Xnew,m,m-2*np.sqrt(np.diag(v)[:,None]),m+2*np.sqrt(np.diag(v))[:,None]) gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None])
for i in range(samples): for i in range(samples):
pb.plot(Xnew,Ysim[i,:],Tango.colorsHex['darkBlue'],linewidth=0.25) pb.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5) pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
pb.xlim(xmin,xmax) pb.xlim(xmin, xmax)
ymin,ymax = min(np.append(self.likelihood.Y,m-2*np.sqrt(np.diag(v)[:,None]))), max(np.append(self.likelihood.Y,m+2*np.sqrt(np.diag(v)[:,None]))) ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin) ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
pb.ylim(ymin,ymax) pb.ylim(ymin, ymax)
if hasattr(self,'Z'): if hasattr(self, 'Z'):
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'r|',mew=1.5,markersize=12) pb.plot(self.Z, self.Z * 0 + pb.ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 2: elif self.X.shape[1] == 2:
resolution = resolution or 50 resolution = resolution or 50
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits,resolution) Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
m,v = self._raw_predict(Xnew, slices=which_functions) m, v = self._raw_predict(Xnew, slices=which_functions)
m = m.reshape(resolution,resolution).T m = m.reshape(resolution, resolution).T
pb.contour(xx,yy,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet) pb.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=m.min(), vmax=m.max()) pb.scatter(Xorig[:, 0], Xorig[:, 1], 40, Yorig, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max())
pb.xlim(xmin[0],xmax[0]) pb.xlim(xmin[0], xmax[0])
pb.ylim(xmin[1],xmax[1]) pb.ylim(xmin[1], xmax[1])
else: else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions" raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,levels=20): def plot(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, levels=20):
""" """
TODO: Docstrings! TODO: Docstrings!
:param levels: for 2D plotting, the number of contour levels to use :param levels: for 2D plotting, the number of contour levels to use
""" """
# TODO include samples # TODO include samples
if which_functions=='all': if which_functions == 'all':
which_functions = [True]*self.kern.Nparts which_functions = [True] * self.kern.Nparts
if which_data=='all': if which_data == 'all':
which_data = slice(None) which_data = slice(None)
if self.X.shape[1] == 1: if self.X.shape[1] == 1:
Xu = self.X * self._Xstd + self._Xmean #NOTE self.X are the normalized values now Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
m, var, lower, upper = self.predict(Xnew, slices=which_functions) m, var, lower, upper = self.predict(Xnew, slices=which_functions)
gpplot(Xnew,m, lower, upper) gpplot(Xnew, m, lower, upper)
pb.plot(Xu[which_data],self.likelihood.data[which_data],'kx',mew=1.5) pb.plot(Xu[which_data], self.likelihood.data[which_data], 'kx', mew=1.5)
ymin,ymax = min(np.append(self.likelihood.data,lower)), max(np.append(self.likelihood.data,upper)) if self.has_uncertain_inputs:
ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin) pb.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
pb.xlim(xmin,xmax) xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
pb.ylim(ymin,ymax) ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
if hasattr(self,'Z'):
Zu = self.Z*self._Xstd + self._Xmean
pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_variance.flatten()))
elif self.X.shape[1]==2: #FIXME ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
pb.xlim(xmin, xmax)
pb.ylim(ymin, ymax)
if hasattr(self, 'Z'):
Zu = self.Z * self._Xstd + self._Xmean
pb.plot(Zu, Zu * 0 + pb.ylim()[0], 'r|', mew=1.5, markersize=12)
# pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_variance.flatten()))
elif self.X.shape[1] == 2: # FIXME
resolution = resolution or 50 resolution = resolution or 50
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits,resolution) Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0],xmax[0],resolution), np.linspace(xmin[1],xmax[1],resolution) x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
m, var, lower, upper = self.predict(Xnew, slices=which_functions) m, var, lower, upper = self.predict(Xnew, slices=which_functions)
m = m.reshape(resolution,resolution).T m = m.reshape(resolution, resolution).T
pb.contour(x,y,m,levels,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet) pb.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
Yf = self.likelihood.Y.flatten() Yf = self.likelihood.Y.flatten()
pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.) pb.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
pb.xlim(xmin[0],xmax[0]) pb.xlim(xmin[0], xmax[0])
pb.ylim(xmin[1],xmax[1]) pb.ylim(xmin[1], xmax[1])
if hasattr(self,'Z'): if hasattr(self, 'Z'):
pb.plot(self.Z[:,0],self.Z[:,1],'wo') pb.plot(self.Z[:, 0], self.Z[:, 1], 'wo')
else: else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions" raise NotImplementedError, "Cannot define a frame with more than two input dimensions"