dimensionality reduction merge

Max Zwiessele 2013-07-17 17:45:07 +01:00
commit 7f63849dd2
9 changed files with 28 additions and 87 deletions

View file

@@ -96,8 +96,7 @@ class GP(GPBase):
         model for a new variable Y* = v_tilde/tau_tilde, with a covariance
         matrix K* = K + diag(1./tau_tilde) plus a normalization term.
         """
-        return (-0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) -
-                0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z)
+        return -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) - 0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z

     def _log_likelihood_gradients(self):
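For reference, the expression being joined onto one line is the standard Gaussian log marginal likelihood. With N = num_data, D = output_dim, and assuming _model_fit_term() is the usual quadratic term (Z is the likelihood's normalization term mentioned in the docstring):

    \log p(Y \mid \theta) = -\tfrac{ND}{2}\log(2\pi) - \tfrac{D}{2}\log\lvert K\rvert - \tfrac{1}{2}\operatorname{tr}\bigl(K^{-1} Y Y^{\top}\bigr) + Z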

View file

@@ -108,7 +108,7 @@ class SparseGP(GPBase):
         self.B = np.eye(self.num_inducing) + self.A
         self.LB = jitchol(self.B)

-        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT... this is for efficiency!
+        #VVT_factor is a matrix such that tdot(VVT_factor) = VVT... this is for efficiency!
         self.psi1Vf = np.dot(self.psi1.T, self.likelihood.VVT_factor)

         # back substitute C into psi1Vf
@@ -163,7 +163,7 @@ class SparseGP(GPBase):
     def log_likelihood(self):
         """ Compute the (lower bound on the) log marginal likelihood """
         if self.likelihood.is_heteroscedastic:
-            A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
+            A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V*self.likelihood.Y)
             B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
         else:
             A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
@@ -246,7 +246,7 @@ class SparseGP(GPBase):
         Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi)

         if self.Cpsi1V is None:
-            psi1V = np.dot(self.psi1.T, self.likelihood.V)
+            psi1V = np.dot(self.psi1.T,self.likelihood.V)
             tmp, _ = dtrtrs(self.Lm, np.asfortranarray(psi1V), lower=1, trans=0)
             tmp, _ = dpotrs(self.LB, tmp, lower=1)
             self.Cpsi1V, _ = dtrtrs(self.Lm, tmp, lower=1, trans=1)
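The three solves above compute Cpsi1V = Lm^{-T} B^{-1} Lm^{-1} (psi1^T V) without ever forming an explicit inverse. A minimal standalone sketch with SciPy, assuming Lm and LB are the lower Cholesky factors of Kmm and B respectively:

    import numpy as np
    from scipy.linalg import solve_triangular, cho_solve

    def cpsi1v(Lm, LB, psi1, V):
        # Cpsi1V = Lm^{-T} B^{-1} Lm^{-1} (psi1^T V), via triangular solves only
        psi1V = psi1.T.dot(V)
        tmp = solve_triangular(Lm, psi1V, lower=True)           # Lm^{-1} psi1V   (dtrtrs, trans=0)
        tmp = cho_solve((LB, True), tmp)                        # B^{-1} tmp      (dpotrs on LB)
        return solve_triangular(Lm, tmp, lower=True, trans=1)   # Lm^{-T} tmp     (dtrtrs, trans=1)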

View file

@@ -4,7 +4,7 @@
 import numpy as np
 import pylab as pb
 from .. import kern
-from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs, jitchol, backsub_both_sides
+from ..util.linalg import linalg, pdinv, mdot, tdot, dpotrs, dtrtrs, jitchol, backsub_both_sides
 from ..likelihoods import EP
 from gp_base import GPBase
 from model import Model
@@ -269,7 +269,6 @@ class SVIGP(GPBase):
     def optimize(self, iterations, print_interval=10, callback=lambda:None, callback_interval=5):
         param_step = 0.
         # Iterate!
         for i in range(iterations):
@@ -288,6 +287,7 @@ class SVIGP(GPBase):
             # compute the steps in all parameters
             vb_step = self.vb_steplength * natgrads[0]
             if (self.epochs >= 1):  # only move the parameters after the first epoch
+                # print "it {} ep {} par {}".format(self.iterations, self.epochs, param_step)
                 param_step = self.momentum * param_step + self.param_steplength * grads
             else:
                 param_step = 0.
@@ -295,8 +295,8 @@ class SVIGP(GPBase):
             self.set_vb_param(self.get_vb_param() + vb_step)
             # Note: don't recompute everything here, wait until the next iteration when we have a new batch
             self._set_params(self._untransform_params(self._get_params_transformed() + param_step), computations=False)

             # print messages if desired
             if i and (not i % print_interval):
                 print i, np.mean(self._ll_trace[-print_interval:])  # , self.log_likelihood()
                 print np.round(np.mean(self._grad_trace[-print_interval:], 0), 3)
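The parameter update above is classical momentum: each step is a decayed running sum of past gradients. A toy, self-contained illustration of the same update rule (the objective and step sizes are made up for the example):

    import numpy as np

    momentum, steplength = 0.9, 0.01
    x, step = np.array([5.0]), 0.0
    for i in range(200):
        grad = -2.0 * x                              # ascent direction for f(x) = -x**2
        step = momentum * step + steplength * grad   # same rule as param_step above
        x = x + step
    print(x)                                         # converges towards the maximiser at 0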

View file

@@ -24,7 +24,7 @@ def BGPLVM(seed=default_seed):
     Y = np.random.multivariate_normal(np.zeros(N), K, Q).T
     lik = Gaussian(Y, normalize=True)

-    k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
+    k = GPy.kern.rbf_inv(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
     # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
     # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
@@ -145,6 +145,7 @@ def BGPLVM_oil(optimize=True, N=200, Q=10, num_inducing=15, max_iters=150, plot=
     # create simple GP model
     kernel = GPy.kern.rbf_inv(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))

     Y = data['X'][:N]
     Yn = Y - Y.mean(0)
     Yn /= Yn.std(0)
@@ -375,11 +376,12 @@ def stick():
 def stick_bgplvm(model=None):
     data = GPy.util.datasets.stick()
     Q = 6
-    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
-    m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
+    kernel = GPy.kern.rbf_inv(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
+    m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=35, kernel=kernel)

     # optimize
     m.ensure_default_constraints()
-    m.optimize(messages=1, max_f_eval=3000, xtol=1e-300, ftol=1e-300)
+    m.constrain_bounded('.*rbf_inv', 1e-5, 100)
+    m.optimize(messages=1, max_iters=3000, xtol=1e-300, ftol=1e-300)
     m._set_params(m._get_params())

     plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2)
     plt.sca(latent_axes)
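The new constrain_bounded('.*rbf_inv', 1e-5, 100) call keeps every parameter matching the regex inside (1e-5, 100) during optimisation. Conceptually this works by a bounded reparameterisation; a sketch of the idea (not GPy's exact transform) using a logistic map:

    import numpy as np

    def bounded(t, lo=1e-5, hi=100.0):
        # the optimiser moves t freely over the reals; the model only ever
        # sees a value squashed into (lo, hi)
        return lo + (hi - lo) / (1.0 + np.exp(-t))

    print(bounded(-50.0), bounded(0.0), bounded(50.0))  # all inside (1e-5, 100)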

View file

@@ -67,8 +67,8 @@ def toy_ARD(optim_iters=1000, kernel_type='linear', N=300, D=4):
     X4 = np.log(np.sort(np.random.rand(N, 1), 0))
     X = np.hstack((X1, X2, X3, X4))

-    Y1 = np.asarray(2*X[:,0]+3).T
-    Y2 = np.asarray(4*(X[:,2]-1.5*X[:,0])).T
+    Y1 = np.asmatrix(2*X[:,0]+3).T
+    Y2 = np.asmatrix(4*(X[:,2]-1.5*X[:,0])).T
     Y = np.hstack((Y1, Y2))
     Y = np.dot(Y, np.random.rand(2, D))
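The switch from asarray to asmatrix matters here because transposing a 1-D array is a no-op: np.asarray(2*X[:,0]+3) is 1-D, so .T leaves it 1-D, whereas np.asmatrix promotes it to a (1, N) row whose transpose is the (N, 1) column that np.hstack needs. A quick demonstration:

    import numpy as np

    v = np.arange(3.0)
    print(np.asarray(v).T.shape)   # (3,)   -- transpose of a 1-D array does nothing
    print(np.asmatrix(v).T.shape)  # (3, 1) -- a proper column vector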

View file

@@ -66,26 +66,12 @@ class kern(Parameterized):
         Parameterized.setstate(self, state)

-    def plot_ARD(self, fignum=None, ax=None, title='', legend=False):
-        """If an ARD kernel is present, it bar-plots the ARD parameters,
-
-        :param fignum: figure number of the plot
-        :param ax: matplotlib axis to plot on
-        :param title:
-            title of the plot,
-            pass '' to not print a title
-            pass None for a generic title
-        """
+    def plot_ARD(self, fignum=None, ax=None, title=None):
+        """If an ARD kernel is present, it bar-plots the ARD parameters"""
         if ax is None:
             fig = pb.figure(fignum)
             ax = fig.add_subplot(111)
-        from GPy.util import Tango
-        from matplotlib.textpath import TextPath
-        Tango.reset()
-        xticklabels = []
-        bars = []
-        x0 = 0
         for p in self.parts:
-            c = Tango.nextMedium()
             if hasattr(p, 'ARD') and p.ARD:
                 if title is None:
                     ax.set_title('ARD parameters, %s kernel' % p.name)
@@ -96,32 +82,10 @@ class kern(Parameterized):
             else:
                 ard_params = 1. / p.lengthscale

-                x = np.arange(x0, x0 + len(ard_params))
-                bars.append(ax.bar(x, ard_params, align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name))
-                xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))])
-                x0 += len(ard_params)
-
-        x = np.arange(x0)
-        for bar in bars:
-            for patch, num in zip(bar.patches, np.arange(len(bar.patches))):
-                height = patch.get_height()
-                xi = patch.get_x() + patch.get_width() / 2.
-                va = 'top'
-                c = 'w'
-                t = TextPath((0, 0), "${xi}$".format(xi=xi), rotation=0, usetex=True, ha='center')
-                if patch.get_extents().height <= t.get_extents().height + 2:
-                    va = 'bottom'
-                    c = 'k'
-                ax.text(xi, height, "${xi}$".format(xi=int(num)), color=c, rotation=0, ha='center', va=va)
-        # for xi, t in zip(x, xticklabels):
-        #     ax.text(xi, maxi / 2, t, rotation=90, ha='center', va='center')
-        # ax.set_xticklabels(xticklabels, rotation=17)
-        ax.set_xticks([])
-        ax.set_xlim(-.5, x0 - .5)
-        if title is '':
-            ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
-                      ncol=len(bars), mode="expand", borderaxespad=0.)
-        else:
-            ax.legend()
+                x = np.arange(len(ard_params))
+                ax.bar(x - 0.4, ard_params)
+                ax.set_xticks(x)
+                ax.set_xticklabels([r"${}$".format(i) for i in x])
         return ax

     def _transform_gradients(self, g):
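In miniature, the simplified plot_ARD reduces to the following standalone matplotlib snippet (the lengthscale values are made up; width=0.8 with align='edge' reproduces what x - 0.4 did under the old matplotlib default):

    import numpy as np
    import matplotlib.pyplot as plt

    lengthscale = np.array([0.5, 2.0, 10.0])   # assumed example lengthscales
    ard_params = 1.0 / lengthscale             # ARD relevance per input dimension
    x = np.arange(len(ard_params))
    fig, ax = plt.subplots()
    ax.bar(x - 0.4, ard_params, width=0.8, align='edge')
    ax.set_xticks(x)
    ax.set_xticklabels([r"${}$".format(i) for i in x])
    ax.set_title('ARD parameters, rbf kernel')
    plt.show()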
@@ -397,7 +361,6 @@ class kern(Parameterized):
         # compute the "cross" terms
         # TODO: better looping, input_slices
         for i1, i2 in itertools.permutations(range(len(self.parts)), 2):
             p1, p2 = self.parts[i1], self.parts[i2]
             # ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]

View file

@@ -115,8 +115,8 @@ class BayesianGPLVM(SparseGP, GPLVM):
         self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self)
         return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))

-    def plot_latent(self, plot_inducing=True, *args, **kwargs):
-        return plot_latent.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs)
+    def plot_latent(self, *args, **kwargs):
+        return plot_latent.plot_latent(self, *args, **kwargs)

     def do_test_latents(self, Y):
         """

View file

@@ -36,10 +36,10 @@ class GPLVM(GP):
         self.ensure_default_constraints()

     def initialise_latent(self, init, input_dim, Y):
-        Xr = np.random.randn(Y.shape[0], input_dim)
         if init == 'PCA':
-            Xr[:, :Y.shape[1]] = PCA(Y, input_dim)[0]
-        return Xr
+            return PCA(Y, input_dim)[0]
+        else:
+            return np.random.randn(Y.shape[0], input_dim)

     def getstate(self):
         return GP.getstate(self)
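For reference, a PCA initialisation like the one returned above can be written in a few lines of numpy (a sketch; GPy's PCA helper differs in details such as normalisation):

    import numpy as np

    def pca_init(Y, input_dim):
        # project the centred data onto its top input_dim principal directions
        Yc = Y - Y.mean(0)
        U, S, Vt = np.linalg.svd(Yc, full_matrices=False)
        return Yc.dot(Vt[:input_dim].T)

    X0 = pca_init(np.random.randn(100, 5), 2)   # a (100, 2) latent initialisation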

View file

@@ -23,7 +23,7 @@ class GradientTests(unittest.TestCase):
         self.X2D = np.random.uniform(-3., 3., (40, 2))
         self.Y2D = np.sin(self.X2D[:, 0:1]) * np.sin(self.X2D[:, 1:2]) + np.random.randn(40, 1) * 0.05

-    def check_model_with_white(self, kern, model_type='GPRegression', dimension=1, uncertain_inputs=False):
+    def check_model_with_white(self, kern, model_type='GPRegression', dimension=1):
         # Get the correct gradients
         if dimension == 1:
             X = self.X1D
@@ -36,10 +36,7 @@ class GradientTests(unittest.TestCase):
         noise = GPy.kern.white(dimension)
         kern = kern + noise

-        if uncertain_inputs:
-            m = model_fit(X, Y, kernel=kern, X_variance=np.random.rand(X.shape[0], X.shape[1]))
-        else:
-            m = model_fit(X, Y, kernel=kern)
+        m = model_fit(X, Y, kernel=kern)
         m.randomize()
         # constrain all parameters to be positive
         self.assertTrue(m.checkgrad())
@@ -144,26 +141,6 @@ class GradientTests(unittest.TestCase):
         rbf = GPy.kern.rbf(2)
         self.check_model_with_white(rbf, model_type='SparseGPRegression', dimension=2)

-    def test_SparseGPRegression_rbf_linear_white_kern_1D(self):
-        ''' Testing the sparse GP regression with rbf, linear and white kernels on 1d data '''
-        rbflin = GPy.kern.rbf(1) + GPy.kern.linear(1)
-        self.check_model_with_white(rbflin, model_type='SparseGPRegression', dimension=1)
-
-    def test_SparseGPRegression_rbf_linear_white_kern_2D(self):
-        ''' Testing the sparse GP regression with rbf, linear and white kernels on 2d data '''
-        rbflin = GPy.kern.rbf(2) + GPy.kern.linear(2)
-        self.check_model_with_white(rbflin, model_type='SparseGPRegression', dimension=2)
-
-    def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self):
-        ''' Testing the sparse GP regression with rbf, linear and white kernels on 2d data with uncertain inputs '''
-        rbflin = GPy.kern.rbf(2) + GPy.kern.linear(2)
-        self.check_model_with_white(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1)
-
-    def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self):
-        ''' Testing the sparse GP regression with rbf, linear and white kernels on 1d data with uncertain inputs '''
-        rbflin = GPy.kern.rbf(1) + GPy.kern.linear(1)
-        self.check_model_with_white(rbflin, model_type='SparseGPRegression', dimension=1, uncertain_inputs=1)
-
     def test_GPLVM_rbf_bias_white_kern_2D(self):
         """ Testing GPLVM with rbf + bias and white kernel """
         N, input_dim, D = 50, 1, 2
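Each of these tests ultimately rests on m.checkgrad(), which compares analytic gradients against finite differences. The idea in miniature (a standalone sketch, not GPy's implementation):

    import numpy as np

    def checkgrad(f, df, x, eps=1e-6, tol=1e-4):
        # central finite differences vs. the supplied analytic gradient
        num = np.empty_like(x)
        for i in range(x.size):
            e = np.zeros_like(x)
            e[i] = eps
            num[i] = (f(x + e) - f(x - e)) / (2 * eps)
        return np.allclose(num, df(x), rtol=tol, atol=tol)

    f = lambda x: np.sin(x).sum()
    df = np.cos
    print(checkgrad(f, df, np.random.randn(4)))   # -> True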