fixed merge conflict on BPGLVM

This commit is contained in:
Nicolo Fusi 2013-04-12 13:35:00 +01:00
commit a42d84274d
10 changed files with 367 additions and 67 deletions

View file

@ -3,29 +3,30 @@
import numpy as np import numpy as np
import pylab as pb import pylab as pb
from matplotlib import pyplot as plt from matplotlib import pyplot as plt, pyplot
import GPy import GPy
from GPy.models.mrd import MRD
default_seed = np.random.seed(123344) default_seed = np.random.seed(123344)
def BGPLVM(seed = default_seed): def BGPLVM(seed=default_seed):
N = 10 N = 10
M = 3 M = 3
Q = 2 Q = 2
D = 4 D = 4
#generate GPLVM-like data # generate GPLVM-like data
X = np.random.rand(N, Q) X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N), K, D).T
k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q) k = GPy.kern.linear(Q, ARD=True) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q) # k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M) m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
m.constrain_positive('(rbf|bias|noise|white|S)') m.constrain_positive('(rbf|bias|noise|white|S)')
# m.constrain_fixed('S', 1) # m.constrain_fixed('S', 1)
@ -38,44 +39,44 @@ def BGPLVM(seed = default_seed):
# pb.title('After optimisation') # pb.title('After optimisation')
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
m.checkgrad(verbose = 1) m.checkgrad(verbose=1)
return m return m
def GPLVM_oil_100(optimize=True,M=15): def GPLVM_oil_100(optimize=True, M=15):
data = GPy.util.datasets.oil_100() data = GPy.util.datasets.oil_100()
# create simple GP model # create simple GP model
kernel = GPy.kern.rbf(6, ARD = True) + GPy.kern.bias(6) kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
m = GPy.models.GPLVM(data['X'], 6, kernel=kernel, M=M) m = GPy.models.GPLVM(data['X'], 6, kernel=kernel, M=M)
m.data_labels = data['Y'].argmax(axis=1) m.data_labels = data['Y'].argmax(axis=1)
# optimize # optimize
m.ensure_default_constraints() m.ensure_default_constraints()
if optimize: if optimize:
m.optimize('scg',messages=1) m.optimize('scg', messages=1)
# plot # plot
print(m) print(m)
m.plot_latent(labels=m.data_labels) m.plot_latent(labels=m.data_labels)
return m return m
def BGPLVM_oil(optimize=True,N=100,Q=10,M=15): def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300):
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
# create simple GP model # create simple GP model
kernel = GPy.kern.rbf(Q, ARD = True) + GPy.kern.bias(Q) + GPy.kern.white(Q,0.001) kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.001)
m = GPy.models.Bayesian_GPLVM(data['X'][:N], Q, kernel = kernel,M=M) m = GPy.models.Bayesian_GPLVM(data['X'][:N], Q, kernel=kernel, M=M)
m.data_labels = data['Y'][:N].argmax(axis=1) m.data_labels = data['Y'][:N].argmax(axis=1)
# optimize # optimize
if optimize: if optimize:
m.constrain_fixed('noise',0.05) m.constrain_fixed('noise', 0.05)
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize('scg',messages=1) m.optimize('scg', messages=1, max_f_eval=max(80, max_f_eval))
m.unconstrain('noise') m.unconstrain('noise')
m.constrain_positive('noise') m.constrain_positive('noise')
m.optimize('scg',messages=1) m.optimize('scg', messages=1, max_f_eval=max(0, max_f_eval - 80))
else: else:
m.ensure_default_constraints() m.ensure_default_constraints()
@ -83,7 +84,7 @@ def BGPLVM_oil(optimize=True,N=100,Q=10,M=15):
print(m) print(m)
m.plot_latent(labels=m.data_labels) m.plot_latent(labels=m.data_labels)
pb.figure() pb.figure()
pb.bar(np.arange(m.kern.D),1./m.input_sensitivity()) pb.bar(np.arange(m.kern.D), 1. / m.input_sensitivity())
return m return m
def oil_100(): def oil_100():
@ -96,7 +97,52 @@ def oil_100():
# plot # plot
print(m) print(m)
#m.plot_latent(labels=data['Y'].argmax(axis=1)) # m.plot_latent(labels=data['Y'].argmax(axis=1))
return m
def mrd_simulation():
# num = 2
ard1 = np.array([1., 1, 0, 0], dtype=float)
ard2 = np.array([0., 1, 1, 0], dtype=float)
ard1[ard1 == 0] = 1E-10
ard2[ard2 == 0] = 1E-10
# ard1i = 1. / ard1
# ard2i = 1. / ard2
# k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard1i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
# Y1 = np.random.multivariate_normal(np.zeros(N), k.K(X), D1).T
# Y1 -= Y1.mean(0)
#
# k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard2i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
# Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
# Y2 -= Y2.mean(0)
# make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
D1, D2, N, M, Q = 50, 100, 150, 15, 4
x = np.linspace(0, 2 * np.pi, N)[:, None]
s1 = np.vectorize(lambda x: np.sin(x))
s2 = np.vectorize(lambda x: np.cos(x))
sS = np.vectorize(lambda x: np.sin(2 * x))
S1 = np.hstack([s1(x), sS(x)])
S2 = np.hstack([s2(x), sS(x)])
Y1 = S1.dot(np.random.randn(S1.shape[1], D1))
Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
m = MRD(Y1, Y2, Q=Q, M=M, kernel=k, init="PCA", _debug=False)
m.ensure_default_constraints()
# fig = pyplot.figure("expected", figsize=(8, 3))
# ax = fig.add_subplot(121)
# ax.bar(np.arange(ard1.size) + .1, ard1)
# ax = fig.add_subplot(122)
# ax.bar(np.arange(ard2.size) + .1, ard2)
return m return m
def brendan_faces(): def brendan_faces():
@ -109,7 +155,7 @@ def brendan_faces():
m.optimize(messages=1, max_f_eval=10000) m.optimize(messages=1, max_f_eval=10000)
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0,:] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False) data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax) lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
@ -126,10 +172,39 @@ def stick():
m.optimize(messages=1, max_f_eval=10000) m.optimize(messages=1, max_f_eval=10000)
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0,:] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax) lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close('all') plt.close('all')
return m return m
# def BGPLVM_oil():
# data = GPy.util.datasets.oil()
# Y, X = data['Y'], data['X']
# X -= X.mean(axis=0)
# X /= X.std(axis=0)
#
# Q = 10
# M = 30
#
# kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
# m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
# # m.scale_factor = 100.0
# m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
# from sklearn import cluster
# km = cluster.KMeans(M, verbose=10)
# Z = km.fit(m.X).cluster_centers_
# # Z = GPy.util.misc.kmm_init(m.X, M)
# m.set('iip', Z)
# m.set('bias', 1e-4)
# # optimize
# # m.ensure_default_constraints()
#
# import pdb; pdb.set_trace()
# m.optimize('tnc', messages=1)
# print m
# m.plot_latent(labels=data['Y'].argmax(axis=1))
# return m

View file

@ -104,7 +104,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
iteration += 1 iteration += 1
if display: if display:
print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', print 'Iteration: {0:<5g} Objective:{1:< 12g} Scale:{2:< 12g}\r'.format(iteration, fnow, beta),
# print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush() sys.stdout.flush()
if success: if success:

View file

@ -52,7 +52,7 @@ class kern(parameterised):
parameterised.__init__(self) parameterised.__init__(self)
def plot_ARD(self): def plot_ARD(self, ax=pb.gca()):
""" """
If an ARD kernel is present, it bar-plots the ARD parameters If an ARD kernel is present, it bar-plots the ARD parameters
@ -60,17 +60,17 @@ class kern(parameterised):
""" """
for p in self.parts: for p in self.parts:
if hasattr(p, 'ARD') and p.ARD: if hasattr(p, 'ARD') and p.ARD:
pb.figure() ax.set_title('ARD parameters, %s kernel' % p.name)
pb.title('ARD parameters, %s kernel' % p.name)
if p.name == 'linear': if p.name == 'linear':
ard_params = p.variances ard_params = p.variances
else: else:
ard_params = 1./p.lengthscale ard_params = 1./p.lengthscale
pb.bar(np.arange(len(ard_params))-0.4, ard_params) ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
ax.set_xticks(np.arange(len(ard_params)),
["${}$".format(i + 1) for i in range(len(ard_params))])
return ax
def _transform_gradients(self,g): def _transform_gradients(self,g):
x = self._get_params() x = self._get_params()

View file

@ -173,7 +173,7 @@ class rbf(kernpart):
"""Think N,M,M,Q """ """Think N,M,M,Q """
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
target_mu += (dL_dpsi2[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1) target_mu += -2.*(dL_dpsi2[:,:,:,None]*tmp*self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) target_S += (dL_dpsi2[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
@ -207,7 +207,6 @@ class rbf(kernpart):
if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)): if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
#something's changed. recompute EVERYTHING #something's changed. recompute EVERYTHING
#TODO: make more efficient for large Q (using NDL's dot product trick)
#psi1 #psi1
self._psi1_denom = S[:,None,:]/self.lengthscale2 + 1. self._psi1_denom = S[:,None,:]/self.lengthscale2 + 1.
self._psi1_dist = Z[None,:,:]-mu[:,None,:] self._psi1_dist = Z[None,:,:]-mu[:,None,:]

View file

@ -22,7 +22,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
:type init: 'PCA'|'random' :type init: 'PCA'|'random'
""" """
def __init__(self, Y, Q, X = None, X_variance = None, init='PCA', M=10, Z=None, kernel=None, **kwargs): def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, **kwargs):
if X == None: if X == None:
X = self.initialise_latent(init, Q, Y) X = self.initialise_latent(init, Q, Y)
@ -31,7 +31,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
if Z is None: if Z is None:
Z = np.random.permutation(X.copy())[:M] Z = np.random.permutation(X.copy())[:M]
assert Z.shape[1]==X.shape[1] assert Z.shape[1] == X.shape[1]
if kernel is None: if kernel is None:
kernel = kern.rbf(Q) + kern.white(Q) kernel = kern.rbf(Q) + kern.white(Q)
@ -40,8 +40,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs) sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
def _get_param_names(self): def _get_param_names(self):
X_names = sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[]) X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
S_names = sum([['X_variance_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[]) S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
return (X_names + S_names + sparse_GP._get_param_names(self)) return (X_names + S_names + sparse_GP._get_param_names(self))
def _get_params(self): def _get_params(self):
@ -56,35 +56,43 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
""" """
return np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self))) return np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
def _set_params(self,x): def _set_params(self, x):
N, Q = self.N, self.Q N, Q = self.N, self.Q
self.X = x[:self.X.size].reshape(N,Q).copy() self.X = x[:self.X.size].reshape(N, Q).copy()
self.X_variance = x[(N*Q):(2*N*Q)].reshape(N,Q).copy() self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
sparse_GP._set_params(self, x[(2*N*Q):]) sparse_GP._set_params(self, x[(2 * N * Q):])
def dKL_dmuS(self):
dKL_dS = (1. - (1. / self.X_variance)) * 0.5
dKL_dmu = self.X
return dKL_dmu, dKL_dS
def dL_dmuS(self): def dL_dmuS(self):
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_variance) dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_variance) dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_variance) dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance)
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2 dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2 dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
dKL_dS = (1. - (1./self.X_variance))*0.5 return dL_dmu, dL_dS
dKL_dmu = self.X
return np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
def KL_divergence(self): def KL_divergence(self):
var_mean = np.square(self.X).sum() var_mean = np.square(self.X).sum()
var_S = np.sum(self.X_variance - np.log(self.X_variance)) var_S = np.sum(self.X_variance - np.log(self.X_variance))
return 0.5*(var_mean + var_S) - 0.5*self.Q*self.N return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N
def log_likelihood(self): def log_likelihood(self):
return sparse_GP.log_likelihood(self) - self.KL_divergence() return sparse_GP.log_likelihood(self) - self.KL_divergence()
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self))) dKL_dmu, dKL_dS = self.dKL_dmuS()
dL_dmu, dL_dS = self.dL_dmuS()
# TODO: find way to make faster
dbound_dmuS = np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
def plot_latent(self, which_indices=None,*args, **kwargs): def plot_latent(self, which_indices=None, *args, **kwargs):
if which_indices is None: if which_indices is None:
try: try:
@ -93,7 +101,6 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
else: else:
input_1, input_2 = which_indices input_1, input_2 = which_indices
ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2],*args, **kwargs) ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w') ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
return ax return ax

View file

@ -60,7 +60,7 @@ class GPLVM(GP):
mu, var, upper, lower = self.predict(Xnew) mu, var, upper, lower = self.predict(Xnew)
pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5) pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)
def plot_latent(self,labels=None, which_indices=None, resolution=50): def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=pb.gca()):
""" """
:param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc) :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
:param resolution: the resolution of the grid on which to evaluate the predictive variance :param resolution: the resolution of the grid on which to evaluate the predictive variance
@ -90,7 +90,7 @@ class GPLVM(GP):
Xtest_full[:, :2] = Xtest Xtest_full[:, :2] = Xtest
mu, var, low, up = self.predict(Xtest_full) mu, var, low, up = self.predict(Xtest_full)
var = var[:, :1] var = var[:, :1]
pb.imshow(var.reshape(resolution,resolution).T[::-1,:], ax.imshow(var.reshape(resolution, resolution).T[::-1, :],
extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear') extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear')
for i,ul in enumerate(np.unique(labels)): for i,ul in enumerate(np.unique(labels)):
@ -110,15 +110,15 @@ class GPLVM(GP):
y = self.X[index,input_2] y = self.X[index,input_2]
pb.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0) pb.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0)
pb.xlabel('latent dimension %i'%input_1) ax.set_xlabel('latent dimension %i' % input_1)
pb.ylabel('latent dimension %i'%input_2) ax.set_ylabel('latent dimension %i' % input_2)
if not np.all(labels==1.): if not np.all(labels==1.):
pb.legend(loc=0,numpoints=1) ax.legend(loc=0, numpoints=1)
pb.xlim(xmin[0],xmax[0]) ax.set_xlim(xmin[0], xmax[0])
pb.ylim(xmin[1],xmax[1]) ax.set_ylim(xmin[1], xmax[1])
pb.grid(b=False) # remove the grid if present, it doesn't look good ax.grid(b=False) # remove the grid if present, it doesn't look good
ax = pb.gca() # ax = pb.gca()
ax.set_aspect('auto') # set a nice aspect ratio ax.set_aspect('auto') # set a nice aspect ratio
return ax return ax

View file

@ -11,4 +11,7 @@ from warped_GP import warpedGP
from sparse_GPLVM import sparse_GPLVM from sparse_GPLVM import sparse_GPLVM
from uncollapsed_sparse_GP import uncollapsed_sparse_GP from uncollapsed_sparse_GP import uncollapsed_sparse_GP
from Bayesian_GPLVM import Bayesian_GPLVM from Bayesian_GPLVM import Bayesian_GPLVM
import mrd
MRD = mrd.MRD
del mrd
from generalized_FITC import generalized_FITC from generalized_FITC import generalized_FITC

180
GPy/models/mrd.py Normal file
View file

@ -0,0 +1,180 @@
'''
Created on 10 Apr 2013
@author: Max Zwiessele
'''
from GPy.core import model
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
import numpy
from GPy.models.sparse_GP import sparse_GP
import itertools
from matplotlib import pyplot
import pylab
class MRD(model):
"""
Do MRD on given Datasets in Ylist.
All Ys in Ylist are in [N x Dn], where Dn can be different per Yn,
N must be shared across datasets though.
:param Ylist...: observed datasets
:type Ylist: [np.ndarray]
:param names: names for different gplvm models
:type names: [str]
:param Q: latent dimensionality
:type Q: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
:param X:
Initial latent space
:param X_variance:
Initial latent space variance
:param init: [PCA|random]
initialization method to use
:param M:
number of inducing inputs to use
:param Z:
initial inducing inputs
:param kernel:
kernel to use
"""
def __init__(self, *Ylist, **kwargs):
self._debug = False
if kwargs.has_key("_debug"):
self._debug = kwargs['_debug']
del kwargs['_debug']
if kwargs.has_key("names"):
self.names = kwargs['names']
del kwargs['names']
else:
self.names = ["{}".format(i + 1) for i in range(len(Ylist))]
if kwargs.has_key('kernel'):
kernel = kwargs['kernel']
k = lambda: kernel.copy()
del kwargs['kernel']
else:
k = lambda: None
self.bgplvms = [Bayesian_GPLVM(Y, kernel=k(), **kwargs) for Y in Ylist]
self.gref = self.bgplvms[0]
nparams = numpy.array([0] + [sparse_GP._get_params(g).size - g.Z.size for g in self.bgplvms])
self.nparams = nparams.cumsum()
self.Q = self.gref.Q
self.N = self.gref.N
self.NQ = self.N * self.Q
self.M = self.gref.M
self.MQ = self.M * self.Q
model.__init__(self) # @UndefinedVariable
def _get_param_names(self):
# X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
# S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
n1 = self.gref._get_param_names()
n1var = n1[:self.NQ * 2 + self.MQ]
map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x),
itertools.izip(ns,
itertools.repeat(name)))
return list(itertools.chain(n1var, *(map_names(\
sparse_GP._get_param_names(g)[self.MQ:], n) \
for g, n in zip(self.bgplvms, self.names))))
def _get_params(self):
"""
return parameter list containing private and shared parameters as follows:
=================================================================
| mu | S | Z || theta1 | theta2 | .. | thetaN |
=================================================================
"""
X = self.gref.X.flatten()
X_var = self.gref.X_variance.flatten()
Z = self.gref.Z.flatten()
thetas = [sparse_GP._get_params(g)[g.Z.size:] for g in self.bgplvms]
params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)])
return params
def _set_var_params(self, g, X, X_var, Z):
g.X = X
g.X_variance = X_var
g.Z = Z
def _set_kern_params(self, g, p):
g.kern._set_params(p[:g.kern.Nparam])
g.likelihood._set_params(p[g.kern.Nparam:])
def _set_params(self, x):
start = 0; end = self.NQ
X = x[start:end].reshape(self.N, self.Q).copy()
start = end; end += start
X_var = x[start:end].reshape(self.N, self.Q).copy()
start = end; end += self.MQ
Z = x[start:end].reshape(self.M, self.Q).copy()
thetas = x[end:]
# set params for all others:
for g, s, e in itertools.izip(self.bgplvms, self.nparams, self.nparams[1:]):
self._set_var_params(g, X, X_var, Z)
self._set_kern_params(g, thetas[s:e].copy())
g._compute_kernel_matrices()
g._computations()
def log_likelihood(self):
ll = +self.gref.KL_divergence()
for g in self.bgplvms:
ll -= sparse_GP.log_likelihood(g)
return -ll
def _log_likelihood_gradients(self):
dLdmu, dLdS = reduce(lambda a, b: [a[0] + b[0], a[1] + b[1]], (g.dL_dmuS() for g in self.bgplvms))
dKLmu, dKLdS = self.gref.dKL_dmuS()
dLdmu -= dKLmu
dLdS -= dKLdS
dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten()
dldzt1 = reduce(lambda a, b: a + b, (sparse_GP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
return numpy.hstack((dLdmuS,
dldzt1,
numpy.hstack([numpy.hstack([g.dL_dtheta(),
g.likelihood._gradients(\
partial=g.partial_for_likelihood)]) \
for g in self.bgplvms])))
def plot_X(self):
fig = pylab.figure("MRD X", figsize=(4 * len(self.bgplvms), 3))
fig.clf()
for i, g in enumerate(self.bgplvms):
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
ax.imshow(g.X)
pylab.draw()
fig.tight_layout()
return fig
def plot_predict(self):
fig = pylab.figure("MRD Predictions", figsize=(4 * len(self.bgplvms), 3))
fig.clf()
for i, g in enumerate(self.bgplvms):
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
ax.imshow(g.predict(g.X)[0])
pylab.draw()
fig.tight_layout()
return fig
def plot_scales(self, *args, **kwargs):
fig = pylab.figure("MRD Scales", figsize=(4 * len(self.bgplvms), 3))
for i, g in enumerate(self.bgplvms):
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
g.kern.plot_ARD(ax=ax, *args, **kwargs)
pylab.draw()
fig.tight_layout()
return fig
def plot_latent(self, *args, **kwargs):
fig = pylab.figure("MRD Latent Spaces", figsize=(4 * len(self.bgplvms), 3))
for i, g in enumerate(self.bgplvms):
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
g.plot_latent(ax=ax, *args, **kwargs)
pylab.draw()
fig.tight_layout()
return fig

32
GPy/testing/mrd_tests.py Normal file
View file

@ -0,0 +1,32 @@
# Copyright (c) 2013, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
'''
Created on 10 Apr 2013
@author: maxz
'''
import unittest
import numpy as np
import GPy
class MRDTests(unittest.TestCase):
def test_gradients(self):
num_m = 3
N, M, Q, D = 20, 8, 5, 50
X = np.random.rand(N, Q)
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q)
K = k.K(X)
Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)]
m = GPy.models.MRD(*Ylist, Q=Q, kernel=k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()

View file

@ -4,7 +4,7 @@ import GPy
import numpy as np import numpy as np
class lvm: class lvm:
def __init__(self, model, data_visualize, latent_axis): def __init__(self, model, data_visualize, latent_axis, latent_index=[0,1], latent_dim=2):
self.cid = latent_axis.figure.canvas.mpl_connect('button_press_event', self.on_click) self.cid = latent_axis.figure.canvas.mpl_connect('button_press_event', self.on_click)
self.cid = latent_axis.figure.canvas.mpl_connect('motion_notify_event', self.on_move) self.cid = latent_axis.figure.canvas.mpl_connect('motion_notify_event', self.on_move)
self.data_visualize = data_visualize self.data_visualize = data_visualize
@ -12,6 +12,8 @@ class lvm:
self.latent_axis = latent_axis self.latent_axis = latent_axis
self.called = False self.called = False
self.move_on = False self.move_on = False
self.latent_index = latent_index
self.latent_dim = latent_dim
def on_click(self, event): def on_click(self, event):
#print 'click', event.xdata, event.ydata #print 'click', event.xdata, event.ydata
@ -32,7 +34,8 @@ class lvm:
if self.called and self.move_on: if self.called and self.move_on:
# Call modify code on move # Call modify code on move
#print 'move', event.xdata, event.ydata #print 'move', event.xdata, event.ydata
latent_values = np.array((event.xdata, event.ydata)) latent_values = np.zeros((1,self.latent_dim))
latent_values[0,self.latent_index] = np.array([event.xdata, event.ydata])
y = self.model.predict(latent_values)[0] y = self.model.predict(latent_values)[0]
self.data_visualize.modify(y) self.data_visualize.modify(y)
#print 'y', y #print 'y', y
@ -57,7 +60,7 @@ class vector_show(data_show):
def __init__(self, vals, axis=None): def __init__(self, vals, axis=None):
data_show.__init__(self, vals, axis) data_show.__init__(self, vals, axis)
self.vals = vals.T self.vals = vals.T
self.handle = plt.plot(np.arange(0, len(vals))[:, None], self.vals)[0] self.handle = self.axis.plot(np.arange(0, len(vals))[:, None], self.vals)[0]
def modify(self, vals): def modify(self, vals):
xdata, ydata = self.handle.get_data() xdata, ydata = self.handle.get_data()