Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-10 20:42:39 +02:00)

finished mrd and added plotting functions
commit 51ff92e591, parent de6b00ebfd
7 changed files with 130 additions and 75 deletions
@@ -107,32 +107,41 @@ def mrd_simulation():
     ard1[ard1 == 0] = 1E-10
     ard2[ard2 == 0] = 1E-10

-    ard1i = 1. / ard1
-    ard2i = 1. / ard2
+    # ard1i = 1. / ard1
+    # ard2i = 1. / ard2

-    # make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
+    # k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard1i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
+    # Y1 = np.random.multivariate_normal(np.zeros(N), k.K(X), D1).T
+    # Y1 -= Y1.mean(0)
+    #
+    # k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard2i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
+    # Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
+    # Y2 -= Y2.mean(0)
+    # make_params = lambda ard: np.hstack([[1], ard, [1, .3]])

     D1, D2, N, M, Q = 50, 100, 150, 15, 4
-    X = np.random.randn(N, Q)
+    x = np.linspace(0, 2 * np.pi, N)[:, None]

-    k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard1i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
-    Y1 = np.random.multivariate_normal(np.zeros(N), k.K(X), D1).T
-    Y1 -= Y1.mean(0)
+    s1 = np.vectorize(lambda x: np.sin(x))
+    s2 = np.vectorize(lambda x: np.cos(x))
+    sS = np.vectorize(lambda x: np.sin(2 * x))

-    k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard2i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
-    Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
-    Y2 -= Y2.mean(0)
+    S1 = np.hstack([s1(x), sS(x)])
+    S2 = np.hstack([s2(x), sS(x)])

-    k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q, 1.0)
+    Y1 = S1.dot(np.random.randn(S1.shape[1], D1))
+    Y2 = S2.dot(np.random.randn(S2.shape[1], D2))

-    m = MRD(Y1, Y2, Q=Q, M=M, kernel=k, _debug=False)
+    k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)

+    m = MRD(Y1, Y2, Q=Q, M=M, kernel=k, init="PCA", _debug=False)
     m.ensure_default_constraints()

-    fig = pyplot.figure("expected", figsize=(8, 3))
-    ax = fig.add_subplot(121)
-    ax.bar(np.arange(ard1.size) + .1, ard1)
-    ax = fig.add_subplot(122)
-    ax.bar(np.arange(ard2.size) + .1, ard2)
+    # fig = pyplot.figure("expected", figsize=(8, 3))
+    # ax = fig.add_subplot(121)
+    # ax.bar(np.arange(ard1.size) + .1, ard1)
+    # ax = fig.add_subplot(122)
+    # ax.bar(np.arange(ard2.size) + .1, ard2)

     return m
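Note on the mrd_simulation change: the toy data is no longer drawn from GP kernels; each view is now a random linear mixing of one private signal (sin x or cos x) and one shared signal (sin 2x), which gives MRD a known private/shared split to recover. A minimal standalone sketch of that generative scheme (numpy only, names chosen here for illustration):

    import numpy as np

    N, D1, D2 = 150, 50, 100
    x = np.linspace(0, 2 * np.pi, N)[:, None]

    S1 = np.hstack([np.sin(x), np.sin(2 * x)])  # private (sin) + shared (sin 2x) signals
    S2 = np.hstack([np.cos(x), np.sin(2 * x)])  # private (cos) + shared (sin 2x) signals

    # each view is a random linear mixing of its two latent signals
    Y1 = S1.dot(np.random.randn(S1.shape[1], D1))
    Y2 = S2.dot(np.random.randn(S2.shape[1], D2))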
@@ -104,7 +104,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto

         iteration += 1
         if display:
-            print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
+            print 'Iteration: {0:<5g} Objective:{1:< 12g} Scale:{2:< 12g}\r'.format(iteration, fnow, beta),
+            # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
             sys.stdout.flush()

         if success:
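Note on the SCG change: the progress line now uses fixed-width format specs, so the trailing '\r' overwrites the previous status cleanly instead of leaving stale characters behind. A small sketch of the same specs (shown with the Python 3 print function for convenience; the source itself uses the Python 2 print statement):

    iteration, fnow, beta = 42, -1234.5678, 0.001
    line = 'Iteration: {0:<5g} Objective:{1:< 12g} Scale:{2:< 12g}\r'.format(iteration, fnow, beta)
    # '<' left-aligns within the field width; the ' ' sign option reserves a
    # space for positive numbers so columns stay aligned as signs flip
    print(line, end='')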
@@ -70,7 +70,7 @@ class kern(parameterised):
         ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
         ax.set_xticks(np.arange(len(ard_params)),
                       ["${}$".format(i + 1) for i in range(len(ard_params))])
-
+        return ax

     def _transform_gradients(self,g):
         x = self._get_params()
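Note on the kern change: having plot_ARD return its axis (and draw into a caller-supplied one) is what lets the MRD plotting helpers further down tile several models into one figure. A generic sketch of that pattern (plain matplotlib, not GPy's exact API; plot_bars is a hypothetical stand-in):

    import numpy as np
    import matplotlib.pyplot as plt

    def plot_bars(values, ax=None):
        # draw into the caller's axis if given, else the current one; return it either way
        if ax is None:
            ax = plt.gca()
        ax.bar(np.arange(len(values)) - 0.4, values)
        ax.set_xticks(np.arange(len(values)))
        ax.set_xticklabels(["${}$".format(i + 1) for i in range(len(values))])
        return ax

    ax = plot_bars(np.random.rand(4))  # caller can keep decorating the returned axis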
@@ -22,7 +22,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
     :type init: 'PCA'|'random'

     """
-    def __init__(self, Y, Q, X = None, X_variance = None, init='PCA', M=10, Z=None, kernel=None, **kwargs):
+    def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, **kwargs):
         if X == None:
             X = self.initialise_latent(init, Q, Y)

@@ -31,7 +31,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):

         if Z is None:
             Z = np.random.permutation(X.copy())[:M]
-        assert Z.shape[1]==X.shape[1]
+        assert Z.shape[1] == X.shape[1]

         if kernel is None:
             kernel = kern.rbf(Q) + kern.white(Q)
@@ -40,8 +40,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)

     def _get_param_names(self):
-        X_names = sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
-        S_names = sum([['X_variance_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
+        X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
+        S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
         return (X_names + S_names + sparse_GP._get_param_names(self))

     def _get_params(self):
@@ -56,35 +56,43 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         """
         return np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))

-    def _set_params(self,x):
+    def _set_params(self, x):
         N, Q = self.N, self.Q
-        self.X = x[:self.X.size].reshape(N,Q).copy()
-        self.X_variance = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
-        sparse_GP._set_params(self, x[(2*N*Q):])
+        self.X = x[:self.X.size].reshape(N, Q).copy()
+        self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
+        sparse_GP._set_params(self, x[(2 * N * Q):])

+    def dKL_dmuS(self):
+        dKL_dS = (1. - (1. / self.X_variance)) * 0.5
+        dKL_dmu = self.X
+        return dKL_dmu, dKL_dS
+
     def dL_dmuS(self):
-        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_variance)
-        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_variance)
-        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_variance)
+        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance)
+        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance)
+        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance)
         dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
         dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2

-        dKL_dS = (1. - (1./self.X_variance))*0.5
-        dKL_dmu = self.X
-        return np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
+        return dL_dmu, dL_dS

     def KL_divergence(self):
         var_mean = np.square(self.X).sum()
         var_S = np.sum(self.X_variance - np.log(self.X_variance))
-        return 0.5*(var_mean + var_S) - 0.5*self.Q*self.N
+        return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N

     def log_likelihood(self):
         return sparse_GP.log_likelihood(self) - self.KL_divergence()

     def _log_likelihood_gradients(self):
-        return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self)))
+        dKL_dmu, dKL_dS = self.dKL_dmuS()
+        dL_dmu, dL_dS = self.dL_dmuS()
+
+        # TODO: find way to make faster
+        dbound_dmuS = np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
+        return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))

-    def plot_latent(self, which_indices=None,*args, **kwargs):
+    def plot_latent(self, which_indices=None, *args, **kwargs):

         if which_indices is None:
             try:
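Note on the Bayesian_GPLVM change: splitting out dKL_dmuS makes the variational bound explicit. For a factorised Gaussian q(X) = N(mu, S) with diagonal S against a unit-Gaussian prior, KL(q || p) = 0.5 * sum(mu^2 + S - log S - 1); KL_divergence computes exactly this, and dKL_dmuS returns its derivatives dKL/dmu = mu and dKL/dS = 0.5 * (1 - 1/S). A quick numeric check (standalone numpy, illustrative shapes):

    import numpy as np

    N, Q = 4, 3
    mu = np.random.randn(N, Q)
    S = np.random.rand(N, Q) + 0.5  # positive variances

    # form used in KL_divergence vs. the textbook per-dimension sum
    kl = 0.5 * (np.square(mu).sum() + np.sum(S - np.log(S))) - 0.5 * N * Q
    kl_direct = 0.5 * np.sum(np.square(mu) + S - np.log(S) - 1.0)
    assert np.allclose(kl, kl_direct)

    dKL_dmu = mu                    # d/dmu of 0.5 * sum(mu^2)
    dKL_dS = 0.5 * (1.0 - 1.0 / S)  # d/dS of 0.5 * sum(S - log S)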
@@ -93,6 +101,6 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
                 raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
         else:
             input_1, input_2 = which_indices
-        ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2],*args, **kwargs)
+        ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
         ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
         return ax
@@ -60,7 +60,7 @@ class GPLVM(GP):
         mu, var, upper, lower = self.predict(Xnew)
         pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)

-    def plot_latent(self,labels=None, which_indices=None, resolution=50):
+    def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=pb.gca()):
         """
         :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
         :param resolution: the resolution of the grid on which to evaluate the predictive variance
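One caveat on the new signature: a default argument like ax=pb.gca() is evaluated once, when the function is defined at import time, so every later call without an explicit ax reuses (or creates) whatever axis was current at import. If that is not intended, the usual idiom is an ax=None sentinel resolved at call time; a minimal sketch:

    import matplotlib.pyplot as plt

    def plot_latent(labels=None, which_indices=None, resolution=50, ax=None):
        if ax is None:        # resolve the current axis at call time, not import time
            ax = plt.gca()
        return ax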
@@ -90,7 +90,7 @@ class GPLVM(GP):
         Xtest_full[:, :2] = Xtest
         mu, var, low, up = self.predict(Xtest_full)
         var = var[:, :1]
-        pb.imshow(var.reshape(resolution,resolution).T[::-1,:],
+        ax.imshow(var.reshape(resolution, resolution).T[::-1, :],
                   extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear')

         for i,ul in enumerate(np.unique(labels)):
@@ -110,15 +110,15 @@ class GPLVM(GP):
             y = self.X[index,input_2]
             pb.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0)

-        pb.xlabel('latent dimension %i'%input_1)
-        pb.ylabel('latent dimension %i'%input_2)
+        ax.set_xlabel('latent dimension %i' % input_1)
+        ax.set_ylabel('latent dimension %i' % input_2)

         if not np.all(labels==1.):
-            pb.legend(loc=0,numpoints=1)
+            ax.legend(loc=0, numpoints=1)

-        pb.xlim(xmin[0],xmax[0])
-        pb.ylim(xmin[1],xmax[1])
-        pb.grid(b=False) # remove the grid if present, it doesn't look good
-        ax = pb.gca()
+        ax.set_xlim(xmin[0], xmax[0])
+        ax.set_ylim(xmin[1], xmax[1])
+        ax.grid(b=False) # remove the grid if present, it doesn't look good
+        # ax = pb.gca()
         ax.set_aspect('auto') # set a nice aspect ratio
         return ax
@@ -121,23 +121,60 @@ class MRD(model):


     def log_likelihood(self):
-        ll = self.gref.KL_divergence()
+        ll = +self.gref.KL_divergence()
         for g in self.bgplvms:
-            ll += sparse_GP.log_likelihood(g)
-        return ll
+            ll -= sparse_GP.log_likelihood(g)
+        return -ll

     def _log_likelihood_gradients(self):
-        dldmus = self.gref.dL_dmuS().flatten()
-        dldzt1 = sparse_GP._log_likelihood_gradients(self.gref)
-        return numpy.hstack((dldmus,
+        dLdmu, dLdS = reduce(lambda a, b: [a[0] + b[0], a[1] + b[1]], (g.dL_dmuS() for g in self.bgplvms))
+        dKLmu, dKLdS = self.gref.dKL_dmuS()
+        dLdmu -= dKLmu
+        dLdS -= dKLdS
+        dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten()
+        dldzt1 = reduce(lambda a, b: a + b, (sparse_GP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
+
+        return numpy.hstack((dLdmuS,
                              dldzt1,
                              numpy.hstack([numpy.hstack([g.dL_dtheta(),
                                                          g.likelihood._gradients(\
                                                              partial=g.partial_for_likelihood)]) \
-                                           for g in self.bgplvms[1:]])))
+                                           for g in self.bgplvms])))

-    def plot_scales(self):
+    def plot_X(self):
+        fig = pylab.figure("MRD X", figsize=(4 * len(self.bgplvms), 3))
+        fig.clf()
+        for i, g in enumerate(self.bgplvms):
+            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
+            ax.imshow(g.X)
+        pylab.draw()
+        fig.tight_layout()
+        return fig
+
+    def plot_predict(self):
+        fig = pylab.figure("MRD Predictions", figsize=(4 * len(self.bgplvms), 3))
+        fig.clf()
+        for i, g in enumerate(self.bgplvms):
+            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
+            ax.imshow(g.predict(g.X)[0])
+        pylab.draw()
+        fig.tight_layout()
+        return fig
+
+    def plot_scales(self, *args, **kwargs):
         fig = pylab.figure("MRD Scales", figsize=(4 * len(self.bgplvms), 3))
         for i, g in enumerate(self.bgplvms):
             ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
-            g.kern.plot_ARD(ax=ax)
+            g.kern.plot_ARD(ax=ax, *args, **kwargs)
+        pylab.draw()
+        fig.tight_layout()
+        return fig
+
+    def plot_latent(self, *args, **kwargs):
+        fig = pylab.figure("MRD Latent Spaces", figsize=(4 * len(self.bgplvms), 3))
+        for i, g in enumerate(self.bgplvms):
+            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
+            g.plot_latent(ax=ax, *args, **kwargs)
+        pylab.draw()
+        fig.tight_layout()
+        return fig
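Note on the MRD objective: the rewritten log_likelihood accumulates ll = KL - sum of the per-view sparse-GP bounds and returns -ll, i.e. the total bound minus the shared KL term, now counting every view (the gradient comprehension likewise changed from self.bgplvms[1:] to self.bgplvms). A toy sign-convention check (plain Python, stand-in numbers):

    bounds = [10.0, 12.5, 9.0]  # stand-ins for sparse_GP.log_likelihood(g) per view
    kl = 3.0                    # stand-in for the shared KL term

    ll = +kl
    for b in bounds:
        ll -= b
    assert -ll == sum(bounds) - kl  # returned value: total bound minus shared KL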
@@ -10,23 +10,23 @@ import unittest
 import numpy as np
 import GPy

-# class MRDTests(unittest.TestCase):
-#
-# # @unittest.skip('')
-# def test_gradients(self):
-# num_m = 2
-# N, M, Q, D = 10, 3, 2, 4
-# X = np.random.rand(N, Q)
-# k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
-# K = k.K(X)
-# Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)]
-#
-# m = GPy.models.MRD(*Ylist, Q=Q, kernel=k, M=M)
-# m._debug = True
-# m.ensure_default_constraints()
-# m.randomize()
-# self.assertTrue(m.checkgrad())
-#
-# if __name__ == "__main__":
-# print "Running unit tests, please be (very) patient..."
-# # unittest.main()
+class MRDTests(unittest.TestCase):
+
+    def test_gradients(self):
+        num_m = 3
+        N, M, Q, D = 20, 8, 5, 50
+        X = np.random.rand(N, Q)
+
+        k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q)
+        K = k.K(X)
+        Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)]
+
+        m = GPy.models.MRD(*Ylist, Q=Q, kernel=k, M=M)
+        m.ensure_default_constraints()
+        m.randomize()
+
+        self.assertTrue(m.checkgrad())
+
+if __name__ == "__main__":
+    print "Running unit tests, please be (very) patient..."
+    unittest.main()
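Note on the revived test: m.checkgrad() validates the analytic gradient of the bound against finite differences, so this test effectively asserts that the new dKL_dmuS/dL_dmuS assembly in MRD._log_likelihood_gradients is consistent with log_likelihood. A generic central-difference checker in the same spirit (standalone sketch, not GPy's implementation):

    import numpy as np

    def checkgrad(f, grad, x, eps=1e-6, tol=1e-4):
        # compare analytic grad(x) against central finite differences of f
        g = grad(x)
        num = np.empty_like(x)
        for i in range(x.size):
            d = np.zeros_like(x)
            d[i] = eps
            num[i] = (f(x + d) - f(x - d)) / (2 * eps)
        return np.allclose(g, num, atol=tol)

    assert checkgrad(lambda x: np.sum(x ** 2), lambda x: 2 * x, np.random.randn(5))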