# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
|
|
|
|
|
import pylab as pb
|
2013-04-11 15:47:18 +01:00
|
|
|
from matplotlib import pyplot as plt, pyplot
|
2013-04-02 02:20:53 +02:00
|
|
|
|
2013-03-11 14:05:56 +00:00
|
|
|
import GPy
|
2013-04-11 14:54:25 +01:00
|
|
|
from GPy.models.mrd import MRD
|
2013-03-11 14:05:56 +00:00
|
|
|
|
|
|
|
|
# Seed the global numpy RNG once at import time so the demos are repeatable.
# Fix: ``np.random.seed`` returns None, so the previous one-liner
# (``default_seed = np.random.seed(123344)``) left ``default_seed`` set to
# None instead of the intended seed value.
np.random.seed(123344)
default_seed = 123344
|
|
|
|
|
|
2013-04-11 14:54:25 +01:00
|
|
|
def BGPLVM(seed=default_seed):
    """Demo: fit a Bayesian GPLVM to small GPLVM-like toy data and check gradients.

    :param seed: seed for numpy's global RNG so the toy data is reproducible;
        ``None`` leaves the RNG state untouched.
    :returns: the randomized :class:`GPy.models.Bayesian_GPLVM` model
        (after a gradient check, not after optimisation).
    """
    # Fix: ``seed`` used to be accepted but silently ignored.
    if seed is not None:
        np.random.seed(seed)

    N = 10  # number of data points
    M = 3   # number of inducing points
    Q = 2   # latent dimensionality
    D = 4   # observed dimensionality

    # generate GPLVM-like data: latent points, then draws from a GP over them
    X = np.random.rand(N, Q)
    k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
    K = k.K(X)
    Y = np.random.multivariate_normal(np.zeros(N), K, D).T

    # kernel used for the model (alternatives kept for experimentation):
    # k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
    # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
    # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
    k = GPy.kern.linear(Q, ARD=True) + GPy.kern.white(Q)

    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
    m.constrain_positive('(rbf|bias|noise|white|S)')
    # m.constrain_fixed('S', 1)

    m.ensure_default_constraints()
    m.randomize()
    m.checkgrad(verbose=1)
    return m
|
|
|
|
|
|
2013-04-11 14:54:25 +01:00
|
|
|
def GPLVM_oil_100(optimize=True, M=15):
    """Demo: GPLVM with an ARD RBF + bias kernel on the 100-point oil-flow data.

    :param optimize: if True, run SCG optimisation before plotting.
    :param M: number of inducing points passed to the model.
    :returns: the model (optimised when ``optimize`` is True).
    """
    data = GPy.util.datasets.oil_100()

    # create simple GP model: 6 latent dimensions, ARD RBF plus a bias term
    kern = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
    m = GPy.models.GPLVM(data['X'], 6, kernel=kern, M=M)
    # class labels for colouring the latent plot (one-hot -> index)
    m.data_labels = data['Y'].argmax(axis=1)

    # optimize
    m.ensure_default_constraints()
    if optimize:
        m.optimize('scg', messages=1)

    # plot
    print(m)
    m.plot_latent(labels=m.data_labels)
    return m
|
|
|
|
|
|
2013-04-11 14:54:25 +01:00
|
|
|
def BGPLVM_oil(optimize=True, N=100, Q=10, M=15):
    """Demo: Bayesian GPLVM on the first ``N`` points of the oil-flow data.

    :param optimize: if True, run a two-stage SCG optimisation.
    :param N: number of data points used from the dataset.
    :param Q: latent dimensionality.
    :param M: number of inducing points.
    :returns: the model, after plotting the latent space and the inverse
        input sensitivities.
    """
    data = GPy.util.datasets.oil()

    # create simple GP model
    k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.001)
    m = GPy.models.Bayesian_GPLVM(data['X'][:N], Q, kernel=k, M=M)
    # class labels for colouring the latent plot (one-hot -> index)
    m.data_labels = data['Y'][:N].argmax(axis=1)

    # optimize
    if optimize:
        # two-stage optimisation: first with the noise pinned at 0.05,
        # then again with the noise free (positive) to refine.
        m.constrain_fixed('noise', 0.05)
        m.ensure_default_constraints()
        m.optimize('scg', messages=1)
        m.unconstrain('noise')
        m.constrain_positive('noise')
        m.optimize('scg', messages=1)
    else:
        m.ensure_default_constraints()

    # plot
    print(m)
    m.plot_latent(labels=m.data_labels)
    pb.figure()
    # bar chart of the inverse sensitivity of each input dimension
    pb.bar(np.arange(m.kern.D), 1. / m.input_sensitivity())
    return m
|
2013-04-02 02:20:53 +02:00
|
|
|
|
|
|
|
|
def oil_100():
    """Smoke-test demo: a plain 2D GPLVM on the 100-point oil-flow data.

    Runs only two optimisation iterations, so the fit is not meaningful —
    this mainly checks that the pipeline runs end to end.

    :returns: the (barely optimised) model.
    """
    data = GPy.util.datasets.oil_100()
    model = GPy.models.GPLVM(data['X'], 2)

    # optimize (deliberately capped at two iterations)
    model.ensure_default_constraints()
    model.optimize(messages=1, max_iters=2)

    # plot
    print(model)
    # model.plot_latent(labels=data['Y'].argmax(axis=1))
    return model
|
|
|
|
|
|
|
|
|
|
def mrd_simulation():
    """Demo: build an MRD model on two simulated views sharing one latent space.

    Two data views are drawn from GPs over the same latent ``X`` but with
    different ARD sensitivity patterns, so the model should recover which
    latent dimensions drive which view.

    :returns: the (unoptimised) :class:`MRD` model; also plots the
        ground-truth ARD weights for comparison.
    """
    # ARD sensitivities for each view; zeros are replaced by a tiny value
    # so the inverses below stay finite.
    sens1 = np.array([1., 1, 0, 0], dtype=float)
    sens2 = np.array([0., 1, 1, 0], dtype=float)
    sens1[sens1 == 0] = 1E-10
    sens2[sens2 == 0] = 1E-10

    inv1 = 1. / sens1
    inv2 = 1. / sens2

    D1, D2, N, M, Q = 50, 100, 150, 15, 4
    X = np.random.randn(N, Q)

    # view 1: lengthscales are the inverse sensitivities
    kern = GPy.kern.rbf(Q, ARD=True, lengthscale=inv1) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
    Y1 = np.random.multivariate_normal(np.zeros(N), kern.K(X), D1).T
    Y1 -= Y1.mean(0)

    # view 2: same construction with the second sensitivity pattern
    kern = GPy.kern.rbf(Q, ARD=True, lengthscale=inv2) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
    Y2 = np.random.multivariate_normal(np.zeros(N), kern.K(X), D2).T
    Y2 -= Y2.mean(0)

    # kernel handed to the model (ARD parameters left for it to learn)
    kern = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q, 1.0)
    m = MRD(Y1, Y2, Q=Q, M=M, kernel=kern, _debug=False)
    m.ensure_default_constraints()

    # plot the ground-truth ARD weights we expect the model to recover
    fig = pyplot.figure("expected", figsize=(8, 3))
    ax = fig.add_subplot(121)
    ax.bar(np.arange(sens1.size) + .1, sens1)
    ax = fig.add_subplot(122)
    ax.bar(np.arange(sens2.size) + .1, sens2)

    return m
|
|
|
|
|
|
|
|
|
|
def brendan_faces():
    """Demo: 2D GPLVM on the Brendan-faces data with interactive visualisation.

    Fits the model, plots the latent space, and opens an image viewer that
    follows the latent point; blocks until the user presses enter.

    :returns: the optimised model.
    """
    data = GPy.util.datasets.brendan_faces()
    # Fix: a dead ``Y = data['Y'][0:-1:10, :]`` subsample was computed here
    # but never used — the model has always been trained on the full data,
    # so the assignment is removed. NOTE(review): the subsample may have been
    # the original intent (for speed); confirm before switching to it.
    m = GPy.models.GPLVM(data['Y'], 2)

    # optimize
    m.ensure_default_constraints()
    m.optimize(messages=1, max_f_eval=10000)

    # interactive visualisation of the latent space
    ax = m.plot_latent()
    y = m.likelihood.Y[0, :]
    data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
    # keep a reference so the visualizer stays alive while we block below
    lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax)
    raw_input('Press enter to finish')
    plt.close('all')

    return m
|
|
|
|
|
|
|
|
|
|
def stick():
    """Demo: 2D GPLVM on the stick-figure motion data with interactive visualisation.

    Fits the model, plots the latent space, and opens a stick-figure viewer
    that follows the latent point; blocks until the user presses enter.

    :returns: the optimised model.
    """
    data = GPy.util.datasets.stick()
    model = GPy.models.GPLVM(data['Y'], 2)

    # optimize
    model.ensure_default_constraints()
    model.optimize(messages=1, max_f_eval=10000)

    # interactive visualisation of the latent space
    axes = model.plot_latent()
    frame0 = model.likelihood.Y[0, :]
    shower = GPy.util.visualize.stick_show(frame0[None, :], connect=data['connect'])
    # keep a reference so the visualizer stays alive while we block below
    lvm_visualizer = GPy.util.visualize.lvm(model, shower, axes)
    raw_input('Press enter to finish')
    plt.close('all')

    return model
|
2013-03-21 15:28:46 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def BGPLVM_oil():
    """Demo: Bayesian GPLVM on the full (standardised) oil-flow data.

    Standardises the inputs, initialises the inducing points with k-means,
    optimises with TNC, and plots the latent space.

    NOTE(review): this redefines ``BGPLVM_oil`` and therefore shadows the
    parameterised ``BGPLVM_oil(optimize, N, Q, M)`` defined earlier in this
    module — one of the two should be renamed.

    :returns: the optimised model.
    """
    data = GPy.util.datasets.oil()
    Y, X = data['Y'], data['X']
    # standardise the inputs
    X -= X.mean(axis=0)
    X /= X.std(axis=0)

    Q = 10  # latent dimensionality
    M = 30  # number of inducing points

    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
    m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
    # m.scale_factor = 100.0
    m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')

    # initialise the inducing inputs with k-means cluster centres
    from sklearn import cluster
    km = cluster.KMeans(M, verbose=10)
    Z = km.fit(m.X).cluster_centers_
    # Z = GPy.util.misc.kmm_init(m.X, M)
    m.set('iip', Z)
    m.set('bias', 1e-4)

    # optimize
    # m.ensure_default_constraints()
    # Fix: removed a leftover ``import pdb; pdb.set_trace()`` debugger
    # breakpoint that halted the demo before optimisation.
    m.optimize('tnc', messages=1)
    # Fix: was a Python 2 ``print m`` statement, inconsistent with the
    # ``print(m)`` form used everywhere else in this file.
    print(m)
    m.plot_latent(labels=data['Y'].argmax(axis=1))
    return m
|