manual merge in plot_latent

James Hensman 2013-04-12 22:44:11 +01:00
commit 1d094229df
11 changed files with 380 additions and 84 deletions


@@ -3,29 +3,30 @@
 import numpy as np
 import pylab as pb
-from matplotlib import pyplot as plt
+from matplotlib import pyplot as plt, pyplot
 import GPy
+from GPy.models.mrd import MRD

 default_seed = np.random.seed(123344)

-def BGPLVM(seed = default_seed):
+def BGPLVM(seed=default_seed):
     N = 10
     M = 3
     Q = 2
     D = 4
-    #generate GPLVM-like data
+    # generate GPLVM-like data
     X = np.random.rand(N, Q)
     k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
     K = k.K(X)
-    Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+    Y = np.random.multivariate_normal(np.zeros(N), K, D).T

-    k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
+    k = GPy.kern.linear(Q, ARD=True) + GPy.kern.white(Q)
     # k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
     # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
     # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
-    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
+    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
     m.constrain_positive('(rbf|bias|noise|white|S)')
     # m.constrain_fixed('S', 1)
@@ -38,44 +39,53 @@ def BGPLVM(seed = default_seed):
     # pb.title('After optimisation')

     m.ensure_default_constraints()
     m.randomize()
-    m.checkgrad(verbose = 1)
+    m.checkgrad(verbose=1)
     return m

-<<<<<<< HEAD
 def GPLVM_oil_100(optimize=True):
     data = GPy.util.datasets.oil_100()
     # create simple GP model
     kernel = GPy.kern.rbf(6, ARD = True) + GPy.kern.bias(6)
     m = GPy.models.GPLVM(data['X'], 6, kernel=kernel)
-=======
-def GPLVM_oil_100(optimize=True, M=15):
-    data = GPy.util.datasets.oil_100()
-    # create simple GP model
-    kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
-    m = GPy.models.GPLVM(data['X'], 6, kernel=kernel, M=M)
->>>>>>> f6b98160a7c0ace6ca5f795aeb878d30b8aaf6a4
     m.data_labels = data['Y'].argmax(axis=1)
     # optimize
     m.ensure_default_constraints()
     if optimize:
-        m.optimize('scg',messages=1)
+        m.optimize('scg', messages=1)
     # plot
     print(m)
     m.plot_latent(labels=m.data_labels)
     return m
-def BGPLVM_oil(optimize=True,N=100,Q=10,M=15,max_f_eval=300):
+def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300):
     data = GPy.util.datasets.oil()
     # create simple GP model
-    kernel = GPy.kern.rbf(Q, ARD = True) + GPy.kern.bias(Q) + GPy.kern.white(Q,0.001)
-    m = GPy.models.Bayesian_GPLVM(data['X'][:N], Q, kernel = kernel,M=M)
+    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.001)
+    m = GPy.models.Bayesian_GPLVM(data['X'][:N], Q, kernel=kernel, M=M)
     m.data_labels = data['Y'][:N].argmax(axis=1)
     # optimize
     if optimize:
-        m.constrain_fixed('noise',0.05)
+        m.constrain_fixed('noise', 0.05)
         m.ensure_default_constraints()
-        m.optimize('scg',messages=1,max_f_eval=max(80,max_f_eval))
+        m.optimize('scg', messages=1, max_f_eval=max(80, max_f_eval))
         m.unconstrain('noise')
         m.constrain_positive('noise')
-        m.optimize('scg',messages=1,max_f_eval=max(0,max_f_eval-80))
+        m.optimize('scg', messages=1, max_f_eval=max(0, max_f_eval - 80))
     else:
         m.ensure_default_constraints()

@@ -83,7 +93,7 @@ def BGPLVM_oil(optimize=True,N=100,Q=10,M=15,max_f_eval=300):
     print(m)
     m.plot_latent(labels=m.data_labels)
     pb.figure()
-    pb.bar(np.arange(m.kern.D),1./m.input_sensitivity())
+    pb.bar(np.arange(m.kern.D), 1. / m.input_sensitivity())
     return m

 def oil_100():
@@ -96,7 +106,52 @@ def oil_100():
     # plot
     print(m)
-    #m.plot_latent(labels=data['Y'].argmax(axis=1))
+    # m.plot_latent(labels=data['Y'].argmax(axis=1))
     return m

+def mrd_simulation():
+    # num = 2
+    ard1 = np.array([1., 1, 0, 0], dtype=float)
+    ard2 = np.array([0., 1, 1, 0], dtype=float)
+    ard1[ard1 == 0] = 1E-10
+    ard2[ard2 == 0] = 1E-10
+    # ard1i = 1. / ard1
+    # ard2i = 1. / ard2
+    # k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard1i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
+    # Y1 = np.random.multivariate_normal(np.zeros(N), k.K(X), D1).T
+    # Y1 -= Y1.mean(0)
+    #
+    # k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard2i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001)
+    # Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
+    # Y2 -= Y2.mean(0)
+    # make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
+    D1, D2, N, M, Q = 50, 100, 150, 15, 4
+    x = np.linspace(0, 2 * np.pi, N)[:, None]
+    s1 = np.vectorize(lambda x: np.sin(x))
+    s2 = np.vectorize(lambda x: np.cos(x))
+    sS = np.vectorize(lambda x: np.sin(2 * x))
+    S1 = np.hstack([s1(x), sS(x)])
+    S2 = np.hstack([s2(x), sS(x)])
+    Y1 = S1.dot(np.random.randn(S1.shape[1], D1))
+    Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
+    k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
+    m = MRD(Y1, Y2, Q=Q, M=M, kernel=k, init="PCA", _debug=False)
+    m.ensure_default_constraints()
+    # fig = pyplot.figure("expected", figsize=(8, 3))
+    # ax = fig.add_subplot(121)
+    # ax.bar(np.arange(ard1.size) + .1, ard1)
+    # ax = fig.add_subplot(122)
+    # ax.bar(np.arange(ard2.size) + .1, ard2)
+    return m

 def brendan_faces():
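Editor's note: the new mrd_simulation() demo builds two views that share the sin(2x) signal while each keeps a private sine or cosine, so a fitted MRD should show the shared signal in both views' ARD scales and the private ones in only one. A minimal driver sketch; the optimiser name and iteration budget below are assumptions, not part of this commit:

    m = mrd_simulation()
    m.optimize('scg', messages=1, max_f_eval=300)  # assumed settings
    m.plot_scales()   # per-view ARD weights: shared vs. private dimensions
    m.plot_latent()   # per-view latent embeddings, one subplot each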
@@ -109,7 +164,7 @@ def brendan_faces():
     m.optimize(messages=1, max_f_eval=10000)

     ax = m.plot_latent()
-    y = m.likelihood.Y[0,:]
+    y = m.likelihood.Y[0, :]
     data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
     lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax)
     raw_input('Press enter to finish')
@@ -126,10 +181,39 @@ def stick():
     m.optimize(messages=1, max_f_eval=10000)
     ax = m.plot_latent()
-    y = m.likelihood.Y[0,:]
+    y = m.likelihood.Y[0, :]
     data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
     lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax)
     raw_input('Press enter to finish')
     plt.close('all')
     return m

+# def BGPLVM_oil():
+#     data = GPy.util.datasets.oil()
+#     Y, X = data['Y'], data['X']
+#     X -= X.mean(axis=0)
+#     X /= X.std(axis=0)
+#
+#     Q = 10
+#     M = 30
+#
+#     kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
+#     m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
+#     # m.scale_factor = 100.0
+#     m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
+#     from sklearn import cluster
+#     km = cluster.KMeans(M, verbose=10)
+#     Z = km.fit(m.X).cluster_centers_
+#     # Z = GPy.util.misc.kmm_init(m.X, M)
+#     m.set('iip', Z)
+#     m.set('bias', 1e-4)
+#     # optimize
+#     # m.ensure_default_constraints()
+#
+#     import pdb; pdb.set_trace()
+#     m.optimize('tnc', messages=1)
+#     print m
+#     m.plot_latent(labels=data['Y'].argmax(axis=1))
+#     return m


@@ -104,7 +104,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
         iteration += 1
         if display:
-            print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
+            print 'Iteration: {0:<5g} Objective:{1:< 12g} Scale:{2:< 12g}\r'.format(iteration, fnow, beta),
+            # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
             sys.stdout.flush()
         if success:

@@ -75,7 +75,10 @@ class opt_SGD(Optimizer):
         return (np.isnan(data).sum(axis=1) == 0)

     def check_for_missing(self, data):
-        return np.isnan(data).sum() > 0
+        if sp.sparse.issparse(self.model.likelihood.Y):
+            return True
+        else:
+            return np.isnan(data).sum() > 0

     def subset_parameter_vector(self, x, samples, param_shapes):
         subset = np.array([], dtype = int)

@@ -149,10 +152,10 @@ class opt_SGD(Optimizer):
         else:
             raise NotImplementedError

-    def step_with_missing_data(self, f_fp, X, step, shapes, sparse_matrix):
+    def step_with_missing_data(self, f_fp, X, step, shapes):
         N, Q = X.shape

-        if not sparse_matrix:
+        if not sp.sparse.issparse(self.model.likelihood.Y):
             Y = self.model.likelihood.Y
             samples = self.non_null_samples(self.model.likelihood.Y)
             self.model.N = samples.sum()
@@ -165,7 +168,6 @@ class opt_SGD(Optimizer):
         if self.model.N == 0 or Y.std() == 0.0:
             return 0, step, self.model.N
-        # FIXME: get rid of self.center, everything should be centered by default
         self.model.likelihood._mean = Y.mean()
         self.model.likelihood._std = Y.std()
         self.model.likelihood.set_data(Y)

@@ -173,10 +175,6 @@ class opt_SGD(Optimizer):
         j = self.subset_parameter_vector(self.x_opt, samples, shapes)
         self.model.X = X[samples]
-        # if self.center:
-        #     self.model.likelihood.Y -= self.model.likelihood.Y.mean()
-        #     self.model.likelihood.Y /= self.model.likelihood.Y.std()

         model_name = self.model.__class__.__name__
         if model_name == 'Bayesian_GPLVM':
@@ -185,33 +183,31 @@ class opt_SGD(Optimizer):
         b, p = self.shift_constraints(j)

         f, fp = f_fp(self.x_opt[j])
-        # momentum_term = self.momentum * step[j]
-        # step[j] = self.learning_rate[j] * fp
-        # self.x_opt[j] -= step[j] + momentum_term
         step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
         self.x_opt[j] -= step[j]

         self.restore_constraints(b, p)
+        # restore likelihood _mean and _std, otherwise when we call set_data(y) on
+        # the next feature, it will get normalized with the mean and std of this one.
+        self.model.likelihood._mean = 0
+        self.model.likelihood._std = 1
         return f, step, self.model.N
     def opt(self, f_fp=None, f=None, fp=None):
         self.x_opt = self.model._get_params_transformed()
         X, Y = self.model.X.copy(), self.model.likelihood.Y.copy()
-        N, Q = self.model.X.shape
-        D = self.model.likelihood.Y.shape[1]
-        self.trace = []
-        sparse_matrix = sp.sparse.issparse(self.model.likelihood.Y)
-        missing_data = True
-        if not sparse_matrix:
-            missing_data = self.check_for_missing(self.model.likelihood.Y)
         self.model.likelihood.YYT = None
         self.model.likelihood.trYYT = None
         self.model.likelihood._mean = 0.0
         self.model.likelihood._std = 1.0
+        N, Q = self.model.X.shape
+        D = self.model.likelihood.Y.shape[1]
         num_params = self.model._get_params()
+        self.trace = []
+        missing_data = self.check_for_missing(self.model.likelihood.Y)
         step = np.zeros_like(num_params)

         for it in range(self.iterations):
@@ -224,34 +220,26 @@ class opt_SGD(Optimizer):
             b = len(features)/self.batch_size
             features = [features[i::b] for i in range(b)]
             NLL = []
-            count = 0
-            last_printed_count = -1
-            for j in features:
-                count += 1
+            for count, j in enumerate(features):
                 self.model.D = len(j)
                 self.model.likelihood.D = len(j)
                 self.model.likelihood.set_data(Y[:, j])

-                if missing_data or sparse_matrix:
+                if missing_data:
                     shapes = self.get_param_shapes(N, Q)
-                    f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes, sparse_matrix)
+                    f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes)
                 else:
                     Nj = N
                     f, fp = f_fp(self.x_opt)
-                    # momentum_term = self.momentum * step # compute momentum using update(t-1)
-                    # step = self.learning_rate * fp # compute update(t)
-                    # self.x_opt -= step + momentum_term
                     step = self.momentum * step + self.learning_rate * fp
                     self.x_opt -= step

                 if self.messages == 2:
                     noise = self.model.likelihood._variance
                     status = "evaluating {feature: 5d}/{tot: 5d} \t f: {f: 2.3f} \t non-missing: {nm: 4d}\t noise: {noise: 2.4f}\r".format(feature = count, tot = len(features), f = f, nm = Nj, noise = noise)
                     sys.stdout.write(status)
                     sys.stdout.flush()
-                    last_printed_count = count

                 self.param_traces['noise'].append(noise)
                 NLL.append(f)
@@ -269,7 +257,6 @@ class opt_SGD(Optimizer):
         self.model.likelihood.D = D
         self.model.likelihood.Y = Y
-        # self.model.Youter = np.dot(Y, Y.T)
         self.trace.append(self.f_opt)
         if self.iteration_file is not None:
             f = open(self.iteration_file + "iteration%d.pickle" % it, 'w')

@@ -282,7 +269,3 @@ class opt_SGD(Optimizer):
         status = "SGD Iteration: {0: 3d}/{1: 3d} f: {2: 2.3f}\n".format(it+1, self.iterations, self.f_opt)
         sys.stdout.write(status)
         sys.stdout.flush()
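Editor's note: both the missing-data and dense branches now share the same classical-momentum update that the deleted comments used to describe piecemeal: decay the previous step, add the freshly scaled gradient, then descend. Isolated on a toy quadratic (all names below are illustrative, not from GPy):

    import numpy as np
    x = np.array([5.0, -3.0])     # parameters being optimised
    step = np.zeros_like(x)       # persists across iterations, like `step` above
    momentum, learning_rate = 0.9, 0.05
    for _ in range(200):
        fp = 2.0 * x              # gradient of f(x) = ||x||^2
        step = momentum * step + learning_rate * fp
        x -= step
    print x                       # close to the minimiser at the origin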


@@ -52,7 +52,7 @@ class kern(parameterised):
         parameterised.__init__(self)

-    def plot_ARD(self):
+    def plot_ARD(self, ax=pb.gca()):
         """
         If an ARD kernel is present, it bar-plots the ARD parameters

@@ -60,17 +60,17 @@ class kern(parameterised):
         """
         for p in self.parts:
             if hasattr(p, 'ARD') and p.ARD:
-                pb.figure()
-                pb.title('ARD parameters, %s kernel' % p.name)
+                ax.set_title('ARD parameters, %s kernel' % p.name)
                 if p.name == 'linear':
                     ard_params = p.variances
                 else:
                     ard_params = 1./p.lengthscale
-                pb.bar(np.arange(len(ard_params))-0.4, ard_params)
+                ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
+                ax.set_xticks(np.arange(len(ard_params)),
+                              ["${}$".format(i + 1) for i in range(len(ard_params))])
+        return ax

     def _transform_gradients(self,g):
         x = self._get_params()


@@ -227,9 +227,8 @@ class rbf(kernpart):
     def weave_psi2(self,mu,Zhat):
         weave_options = {'headers' : ['<omp.h>'],
-                         'extra_compile_args': ['-fopenmp -march=native'],
-                         'extra_link_args' : ['-lgomp'],
-                         'compiler' : 'gcc'}
+                         'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
+                         'extra_link_args' : ['-lgomp']}

         N,Q = mu.shape
         M = Zhat.shape[0]


@@ -22,7 +22,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
     :type init: 'PCA'|'random'
     """
-    def __init__(self, Y, Q, X = None, X_variance = None, init='PCA', M=10, Z=None, kernel=None, **kwargs):
+    def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, **kwargs):
         if X == None:
             X = self.initialise_latent(init, Q, Y)

@@ -31,7 +31,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         if Z is None:
             Z = np.random.permutation(X.copy())[:M]
-        assert Z.shape[1]==X.shape[1]
+        assert Z.shape[1] == X.shape[1]
         if kernel is None:
             kernel = kern.rbf(Q) + kern.white(Q)

@@ -40,8 +40,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)

     def _get_param_names(self):
-        X_names = sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
-        S_names = sum([['X_variance_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
+        X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
+        S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
         return (X_names + S_names + sparse_GP._get_param_names(self))

     def _get_params(self):
@@ -56,35 +56,43 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         """
         return np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))

-    def _set_params(self,x):
+    def _set_params(self, x):
         N, Q = self.N, self.Q
-        self.X = x[:self.X.size].reshape(N,Q).copy()
-        self.X_variance = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
-        sparse_GP._set_params(self, x[(2*N*Q):])
+        self.X = x[:self.X.size].reshape(N, Q).copy()
+        self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
+        sparse_GP._set_params(self, x[(2 * N * Q):])

+    def dKL_dmuS(self):
+        dKL_dS = (1. - (1. / self.X_variance)) * 0.5
+        dKL_dmu = self.X
+        return dKL_dmu, dKL_dS

     def dL_dmuS(self):
-        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_variance)
-        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_variance)
-        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_variance)
+        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance)
+        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance)
+        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance)
         dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
         dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
-        dKL_dS = (1. - (1./self.X_variance))*0.5
-        dKL_dmu = self.X
-        return np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
+        return dL_dmu, dL_dS

     def KL_divergence(self):
         var_mean = np.square(self.X).sum()
         var_S = np.sum(self.X_variance - np.log(self.X_variance))
-        return 0.5*(var_mean + var_S) - 0.5*self.Q*self.N
+        return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N

     def log_likelihood(self):
         return sparse_GP.log_likelihood(self) - self.KL_divergence()

     def _log_likelihood_gradients(self):
-        return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self)))
+        dKL_dmu, dKL_dS = self.dKL_dmuS()
+        dL_dmu, dL_dS = self.dL_dmuS()
+        # TODO: find way to make faster
+        dbound_dmuS = np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
+        return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))

-    def plot_latent(self, which_indices=None,*args, **kwargs):
+    def plot_latent(self, which_indices=None, *args, **kwargs):
         if which_indices is None:
             try:

@@ -93,6 +101,6 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
                 raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
         else:
             input_1, input_2 = which_indices
-        ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2],*args, **kwargs)
+        ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
         ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
         return ax


@@ -60,7 +60,7 @@ class GPLVM(GP):
         mu, var, upper, lower = self.predict(Xnew)
         pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)

-    def plot_latent(self,labels=None, which_indices=None, resolution=50,ax=pb.gca()):
+    def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=pb.gca()):
         """
         :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
         :param resolution: the resolution of the grid on which to evaluate the predictive variance

@@ -90,7 +90,7 @@ class GPLVM(GP):
         Xtest_full[:, :2] = Xtest
         mu, var, low, up = self.predict(Xtest_full)
         var = var[:, :1]
-        ax.imshow(var.reshape(resolution,resolution).T[::-1,:],
+        ax.imshow(var.reshape(resolution, resolution).T[::-1, :],
                   extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear')
         for i,ul in enumerate(np.unique(labels)):


@@ -11,4 +11,7 @@ from warped_GP import warpedGP
 from sparse_GPLVM import sparse_GPLVM
 from uncollapsed_sparse_GP import uncollapsed_sparse_GP
 from Bayesian_GPLVM import Bayesian_GPLVM
+import mrd
+MRD = mrd.MRD
+del mrd
 from generalized_FITC import generalized_FITC
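Editor's note: the three added lines are a small namespace dance: bind the class into GPy.models, then drop the submodule reference so only the class is exposed as a public name. Net effect for users (Y1, Y2 stand in for any data arrays):

    from GPy.models import MRD      # the class is importable directly
    import GPy
    m = GPy.models.MRD(Y1, Y2, Q=2) # and reachable via the package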

GPy/models/mrd.py (new file, 180 lines)

@@ -0,0 +1,180 @@
'''
Created on 10 Apr 2013

@author: Max Zwiessele
'''
from GPy.core import model
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
import numpy
from GPy.models.sparse_GP import sparse_GP
import itertools
from matplotlib import pyplot
import pylab

class MRD(model):
    """
    Do MRD on the given datasets in Ylist.
    All Ys in Ylist are [N x Dn], where Dn can differ per Yn;
    N must be shared across datasets though.

    :param Ylist: observed datasets
    :type Ylist: [np.ndarray]
    :param names: names for the different gplvm models
    :type names: [str]
    :param Q: latent dimensionality
    :type Q: int
    :param init: initialisation method for the latent space
    :type init: 'PCA'|'random'
    :param X: initial latent space
    :param X_variance: initial latent space variance
    :param M: number of inducing inputs to use
    :param Z: initial inducing inputs
    :param kernel: kernel to use
    """
    def __init__(self, *Ylist, **kwargs):
        self._debug = False
        if kwargs.has_key("_debug"):
            self._debug = kwargs['_debug']
            del kwargs['_debug']
        if kwargs.has_key("names"):
            self.names = kwargs['names']
            del kwargs['names']
        else:
            self.names = ["{}".format(i + 1) for i in range(len(Ylist))]
        if kwargs.has_key('kernel'):
            kernel = kwargs['kernel']
            k = lambda: kernel.copy()
            del kwargs['kernel']
        else:
            k = lambda: None
        self.bgplvms = [Bayesian_GPLVM(Y, kernel=k(), **kwargs) for Y in Ylist]
        self.gref = self.bgplvms[0]
        nparams = numpy.array([0] + [sparse_GP._get_params(g).size - g.Z.size for g in self.bgplvms])
        self.nparams = nparams.cumsum()
        self.Q = self.gref.Q
        self.N = self.gref.N
        self.NQ = self.N * self.Q
        self.M = self.gref.M
        self.MQ = self.M * self.Q
        model.__init__(self)  # @UndefinedVariable

    def _get_param_names(self):
        # X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
        # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
        n1 = self.gref._get_param_names()
        n1var = n1[:self.NQ * 2 + self.MQ]
        map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x),
                                         itertools.izip(ns, itertools.repeat(name)))
        return list(itertools.chain(n1var, *(map_names(\
            sparse_GP._get_param_names(g)[self.MQ:], n) \
            for g, n in zip(self.bgplvms, self.names))))

    def _get_params(self):
        """
        Return the parameter list containing private and shared parameters as follows:

        =================================================================
        |    mu    |    S    |    Z    || theta1 | theta2 | .. | thetaN |
        =================================================================
        """
        X = self.gref.X.flatten()
        X_var = self.gref.X_variance.flatten()
        Z = self.gref.Z.flatten()
        thetas = [sparse_GP._get_params(g)[g.Z.size:] for g in self.bgplvms]
        params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)])
        return params

    def _set_var_params(self, g, X, X_var, Z):
        g.X = X
        g.X_variance = X_var
        g.Z = Z

    def _set_kern_params(self, g, p):
        g.kern._set_params(p[:g.kern.Nparam])
        g.likelihood._set_params(p[g.kern.Nparam:])

    def _set_params(self, x):
        start = 0; end = self.NQ
        X = x[start:end].reshape(self.N, self.Q).copy()
        start = end; end += start
        X_var = x[start:end].reshape(self.N, self.Q).copy()
        start = end; end += self.MQ
        Z = x[start:end].reshape(self.M, self.Q).copy()
        thetas = x[end:]
        # set params for all others:
        for g, s, e in itertools.izip(self.bgplvms, self.nparams, self.nparams[1:]):
            self._set_var_params(g, X, X_var, Z)
            self._set_kern_params(g, thetas[s:e].copy())
            g._compute_kernel_matrices()
            g._computations()

    def log_likelihood(self):
        ll = +self.gref.KL_divergence()
        for g in self.bgplvms:
            ll -= sparse_GP.log_likelihood(g)
        return -ll

    def _log_likelihood_gradients(self):
        dLdmu, dLdS = reduce(lambda a, b: [a[0] + b[0], a[1] + b[1]], (g.dL_dmuS() for g in self.bgplvms))
        dKLmu, dKLdS = self.gref.dKL_dmuS()
        dLdmu -= dKLmu
        dLdS -= dKLdS
        dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten()
        dldzt1 = reduce(lambda a, b: a + b, (sparse_GP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
        return numpy.hstack((dLdmuS,
                             dldzt1,
                             numpy.hstack([numpy.hstack([g.dL_dtheta(),
                                                         g.likelihood._gradients(partial=g.partial_for_likelihood)])
                                           for g in self.bgplvms])))

    def plot_X(self):
        fig = pylab.figure("MRD X", figsize=(4 * len(self.bgplvms), 3))
        fig.clf()
        for i, g in enumerate(self.bgplvms):
            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
            ax.imshow(g.X)
        pylab.draw()
        fig.tight_layout()
        return fig

    def plot_predict(self):
        fig = pylab.figure("MRD Predictions", figsize=(4 * len(self.bgplvms), 3))
        fig.clf()
        for i, g in enumerate(self.bgplvms):
            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
            ax.imshow(g.predict(g.X)[0])
        pylab.draw()
        fig.tight_layout()
        return fig

    def plot_scales(self, *args, **kwargs):
        fig = pylab.figure("MRD Scales", figsize=(4 * len(self.bgplvms), 3))
        for i, g in enumerate(self.bgplvms):
            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
            g.kern.plot_ARD(ax=ax, *args, **kwargs)
        pylab.draw()
        fig.tight_layout()
        return fig

    def plot_latent(self, *args, **kwargs):
        fig = pylab.figure("MRD Latent Spaces", figsize=(4 * len(self.bgplvms), 3))
        for i, g in enumerate(self.bgplvms):
            ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
            g.plot_latent(ax=ax, *args, **kwargs)
        pylab.draw()
        fig.tight_layout()
        return fig


@@ -36,7 +36,7 @@ class sparse_GP(GP):
     def __init__(self, X, likelihood, kernel, Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False):
         self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable
+        self.auto_scale_factor = False
         self.Z = Z
         self.Zslices = Zslices
         self.Xslices = Xslices

@@ -184,6 +184,12 @@ class sparse_GP(GP):
         self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
         self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
         self._compute_kernel_matrices()
+        if self.auto_scale_factor:
+            if self.likelihood.is_heteroscedastic:
+                self.scale_factor = max(100.,(self.psi2_beta_scaled.sum(0).max()))
+                print self.scale_factor
+            else:
+                self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
         self._computations()

     def _get_params(self):
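Editor's note: auto_scale_factor defaults to False, so existing models keep the fixed scale_factor of 100.0; the new branch only runs for models that opt in after construction. A hedged usage sketch (the attribute is toggled by hand here, since nothing in this commit sets it; Y is an assumed N x D data array):

    m = GPy.models.Bayesian_GPLVM(Y, Q=2)
    m.auto_scale_factor = True  # recompute scale_factor at every _set_params
    m.ensure_default_constraints()
    m.optimize()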

GPy/testing/mrd_tests.py (new file, 32 lines)

@@ -0,0 +1,32 @@
# Copyright (c) 2013, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
'''
Created on 10 Apr 2013

@author: maxz
'''
import unittest
import numpy as np
import GPy

class MRDTests(unittest.TestCase):
    def test_gradients(self):
        num_m = 3
        N, M, Q, D = 20, 8, 5, 50
        X = np.random.rand(N, Q)
        k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q)
        K = k.K(X)
        Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)]
        m = GPy.models.MRD(*Ylist, Q=Q, kernel=k, M=M)
        m.ensure_default_constraints()
        m.randomize()
        self.assertTrue(m.checkgrad())

if __name__ == "__main__":
    print "Running unit tests, please be (very) patient..."
    unittest.main()