From 4953d71b7a3cfe19c29f5155d0285ffb9d7b2025 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 11 Apr 2013 14:54:25 +0100 Subject: [PATCH 01/11] first trivial model touches --- GPy/examples/dimensionality_reduction.py | 71 +++++++++++++++++------- GPy/models/GPLVM.py | 4 +- GPy/models/__init__.py | 3 + 3 files changed, 56 insertions(+), 22 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 61a4abd8..a82e0af4 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -6,26 +6,27 @@ import pylab as pb from matplotlib import pyplot as plt import GPy +from GPy.models.mrd import MRD default_seed = np.random.seed(123344) -def BGPLVM(seed = default_seed): +def BGPLVM(seed=default_seed): N = 10 M = 3 Q = 2 D = 4 - #generate GPLVM-like data + # generate GPLVM-like data X = np.random.rand(N, Q) k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N), K, D).T - k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q) + k = GPy.kern.linear(Q, ARD=True) + GPy.kern.white(Q) # k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q) # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M) + m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M) m.constrain_positive('(rbf|bias|noise|white|S)') # m.constrain_fixed('S', 1) @@ -38,44 +39,44 @@ def BGPLVM(seed = default_seed): # pb.title('After optimisation') m.ensure_default_constraints() m.randomize() - m.checkgrad(verbose = 1) + m.checkgrad(verbose=1) return m -def GPLVM_oil_100(optimize=True,M=15): +def GPLVM_oil_100(optimize=True, M=15): data = GPy.util.datasets.oil_100() # create simple GP model - kernel = GPy.kern.rbf(6, ARD = True) + GPy.kern.bias(6) + kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6) m = GPy.models.GPLVM(data['X'], 6, kernel=kernel, M=M) m.data_labels = data['Y'].argmax(axis=1) # optimize m.ensure_default_constraints() if optimize: - m.optimize('scg',messages=1) + m.optimize('scg', messages=1) # plot print(m) m.plot_latent(labels=m.data_labels) return m -def BGPLVM_oil(optimize=True,N=100,Q=10,M=15): +def BGPLVM_oil(optimize=True, N=100, Q=10, M=15): data = GPy.util.datasets.oil() # create simple GP model - kernel = GPy.kern.rbf(Q, ARD = True) + GPy.kern.bias(Q) + GPy.kern.white(Q,0.001) - m = GPy.models.Bayesian_GPLVM(data['X'][:N], Q, kernel = kernel,M=M) + kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.001) + m = GPy.models.Bayesian_GPLVM(data['X'][:N], Q, kernel=kernel, M=M) m.data_labels = data['Y'][:N].argmax(axis=1) # optimize if optimize: - m.constrain_fixed('noise',0.05) + m.constrain_fixed('noise', 0.05) m.ensure_default_constraints() - m.optimize('scg',messages=1) + m.optimize('scg', messages=1) m.unconstrain('noise') m.constrain_positive('noise') - m.optimize('scg',messages=1) + m.optimize('scg', messages=1) else: m.ensure_default_constraints() @@ -83,7 +84,7 @@ def BGPLVM_oil(optimize=True,N=100,Q=10,M=15): print(m) m.plot_latent(labels=m.data_labels) pb.figure() - pb.bar(np.arange(m.kern.D),1./m.input_sensitivity()) + pb.bar(np.arange(m.kern.D), 1. / m.input_sensitivity()) return m def oil_100(): @@ -96,7 +97,37 @@ def oil_100(): # plot print(m) - #m.plot_latent(labels=data['Y'].argmax(axis=1)) + # m.plot_latent(labels=data['Y'].argmax(axis=1)) + return m + +def mrd_simulation(): + num = 2 + ard1 = np.array([1., 1, 0, 0], dtype=float) + ard2 = np.array([0., 1, 1, 0], dtype=float) + ard1[ard1 == 0] = 1E+10 + ard2[ard2 == 0] = 1E+10 + + make_params = lambda ard: np.hstack([[1], ard, [1, .3]]) + + D1, D2, N, M, Q = 50, 100, 150, 15, 4 + X = np.random.randn(N, Q) + + k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard1) + GPy.kern.bias(Q, 1) + GPy.kern.white(Q, 0.0001) + Y1 = np.random.multivariate_normal(np.zeros(N), k.K(X), D1).T + Y1 -= Y1.mean(0) + + k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard2) + GPy.kern.bias(Q, 1) + GPy.kern.white(Q, 0.0001) + Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T + Y2 -= Y2.mean(0) + + k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q, 1.0) + + m = MRD(Y1, Y2, Q=Q, M=M, kernel=k, _debug=False) + m.ensure_default_constraints() + + m.optimize(messages=1, max_f_eval=5000) + + import ipdb;ipdb.set_trace() return m def brendan_faces(): @@ -109,7 +140,7 @@ def brendan_faces(): m.optimize(messages=1, max_f_eval=10000) ax = m.plot_latent() - y = m.likelihood.Y[0,:] + y = m.likelihood.Y[0, :] data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False) lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax) raw_input('Press enter to finish') @@ -120,13 +151,13 @@ def brendan_faces(): def stick(): data = GPy.util.datasets.stick() m = GPy.models.GPLVM(data['Y'], 2) - + # optimize m.ensure_default_constraints() m.optimize(messages=1, max_f_eval=10000) ax = m.plot_latent() - y = m.likelihood.Y[0,:] + y = m.likelihood.Y[0, :] data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax) raw_input('Press enter to finish') diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index cc4be70e..2b1002eb 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -55,7 +55,7 @@ class GPLVM(GP): def plot(self): assert self.likelihood.Y.shape[1]==2 - pb.scatter(self.likelihood.Y[:,0],self.likelihood.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet) + pb.scatter(self.likelihood.Y[:,0],self.likelihood.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet) # @UndefinedVariable Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None] mu, var, upper, lower = self.predict(Xnew) pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5) @@ -90,7 +90,7 @@ class GPLVM(GP): Xtest_full[:, :2] = Xtest mu, var, low, up = self.predict(Xtest_full) var = var.mean(axis=1) # this was var[:, :2] edit by Neil - pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear') + pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear') # @UndefinedVariable for i,ul in enumerate(np.unique(labels)): diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index f442dc67..91cc60e3 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -11,4 +11,7 @@ from warped_GP import warpedGP from sparse_GPLVM import sparse_GPLVM from uncollapsed_sparse_GP import uncollapsed_sparse_GP from Bayesian_GPLVM import Bayesian_GPLVM +import mrd +MRD = mrd.MRD +del mrd from generalized_FITC import generalized_FITC From 3593e9f5863164651ab5072ad138c7694ca1cf91 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 11 Apr 2013 15:02:26 +0100 Subject: [PATCH 02/11] mrd touches --- GPy/models/mrd.py | 139 +++++++++++++++++++++++++++++++++++++++ GPy/testing/mrd_tests.py | 32 +++++++++ 2 files changed, 171 insertions(+) create mode 100644 GPy/models/mrd.py create mode 100644 GPy/testing/mrd_tests.py diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py new file mode 100644 index 00000000..0ba4a695 --- /dev/null +++ b/GPy/models/mrd.py @@ -0,0 +1,139 @@ +''' +Created on 10 Apr 2013 + +@author: Max Zwiessele +''' +from GPy.core import model +from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM +import numpy +from GPy.models.sparse_GP import sparse_GP +import itertools + + +class MRD(model): + """ + Do MRD on given Datasets in Ylist. + All Ys in Ylist are in [N x Dn], where Dn can be different per Yn, + N must be shared across datasets though. + + :param Ylist...: observed datasets + :type Ylist: [np.ndarray] + :param names: names for different gplvm models + :type names: [str] + :param Q: latent dimensionality + :type Q: int + :param init: initialisation method for the latent space + :type init: 'PCA'|'random' + :param X: + Initial latent space + :param X_variance: + Initial latent space variance + :param init: [PCA|random] + initialization method to use + :param M: + number of inducing inputs to use + :param Z: + initial inducing inputs + :param kernel: + kernel to use + """ + def __init__(self, *Ylist, **kwargs): + self._debug = False + if kwargs.has_key("_debug"): + self._debug = kwargs['_debug'] + del kwargs['_debug'] + if kwargs.has_key("names"): + self.names = kwargs['names'] + del kwargs['names'] + else: + self.names = ["{}".format(i + 1) for i in range(len(Ylist))] + if kwargs.has_key('kernel'): + kernel = kwargs['kernel'] + k = lambda: kernel.copy() + del kwargs['kernel'] + else: + k = lambda: None + self.bgplvms = [Bayesian_GPLVM(Y, kernel=k(), **kwargs) for Y in Ylist] + self.gref = self.bgplvms[0] + nparams = numpy.array([0] + [sparse_GP._get_params(g).size - g.Z.size for g in self.bgplvms]) + self.nparams = nparams.cumsum() + self.Q = self.gref.Q + self.N = self.gref.N + self.NQ = self.N * self.Q + self.M = self.gref.M + self.MQ = self.M * self.Q + + model.__init__(self) # @UndefinedVariable + + def _get_param_names(self): + # X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], []) + # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], []) + n1 = self.gref._get_param_names() + n1var = n1[:self.NQ * 2 + self.MQ] + map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x), + itertools.izip(ns, + itertools.repeat(name))) + return list(itertools.chain(n1var, *(map_names(\ + sparse_GP._get_param_names(g)[self.MQ:], n) \ + for g, n in zip(self.bgplvms, self.names)))) + + def _get_params(self): + """ + return parameter list containing private and shared parameters as follows: + + ================================================================= + | mu | S | Z || theta1 | theta2 | .. | thetaN | + ================================================================= + """ + X = self.gref.X.flatten() + X_var = self.gref.X_variance.flatten() + Z = self.gref.Z.flatten() + thetas = [sparse_GP._get_params(g)[g.Z.size:] for g in self.bgplvms] + params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)]) + return params + + def _set_var_params(self, g, X, X_var, Z): + g.X = X + g.X_variance = X_var + g.Z = Z + + def _set_kern_params(self, g, p): + g.kern._set_params(p[:g.kern.Nparam]) + g.likelihood._set_params(p[g.kern.Nparam:]) + + def _set_params(self, x): + start = 0; end = self.NQ + X = x[start:end].reshape(self.N, self.Q) + start = end; end += start + X_var = x[start:end].reshape(self.N, self.Q) + start = end; end += self.MQ + Z = x[start:end].reshape(self.M, self.Q) + thetas = x[end:] + + # set params for all others: + for g, s, e in itertools.izip(self.bgplvms, self.nparams, self.nparams[1:]): + self._set_var_params(g, X, X_var, Z) + self._set_kern_params(g, thetas[s:e]) + g._compute_kernel_matrices() + g._computations() + + + def log_likelihood(self): + ll = self.gref.KL_divergence() + for g in self.bgplvms: + ll += sparse_GP.log_likelihood(g) + return ll + + def _log_likelihood_gradients(self): + dldmus = self.gref.dL_dmuS().flatten() + dldzt1 = sparse_GP._log_likelihood_gradients(self.gref) + return numpy.hstack((dldmus, + dldzt1, + numpy.hstack([numpy.hstack([g.dL_dtheta(), + g.likelihood._gradients(\ + partial=g.partial_for_likelihood)]) \ + for g in self.bgplvms[1:]]))) + + def plot_scales(self): + pass + diff --git a/GPy/testing/mrd_tests.py b/GPy/testing/mrd_tests.py new file mode 100644 index 00000000..e25a1040 --- /dev/null +++ b/GPy/testing/mrd_tests.py @@ -0,0 +1,32 @@ +# Copyright (c) 2013, Max Zwiessele +# Licensed under the BSD 3-clause license (see LICENSE.txt) +''' +Created on 10 Apr 2013 + +@author: maxz +''' + +import unittest +import numpy as np +import GPy + +# class MRDTests(unittest.TestCase): +# +# # @unittest.skip('') +# def test_gradients(self): +# num_m = 2 +# N, M, Q, D = 10, 3, 2, 4 +# X = np.random.rand(N, Q) +# k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) +# K = k.K(X) +# Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)] +# +# m = GPy.models.MRD(*Ylist, Q=Q, kernel=k, M=M) +# m._debug = True +# m.ensure_default_constraints() +# m.randomize() +# self.assertTrue(m.checkgrad()) +# +# if __name__ == "__main__": +# print "Running unit tests, please be (very) patient..." +# # unittest.main() From e99993b8ce0cd0b1ad3084a01dc593926a38222b Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 11 Apr 2013 15:11:41 +0100 Subject: [PATCH 03/11] GPLVM readded --- GPy/models/GPLVM.py | 125 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 GPy/models/GPLVM.py diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py new file mode 100644 index 00000000..32594594 --- /dev/null +++ b/GPy/models/GPLVM.py @@ -0,0 +1,125 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +import pylab as pb +import sys, pdb +from .. import kern +from ..core import model +from ..util.linalg import pdinv, PCA +from GP import GP +from ..likelihoods import Gaussian +from .. import util + +class GPLVM(GP): + """ + Gaussian Process Latent Variable Model + + :param Y: observed data + :type Y: np.ndarray + :param Q: latent dimensionality + :type Q: int + :param init: initialisation method for the latent space + :type init: 'PCA'|'random' + + """ + def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs): + if X is None: + X = self.initialise_latent(init, Q, Y) + if kernel is None: + kernel = kern.rbf(Q) + kern.bias(Q) + likelihood = Gaussian(Y) + GP.__init__(self, X, likelihood, kernel, **kwargs) + + def initialise_latent(self, init, Q, Y): + if init == 'PCA': + return PCA(Y, Q)[0] + else: + return np.random.randn(Y.shape[0], Q) + + def _get_param_names(self): + return sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[]) + GP._get_param_names(self) + + def _get_params(self): + return np.hstack((self.X.flatten(), GP._get_params(self))) + + def _set_params(self,x): + self.X = x[:self.X.size].reshape(self.N,self.Q).copy() + GP._set_params(self, x[self.X.size:]) + + def _log_likelihood_gradients(self): + dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X) + + return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self))) + + def plot(self): + assert self.likelihood.Y.shape[1]==2 + pb.scatter(self.likelihood.Y[:,0],self.likelihood.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet) + Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None] + mu, var, upper, lower = self.predict(Xnew) + pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5) + + def plot_latent(self,labels=None, which_indices=None, resolution=50): + """ + :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc) + :param resolution: the resolution of the grid on which to evaluate the predictive variance + """ + + util.plot.Tango.reset() + + if labels is None: + labels = np.ones(self.N) + if which_indices is None: + if self.Q==1: + input_1 = 0 + input_2 = None + if self.Q==2: + input_1, input_2 = 0,1 + else: + #try to find a linear of RBF kern in the kernel + k = [p for p in self.kern.parts if p.name in ['rbf','linear']] + if (not len(k)==1) or (not k[0].ARD): + raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" + k = k[0] + if k.name=='rbf': + input_1, input_2 = np.argsort(k.lengthscale)[:2] + elif k.name=='linear': + input_1, input_2 = np.argsort(k.variances)[::-1][:2] + + #first, plot the output variance as a function of the latent space + Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution) + Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1])) + Xtest_full[:, :2] = Xtest + mu, var, low, up = self.predict(Xtest_full) + var = var[:, :2] + pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear') + + + for i,ul in enumerate(np.unique(labels)): + if type(ul) is np.string_: + this_label = ul + elif type(ul) is np.int64: + this_label = 'class %i'%ul + else: + this_label = 'class %i'%i + + index = np.nonzero(labels==ul)[0] + if self.Q==1: + x = self.X[index,input_1] + y = np.zeros(index.size) + else: + x = self.X[index,input_1] + y = self.X[index,input_2] + pb.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0) + + pb.xlabel('latent dimension %i'%input_1) + pb.ylabel('latent dimension %i'%input_2) + + if not np.all(labels==1.): + pb.legend(loc=0,numpoints=1) + + pb.xlim(xmin[0],xmax[0]) + pb.ylim(xmin[1],xmax[1]) + + return input_1, input_2 From dcbd76a6ee4a2d685000625806b81da2b441c591 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 11 Apr 2013 15:47:18 +0100 Subject: [PATCH 04/11] kern plotting with axisa --- GPy/examples/dimensionality_reduction.py | 26 +++++++++++++++--------- GPy/kern/kern.py | 10 ++++----- GPy/models/mrd.py | 16 +++++++++------ 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 248183ff..bfdc9bb6 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -3,7 +3,7 @@ import numpy as np import pylab as pb -from matplotlib import pyplot as plt +from matplotlib import pyplot as plt, pyplot import GPy from GPy.models.mrd import MRD @@ -101,22 +101,25 @@ def oil_100(): return m def mrd_simulation(): - num = 2 + # num = 2 ard1 = np.array([1., 1, 0, 0], dtype=float) ard2 = np.array([0., 1, 1, 0], dtype=float) - ard1[ard1 == 0] = 1E+10 - ard2[ard2 == 0] = 1E+10 + ard1[ard1 == 0] = 1E-10 + ard2[ard2 == 0] = 1E-10 - make_params = lambda ard: np.hstack([[1], ard, [1, .3]]) + ard1i = 1. / ard1 + ard2i = 1. / ard2 + + # make_params = lambda ard: np.hstack([[1], ard, [1, .3]]) D1, D2, N, M, Q = 50, 100, 150, 15, 4 X = np.random.randn(N, Q) - k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard1) + GPy.kern.bias(Q, 1) + GPy.kern.white(Q, 0.0001) + k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard1i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001) Y1 = np.random.multivariate_normal(np.zeros(N), k.K(X), D1).T Y1 -= Y1.mean(0) - k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard2) + GPy.kern.bias(Q, 1) + GPy.kern.white(Q, 0.0001) + k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard2i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001) Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T Y2 -= Y2.mean(0) @@ -125,9 +128,12 @@ def mrd_simulation(): m = MRD(Y1, Y2, Q=Q, M=M, kernel=k, _debug=False) m.ensure_default_constraints() - m.optimize(messages=1, max_f_eval=5000) + fig = pyplot.figure("expected", figsize=(8, 3)) + ax = fig.add_subplot(121) + ax.bar(np.arange(ard1.size) + .1, ard1) + ax = fig.add_subplot(122) + ax.bar(np.arange(ard2.size) + .1, ard2) - import ipdb;ipdb.set_trace() return m def brendan_faces(): @@ -175,7 +181,7 @@ def BGPLVM_oil(): Q = 10 M = 30 - kernel = GPy.kern.rbf(Q, ARD = True) + GPy.kern.bias(Q) + GPy.kern.white(Q) + kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q) m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M) # m.scale_factor = 100.0 m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)') diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 7d3b1737..6f9c97f6 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -52,7 +52,7 @@ class kern(parameterised): parameterised.__init__(self) - def plot_ARD(self): + def plot_ARD(self, ax=pb.gca()): """ If an ARD kernel is present, it bar-plots the ARD parameters @@ -60,16 +60,16 @@ class kern(parameterised): """ for p in self.parts: if hasattr(p, 'ARD') and p.ARD: - pb.figure() - pb.title('ARD parameters, %s kernel' % p.name) + ax.set_title('ARD parameters, %s kernel' % p.name) if p.name == 'linear': ard_params = p.variances else: ard_params = 1./p.lengthscale - pb.bar(np.arange(len(ard_params))-0.4, ard_params) - + ax.bar(np.arange(len(ard_params)) - 0.4, ard_params) + ax.set_xticks(np.arange(len(ard_params)), + ["${}$".format(i + 1) for i in range(len(ard_params))]) def _transform_gradients(self,g): diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 0ba4a695..842ef42f 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -8,6 +8,8 @@ from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM import numpy from GPy.models.sparse_GP import sparse_GP import itertools +from matplotlib import pyplot +import pylab class MRD(model): @@ -103,17 +105,17 @@ class MRD(model): def _set_params(self, x): start = 0; end = self.NQ - X = x[start:end].reshape(self.N, self.Q) + X = x[start:end].reshape(self.N, self.Q).copy() start = end; end += start - X_var = x[start:end].reshape(self.N, self.Q) + X_var = x[start:end].reshape(self.N, self.Q).copy() start = end; end += self.MQ - Z = x[start:end].reshape(self.M, self.Q) + Z = x[start:end].reshape(self.M, self.Q).copy() thetas = x[end:] # set params for all others: for g, s, e in itertools.izip(self.bgplvms, self.nparams, self.nparams[1:]): self._set_var_params(g, X, X_var, Z) - self._set_kern_params(g, thetas[s:e]) + self._set_kern_params(g, thetas[s:e].copy()) g._compute_kernel_matrices() g._computations() @@ -135,5 +137,7 @@ class MRD(model): for g in self.bgplvms[1:]]))) def plot_scales(self): - pass - + fig = pylab.figure("MRD Scales", figsize=(4 * len(self.bgplvms), 3)) + for i, g in enumerate(self.bgplvms): + ax = fig.add_subplot(1, len(self.bgplvms), i + 1) + g.kern.plot_ARD(ax=ax) From de6b00ebfdee91c2ce106ccd414455e19ab79399 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 11 Apr 2013 16:00:14 +0100 Subject: [PATCH 05/11] plot_latent added for mrd --- GPy/models/GPLVM.py | 85 ++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 44 deletions(-) diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index ca380a77..2ce55dda 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -24,7 +24,7 @@ class GPLVM(GP): :type init: 'PCA'|'random' """ - def __init__(self, Y, Q, init='PCA', X=None, kernel=None, **kwargs): + def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs): if X is None: X = self.initialise_latent(init, Q, Y) if kernel is None: @@ -39,28 +39,28 @@ class GPLVM(GP): return np.random.randn(Y.shape[0], Q) def _get_param_names(self): - return sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], []) + GP._get_param_names(self) + return sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[]) + GP._get_param_names(self) def _get_params(self): return np.hstack((self.X.flatten(), GP._get_params(self))) - def _set_params(self, x): - self.X = x[:self.X.size].reshape(self.N, self.Q).copy() + def _set_params(self,x): + self.X = x[:self.X.size].reshape(self.N,self.Q).copy() GP._set_params(self, x[self.X.size:]) def _log_likelihood_gradients(self): - dL_dX = 2.*self.kern.dK_dX(self.dL_dK, self.X) + dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X) - return np.hstack((dL_dX.flatten(), GP._log_likelihood_gradients(self))) + return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self))) def plot(self): - assert self.likelihood.Y.shape[1] == 2 - pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet) - Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None] + assert self.likelihood.Y.shape[1]==2 + pb.scatter(self.likelihood.Y[:,0],self.likelihood.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet) + Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None] mu, var, upper, lower = self.predict(Xnew) - pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5) + pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5) - def plot_latent(self, labels=None, which_indices=None, resolution=50): + def plot_latent(self,labels=None, which_indices=None, resolution=50): """ :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc) :param resolution: the resolution of the grid on which to evaluate the predictive variance @@ -71,57 +71,54 @@ class GPLVM(GP): if labels is None: labels = np.ones(self.N) if which_indices is None: - if self.Q == 1: + if self.Q==1: input_1 = 0 input_2 = None - if self.Q == 2: - input_1, input_2 = 0, 1 + if self.Q==2: + input_1, input_2 = 0,1 else: - # try to find a linear of RBF kern in the kernel - k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']] - if (not len(k) == 1) or (not k[0].ARD): + try: + input_1, input_2 = np.argsort(self.input_sensitivity())[:2] + except: raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" - k = k[0] - if k.name == 'rbf': - input_1, input_2 = np.argsort(k.lengthscale)[:2] - elif k.name == 'linear': - input_1, input_2 = np.argsort(k.variances)[::-1][:2] + else: + input_1, input_2 = which_indices - # first, plot the output variance as a function of the latent space - Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(self.X[:, [input_1, input_2]], resolution=resolution) + #first, plot the output variance as a function of the latent space + Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution) Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1])) Xtest_full[:, :2] = Xtest mu, var, low, up = self.predict(Xtest_full) - var = var[:, :1] - pb.imshow(var.reshape(resolution, resolution).T[::-1, :], - extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary, interpolation='bilinear') + var = var[:, :1] + pb.imshow(var.reshape(resolution,resolution).T[::-1,:], + extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear') - for i, ul in enumerate(np.unique(labels)): + for i,ul in enumerate(np.unique(labels)): if type(ul) is np.string_: this_label = ul elif type(ul) is np.int64: - this_label = 'class %i' % ul + this_label = 'class %i'%ul else: - this_label = 'class %i' % i + this_label = 'class %i'%i - index = np.nonzero(labels == ul)[0] - if self.Q == 1: - x = self.X[index, input_1] + index = np.nonzero(labels==ul)[0] + if self.Q==1: + x = self.X[index,input_1] y = np.zeros(index.size) else: - x = self.X[index, input_1] - y = self.X[index, input_2] - pb.plot(x, y, marker='o', color=util.plot.Tango.nextMedium(), mew=0, label=this_label, linewidth=0) + x = self.X[index,input_1] + y = self.X[index,input_2] + pb.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0) - pb.xlabel('latent dimension %i' % input_1) - pb.ylabel('latent dimension %i' % input_2) + pb.xlabel('latent dimension %i'%input_1) + pb.ylabel('latent dimension %i'%input_2) - if not np.all(labels == 1.): - pb.legend(loc=0, numpoints=1) + if not np.all(labels==1.): + pb.legend(loc=0,numpoints=1) - pb.xlim(xmin[0], xmax[0]) - pb.ylim(xmin[1], xmax[1]) - pb.grid(b=False) # remove the grid if present, it doesn't look good + pb.xlim(xmin[0],xmax[0]) + pb.ylim(xmin[1],xmax[1]) + pb.grid(b=False) # remove the grid if present, it doesn't look good ax = pb.gca() - ax.set_aspect('auto') # set a nice aspect ratio + ax.set_aspect('auto') # set a nice aspect ratio return ax From 51ff92e59151a08a05097565457e113f9090b311 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 11 Apr 2013 18:44:18 +0100 Subject: [PATCH 06/11] finished mrd and added plotting functions --- GPy/examples/dimensionality_reduction.py | 43 ++++++++++-------- GPy/inference/SCG.py | 3 +- GPy/kern/kern.py | 2 +- GPy/models/Bayesian_GPLVM.py | 44 +++++++++++-------- GPy/models/GPLVM.py | 18 ++++---- GPy/models/mrd.py | 55 ++++++++++++++++++++---- GPy/testing/mrd_tests.py | 40 ++++++++--------- 7 files changed, 130 insertions(+), 75 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index c4e064a5..93ce374d 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -107,32 +107,41 @@ def mrd_simulation(): ard1[ard1 == 0] = 1E-10 ard2[ard2 == 0] = 1E-10 - ard1i = 1. / ard1 - ard2i = 1. / ard2 +# ard1i = 1. / ard1 +# ard2i = 1. / ard2 - # make_params = lambda ard: np.hstack([[1], ard, [1, .3]]) +# k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard1i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001) +# Y1 = np.random.multivariate_normal(np.zeros(N), k.K(X), D1).T +# Y1 -= Y1.mean(0) +# +# k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard2i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001) +# Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T +# Y2 -= Y2.mean(0) +# make_params = lambda ard: np.hstack([[1], ard, [1, .3]]) D1, D2, N, M, Q = 50, 100, 150, 15, 4 - X = np.random.randn(N, Q) + x = np.linspace(0, 2 * np.pi, N)[:, None] - k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard1i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001) - Y1 = np.random.multivariate_normal(np.zeros(N), k.K(X), D1).T - Y1 -= Y1.mean(0) + s1 = np.vectorize(lambda x: np.sin(x)) + s2 = np.vectorize(lambda x: np.cos(x)) + sS = np.vectorize(lambda x: np.sin(2 * x)) - k = GPy.kern.rbf(Q, ARD=True, lengthscale=ard2i) + GPy.kern.bias(Q, 0) + GPy.kern.white(Q, 0.0001) - Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T - Y2 -= Y2.mean(0) + S1 = np.hstack([s1(x), sS(x)]) + S2 = np.hstack([s2(x), sS(x)]) - k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q, 1.0) + Y1 = S1.dot(np.random.randn(S1.shape[1], D1)) + Y2 = S2.dot(np.random.randn(S2.shape[1], D2)) - m = MRD(Y1, Y2, Q=Q, M=M, kernel=k, _debug=False) + k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q) + + m = MRD(Y1, Y2, Q=Q, M=M, kernel=k, init="PCA", _debug=False) m.ensure_default_constraints() - fig = pyplot.figure("expected", figsize=(8, 3)) - ax = fig.add_subplot(121) - ax.bar(np.arange(ard1.size) + .1, ard1) - ax = fig.add_subplot(122) - ax.bar(np.arange(ard2.size) + .1, ard2) +# fig = pyplot.figure("expected", figsize=(8, 3)) +# ax = fig.add_subplot(121) +# ax.bar(np.arange(ard1.size) + .1, ard1) +# ax = fig.add_subplot(122) +# ax.bar(np.arange(ard2.size) + .1, ard2) return m diff --git a/GPy/inference/SCG.py b/GPy/inference/SCG.py index 0e85f243..912641f6 100644 --- a/GPy/inference/SCG.py +++ b/GPy/inference/SCG.py @@ -104,7 +104,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto iteration += 1 if display: - print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', + print 'Iteration: {0:<5g} Objective:{1:< 12g} Scale:{2:< 12g}\r'.format(iteration, fnow, beta), + # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', sys.stdout.flush() if success: diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 6f9c97f6..447819c1 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -70,7 +70,7 @@ class kern(parameterised): ax.bar(np.arange(len(ard_params)) - 0.4, ard_params) ax.set_xticks(np.arange(len(ard_params)), ["${}$".format(i + 1) for i in range(len(ard_params))]) - + return ax def _transform_gradients(self,g): x = self._get_params() diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py index ee485e76..dd55b38f 100644 --- a/GPy/models/Bayesian_GPLVM.py +++ b/GPy/models/Bayesian_GPLVM.py @@ -22,7 +22,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): :type init: 'PCA'|'random' """ - def __init__(self, Y, Q, X = None, X_variance = None, init='PCA', M=10, Z=None, kernel=None, **kwargs): + def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, **kwargs): if X == None: X = self.initialise_latent(init, Q, Y) @@ -31,7 +31,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): if Z is None: Z = np.random.permutation(X.copy())[:M] - assert Z.shape[1]==X.shape[1] + assert Z.shape[1] == X.shape[1] if kernel is None: kernel = kern.rbf(Q) + kern.white(Q) @@ -40,8 +40,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs) def _get_param_names(self): - X_names = sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[]) - S_names = sum([['X_variance_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[]) + X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], []) + S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], []) return (X_names + S_names + sparse_GP._get_param_names(self)) def _get_params(self): @@ -56,35 +56,43 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): """ return np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self))) - def _set_params(self,x): + def _set_params(self, x): N, Q = self.N, self.Q - self.X = x[:self.X.size].reshape(N,Q).copy() - self.X_variance = x[(N*Q):(2*N*Q)].reshape(N,Q).copy() - sparse_GP._set_params(self, x[(2*N*Q):]) + self.X = x[:self.X.size].reshape(N, Q).copy() + self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy() + sparse_GP._set_params(self, x[(2 * N * Q):]) + + + def dKL_dmuS(self): + dKL_dS = (1. - (1. / self.X_variance)) * 0.5 + dKL_dmu = self.X + return dKL_dmu, dKL_dS def dL_dmuS(self): - dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_variance) - dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0,self.Z,self.X,self.X_variance) - dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2,self.Z,self.X,self.X_variance) + dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance) + dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance) + dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance) dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2 dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2 - dKL_dS = (1. - (1./self.X_variance))*0.5 - dKL_dmu = self.X - return np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten())) + return dL_dmu, dL_dS def KL_divergence(self): var_mean = np.square(self.X).sum() var_S = np.sum(self.X_variance - np.log(self.X_variance)) - return 0.5*(var_mean + var_S) - 0.5*self.Q*self.N + return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N def log_likelihood(self): return sparse_GP.log_likelihood(self) - self.KL_divergence() def _log_likelihood_gradients(self): - return np.hstack((self.dL_dmuS().flatten(), sparse_GP._log_likelihood_gradients(self))) + dKL_dmu, dKL_dS = self.dKL_dmuS() + dL_dmu, dL_dS = self.dL_dmuS() + # TODO: find way to make faster + dbound_dmuS = np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten())) + return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self))) - def plot_latent(self, which_indices=None,*args, **kwargs): + def plot_latent(self, which_indices=None, *args, **kwargs): if which_indices is None: try: @@ -93,6 +101,6 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" else: input_1, input_2 = which_indices - ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2],*args, **kwargs) + ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs) ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w') return ax diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index 2ce55dda..0c81a9c1 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -60,7 +60,7 @@ class GPLVM(GP): mu, var, upper, lower = self.predict(Xnew) pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5) - def plot_latent(self,labels=None, which_indices=None, resolution=50): + def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=pb.gca()): """ :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc) :param resolution: the resolution of the grid on which to evaluate the predictive variance @@ -90,7 +90,7 @@ class GPLVM(GP): Xtest_full[:, :2] = Xtest mu, var, low, up = self.predict(Xtest_full) var = var[:, :1] - pb.imshow(var.reshape(resolution,resolution).T[::-1,:], + ax.imshow(var.reshape(resolution, resolution).T[::-1, :], extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear') for i,ul in enumerate(np.unique(labels)): @@ -110,15 +110,15 @@ class GPLVM(GP): y = self.X[index,input_2] pb.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0) - pb.xlabel('latent dimension %i'%input_1) - pb.ylabel('latent dimension %i'%input_2) + ax.set_xlabel('latent dimension %i' % input_1) + ax.set_ylabel('latent dimension %i' % input_2) if not np.all(labels==1.): - pb.legend(loc=0,numpoints=1) + ax.legend(loc=0, numpoints=1) - pb.xlim(xmin[0],xmax[0]) - pb.ylim(xmin[1],xmax[1]) - pb.grid(b=False) # remove the grid if present, it doesn't look good - ax = pb.gca() + ax.set_xlim(xmin[0], xmax[0]) + ax.set_ylim(xmin[1], xmax[1]) + ax.grid(b=False) # remove the grid if present, it doesn't look good + # ax = pb.gca() ax.set_aspect('auto') # set a nice aspect ratio return ax diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 842ef42f..bd1c3528 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -121,23 +121,60 @@ class MRD(model): def log_likelihood(self): - ll = self.gref.KL_divergence() + ll = +self.gref.KL_divergence() for g in self.bgplvms: - ll += sparse_GP.log_likelihood(g) - return ll + ll -= sparse_GP.log_likelihood(g) + return -ll def _log_likelihood_gradients(self): - dldmus = self.gref.dL_dmuS().flatten() - dldzt1 = sparse_GP._log_likelihood_gradients(self.gref) - return numpy.hstack((dldmus, + dLdmu, dLdS = reduce(lambda a, b: [a[0] + b[0], a[1] + b[1]], (g.dL_dmuS() for g in self.bgplvms)) + dKLmu, dKLdS = self.gref.dKL_dmuS() + dLdmu -= dKLmu + dLdS -= dKLdS + dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten() + dldzt1 = reduce(lambda a, b: a + b, (sparse_GP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms)) + + return numpy.hstack((dLdmuS, dldzt1, numpy.hstack([numpy.hstack([g.dL_dtheta(), g.likelihood._gradients(\ partial=g.partial_for_likelihood)]) \ - for g in self.bgplvms[1:]]))) + for g in self.bgplvms]))) - def plot_scales(self): + def plot_X(self): + fig = pylab.figure("MRD X", figsize=(4 * len(self.bgplvms), 3)) + fig.clf() + for i, g in enumerate(self.bgplvms): + ax = fig.add_subplot(1, len(self.bgplvms), i + 1) + ax.imshow(g.X) + pylab.draw() + fig.tight_layout() + return fig + + def plot_predict(self): + fig = pylab.figure("MRD Predictions", figsize=(4 * len(self.bgplvms), 3)) + fig.clf() + for i, g in enumerate(self.bgplvms): + ax = fig.add_subplot(1, len(self.bgplvms), i + 1) + ax.imshow(g.predict(g.X)[0]) + pylab.draw() + fig.tight_layout() + return fig + + def plot_scales(self, *args, **kwargs): fig = pylab.figure("MRD Scales", figsize=(4 * len(self.bgplvms), 3)) for i, g in enumerate(self.bgplvms): ax = fig.add_subplot(1, len(self.bgplvms), i + 1) - g.kern.plot_ARD(ax=ax) + g.kern.plot_ARD(ax=ax, *args, **kwargs) + pylab.draw() + fig.tight_layout() + return fig + + def plot_latent(self, *args, **kwargs): + fig = pylab.figure("MRD Latent Spaces", figsize=(4 * len(self.bgplvms), 3)) + for i, g in enumerate(self.bgplvms): + ax = fig.add_subplot(1, len(self.bgplvms), i + 1) + g.plot_latent(ax=ax, *args, **kwargs) + pylab.draw() + fig.tight_layout() + return fig diff --git a/GPy/testing/mrd_tests.py b/GPy/testing/mrd_tests.py index e25a1040..d90a7430 100644 --- a/GPy/testing/mrd_tests.py +++ b/GPy/testing/mrd_tests.py @@ -10,23 +10,23 @@ import unittest import numpy as np import GPy -# class MRDTests(unittest.TestCase): -# -# # @unittest.skip('') -# def test_gradients(self): -# num_m = 2 -# N, M, Q, D = 10, 3, 2, 4 -# X = np.random.rand(N, Q) -# k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) -# K = k.K(X) -# Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)] -# -# m = GPy.models.MRD(*Ylist, Q=Q, kernel=k, M=M) -# m._debug = True -# m.ensure_default_constraints() -# m.randomize() -# self.assertTrue(m.checkgrad()) -# -# if __name__ == "__main__": -# print "Running unit tests, please be (very) patient..." -# # unittest.main() +class MRDTests(unittest.TestCase): + + def test_gradients(self): + num_m = 3 + N, M, Q, D = 20, 8, 5, 50 + X = np.random.rand(N, Q) + + k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q) + K = k.K(X) + Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)] + + m = GPy.models.MRD(*Ylist, Q=Q, kernel=k, M=M) + m.ensure_default_constraints() + m.randomize() + + self.assertTrue(m.checkgrad()) + +if __name__ == "__main__": + print "Running unit tests, please be (very) patient..." + unittest.main() From 338f3295b15025e4abd9332042e53f80e9600c06 Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Fri, 12 Apr 2013 13:31:15 +0100 Subject: [PATCH 07/11] now returning the ax for plot_latent in BGPLVM --- GPy/models/Bayesian_GPLVM.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py index ba9603bb..aaaefa7f 100644 --- a/GPy/models/Bayesian_GPLVM.py +++ b/GPy/models/Bayesian_GPLVM.py @@ -95,3 +95,5 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): input_1, input_2 = which_indices ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2],*args, **kwargs) ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w') + + return ax From c7e8345c9614300fbfe20102026864df0756af5b Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Fri, 12 Apr 2013 13:31:45 +0100 Subject: [PATCH 08/11] --march=native was causing problems on the stupid compiler on MacOS --- GPy/kern/rbf.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index a26bb79c..ff5d6ff3 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -228,9 +228,8 @@ class rbf(kernpart): def weave_psi2(self,mu,Zhat): weave_options = {'headers' : [''], - 'extra_compile_args': ['-fopenmp -march=native'], - 'extra_link_args' : ['-lgomp'], - 'compiler' : 'gcc'} + 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], + 'extra_link_args' : ['-lgomp']} N,Q = mu.shape M = Zhat.shape[0] From ffa1879cfc7cb93b7921e33ce11029bd5efa7f8c Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Fri, 12 Apr 2013 13:32:18 +0100 Subject: [PATCH 09/11] added automatic scale_factor to sparse GPs --- GPy/models/sparse_GP.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py index 88abf77d..cebcba0b 100644 --- a/GPy/models/sparse_GP.py +++ b/GPy/models/sparse_GP.py @@ -36,7 +36,7 @@ class sparse_GP(GP): def __init__(self, X, likelihood, kernel, Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False): self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable - + self.auto_scale_factor = False self.Z = Z self.Zslices = Zslices self.Xslices = Xslices @@ -184,6 +184,8 @@ class sparse_GP(GP): self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam]) self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:]) self._compute_kernel_matrices() + if self.auto_scale_factor: + self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision) self._computations() def _get_params(self): From c8d64a4a69bd193833ac61c3fad721b6cdb3eecc Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Fri, 12 Apr 2013 13:32:27 +0100 Subject: [PATCH 10/11] minor changes --- GPy/examples/dimensionality_reduction.py | 2 +- GPy/inference/SGD.py | 53 ++++++++---------------- 2 files changed, 19 insertions(+), 36 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 61a4abd8..b8c60a09 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -120,7 +120,7 @@ def brendan_faces(): def stick(): data = GPy.util.datasets.stick() m = GPy.models.GPLVM(data['Y'], 2) - + # optimize m.ensure_default_constraints() m.optimize(messages=1, max_f_eval=10000) diff --git a/GPy/inference/SGD.py b/GPy/inference/SGD.py index 13a325b0..5d1b673d 100644 --- a/GPy/inference/SGD.py +++ b/GPy/inference/SGD.py @@ -75,7 +75,10 @@ class opt_SGD(Optimizer): return (np.isnan(data).sum(axis=1) == 0) def check_for_missing(self, data): - return np.isnan(data).sum() > 0 + if sp.sparse.issparse(self.model.likelihood.Y): + return True + else: + return np.isnan(data).sum() > 0 def subset_parameter_vector(self, x, samples, param_shapes): subset = np.array([], dtype = int) @@ -149,10 +152,10 @@ class opt_SGD(Optimizer): else: raise NotImplementedError - def step_with_missing_data(self, f_fp, X, step, shapes, sparse_matrix): + def step_with_missing_data(self, f_fp, X, step, shapes): N, Q = X.shape - if not sparse_matrix: + if not sp.sparse.issparse(self.model.likelihood.Y): Y = self.model.likelihood.Y samples = self.non_null_samples(self.model.likelihood.Y) self.model.N = samples.sum() @@ -165,7 +168,6 @@ class opt_SGD(Optimizer): if self.model.N == 0 or Y.std() == 0.0: return 0, step, self.model.N - # FIXME: get rid of self.center, everything should be centered by default self.model.likelihood._mean = Y.mean() self.model.likelihood._std = Y.std() self.model.likelihood.set_data(Y) @@ -173,10 +175,6 @@ class opt_SGD(Optimizer): j = self.subset_parameter_vector(self.x_opt, samples, shapes) self.model.X = X[samples] - # if self.center: - # self.model.likelihood.Y -= self.model.likelihood.Y.mean() - # self.model.likelihood.Y /= self.model.likelihood.Y.std() - model_name = self.model.__class__.__name__ if model_name == 'Bayesian_GPLVM': @@ -185,33 +183,31 @@ class opt_SGD(Optimizer): b, p = self.shift_constraints(j) f, fp = f_fp(self.x_opt[j]) - # momentum_term = self.momentum * step[j] - # step[j] = self.learning_rate[j] * fp - # self.x_opt[j] -= step[j] + momentum_term - step[j] = self.momentum * step[j] + self.learning_rate[j] * fp self.x_opt[j] -= step[j] self.restore_constraints(b, p) + # restore likelihood _mean and _std, otherwise when we call set_data(y) on + # the next feature, it will get normalized with the mean and std of this one. + self.model.likelihood._mean = 0 + self.model.likelihood._std = 1 return f, step, self.model.N def opt(self, f_fp=None, f=None, fp=None): self.x_opt = self.model._get_params_transformed() X, Y = self.model.X.copy(), self.model.likelihood.Y.copy() - N, Q = self.model.X.shape - D = self.model.likelihood.Y.shape[1] - self.trace = [] - sparse_matrix = sp.sparse.issparse(self.model.likelihood.Y) - missing_data = True - if not sparse_matrix: - missing_data = self.check_for_missing(self.model.likelihood.Y) self.model.likelihood.YYT = None self.model.likelihood.trYYT = None self.model.likelihood._mean = 0.0 self.model.likelihood._std = 1.0 + + N, Q = self.model.X.shape + D = self.model.likelihood.Y.shape[1] num_params = self.model._get_params() + self.trace = [] + missing_data = self.check_for_missing(self.model.likelihood.Y) step = np.zeros_like(num_params) for it in range(self.iterations): @@ -224,34 +220,26 @@ class opt_SGD(Optimizer): b = len(features)/self.batch_size features = [features[i::b] for i in range(b)] NLL = [] - count = 0 - last_printed_count = -1 - for j in features: - count += 1 + for count, j in enumerate(features): self.model.D = len(j) self.model.likelihood.D = len(j) self.model.likelihood.set_data(Y[:, j]) - if missing_data or sparse_matrix: + if missing_data: shapes = self.get_param_shapes(N, Q) - f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes, sparse_matrix) + f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes) else: Nj = N f, fp = f_fp(self.x_opt) - # momentum_term = self.momentum * step # compute momentum using update(t-1) - # step = self.learning_rate * fp # compute update(t) - # self.x_opt -= step + momentum_term step = self.momentum * step + self.learning_rate * fp self.x_opt -= step - if self.messages == 2: noise = self.model.likelihood._variance status = "evaluating {feature: 5d}/{tot: 5d} \t f: {f: 2.3f} \t non-missing: {nm: 4d}\t noise: {noise: 2.4f}\r".format(feature = count, tot = len(features), f = f, nm = Nj, noise = noise) sys.stdout.write(status) sys.stdout.flush() - last_printed_count = count self.param_traces['noise'].append(noise) NLL.append(f) @@ -269,7 +257,6 @@ class opt_SGD(Optimizer): self.model.likelihood.D = D self.model.likelihood.Y = Y - # self.model.Youter = np.dot(Y, Y.T) self.trace.append(self.f_opt) if self.iteration_file is not None: f = open(self.iteration_file + "iteration%d.pickle" % it, 'w') @@ -282,7 +269,3 @@ class opt_SGD(Optimizer): status = "SGD Iteration: {0: 3d}/{1: 3d} f: {2: 2.3f}\n".format(it+1, self.iterations, self.f_opt) sys.stdout.write(status) sys.stdout.flush() - - - - From f6b98160a7c0ace6ca5f795aeb878d30b8aaf6a4 Mon Sep 17 00:00:00 2001 From: Ricardo Andrade Date: Fri, 12 Apr 2013 18:45:14 +0100 Subject: [PATCH 11/11] auto_scale --- GPy/models/sparse_GP.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py index cebcba0b..b816e684 100644 --- a/GPy/models/sparse_GP.py +++ b/GPy/models/sparse_GP.py @@ -185,7 +185,11 @@ class sparse_GP(GP): self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:]) self._compute_kernel_matrices() if self.auto_scale_factor: - self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision) + if self.likelihood.is_heteroscedastic: + self.scale_factor = max(100.,(self.psi2_beta_scaled.sum(0).max())) + print self.scale_factor + else: + self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision) self._computations() def _get_params(self):