diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 0e68e7f7..f4115c1a 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -6,7 +6,6 @@
 import pylab as pb
 from matplotlib import pyplot as plt, pyplot
 import GPy
-from GPy.models.mrd import MRD
 
 default_seed = np.random.seed(123344)
 
@@ -100,12 +99,12 @@ def oil_100():
 #     m.plot_latent(labels=data['Y'].argmax(axis=1))
     return m
 
-def mrd_simulation():
+def mrd_simulation(plot_sim=False):
 #     num = 2
-    ard1 = np.array([1., 1, 0, 0], dtype=float)
-    ard2 = np.array([0., 1, 1, 0], dtype=float)
-    ard1[ard1 == 0] = 1E-10
-    ard2[ard2 == 0] = 1E-10
+#     ard1 = np.array([1., 1, 0, 0], dtype=float)
+#     ard2 = np.array([0., 1, 1, 0], dtype=float)
+#     ard1[ard1 == 0] = 1E-10
+#     ard2[ard2 == 0] = 1E-10
 #     ard1i = 1. / ard1
 #     ard2i = 1. / ard2
 
@@ -119,24 +118,79 @@ def mrd_simulation():
 #     Y2 -= Y2.mean(0)
 #     make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
 
-    D1, D2, N, M, Q = 50, 100, 150, 15, 4
+    D1, D2, D3, N, M, Q = 6, 7, 8, 150, 18, 5
     x = np.linspace(0, 2 * np.pi, N)[:, None]
     s1 = np.vectorize(lambda x: np.sin(x))
     s2 = np.vectorize(lambda x: np.cos(x))
+    s3 = np.vectorize(lambda x: -np.exp(-np.cos(2 * x)))
     sS = np.vectorize(lambda x: np.sin(2 * x))
 
-    S1 = np.hstack([s1(x), sS(x)])
-    S2 = np.hstack([s2(x), sS(x)])
+    s1 = s1(x)
+    s2 = s2(x)
+    s3 = s3(x)
+    sS = sS(x)
+
+    s1 -= s1.mean()
+    s2 -= s2.mean()
+    s3 -= s3.mean()
+    sS -= sS.mean()
+    s1 /= np.abs(s1).max()
+    s2 /= np.abs(s2).max()
+    s3 /= np.abs(s3).max()
+    sS /= np.abs(sS).max()
+
+    S1 = np.hstack([s1, sS])
+    S2 = np.hstack([s2, sS])
+    S3 = np.hstack([s3, sS])
 
     Y1 = S1.dot(np.random.randn(S1.shape[1], D1))
     Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
+    Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
 
-    k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
+    Y1 += .1 * np.random.randn(*Y1.shape)
+    Y2 += .1 * np.random.randn(*Y2.shape)
+    Y3 += .1 * np.random.randn(*Y3.shape)
 
-    m = MRD(Y1, Y2, Q=Q, M=M, kernel=k, init="PCA", _debug=False)
+    Y1 -= Y1.mean(0)
+    Y2 -= Y2.mean(0)
+    Y3 -= Y3.mean(0)
+    Y1 /= Y1.std(0)
+    Y2 /= Y2.std(0)
+    Y3 /= Y3.std(0)
+
+    Slist = [s1, s2, sS]
+    Ylist = [Y1, Y2]
+
+    if plot_sim:
+        import pylab
+        import itertools
+        fig = pylab.figure("MRD Simulation", figsize=(8, 6))
+        fig.clf()
+        ax = fig.add_subplot(2, 1, 1)
+        labls = ["s1", "s2", "sS"]  # one label per curve in Slist
+        for S, lab in itertools.izip(Slist, labls):
+            ax.plot(x, S, label=lab)
+        ax.legend()
+        for i, Y in enumerate(Ylist):
+            ax = fig.add_subplot(2, len(Ylist), len(Ylist) + i + 1)
+            ax.imshow(Y)
+            ax.set_title("Y{}".format(i + 1))
+        pylab.draw()
+        pylab.tight_layout()
+
+    from GPy.models import mrd
+    from GPy import kern
+    reload(mrd); reload(kern)
+    k = kern.rbf(Q, ARD=True) + kern.bias(Q) + kern.white(Q)
+    m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, init="single", _debug=False)
 
     m.ensure_default_constraints()
+    # cstr = "noise|white|variance"
+    # m.unconstrain(cstr); m.constrain_bounded(cstr, 1e-10, 1.)
+
+    m.auto_scale_factor = True
+
 #     fig = pyplot.figure("expected", figsize=(8, 3))
 #     ax = fig.add_subplot(121)
 #     ax.bar(np.arange(ard1.size) + .1, ard1)
@@ -145,6 +199,10 @@
 
     return m
 
+def mrd_silhouette():
+
+    pass
+
 def brendan_faces():
     data = GPy.util.datasets.brendan_faces()
     Y = data['Y'][0:-1:10, :]
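A minimal smoke test for the reworked example above (a sketch, not part of the patch; it assumes this branch is checked out under the Python 2 / GPy 0.x environment the code targets):

```python
# Hypothetical session exercising mrd_simulation() as changed above.
from GPy.examples.dimensionality_reduction import mrd_simulation

m = mrd_simulation(plot_sim=True)              # plot the generating signals and both views
m.optimize('scg', messages=1, max_f_eval=500)  # same call style as MRD._debug_optimize below
m.plot_scales()                                # ARD scales should separate shared vs. private dims
```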
diff --git a/GPy/inference/SCG.py b/GPy/inference/SCG.py
index 912641f6..ca42acfe 100644
--- a/GPy/inference/SCG.py
+++ b/GPy/inference/SCG.py
@@ -104,7 +104,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
     iteration += 1
 
     if display:
-        print 'Iteration: {0:<5g} Objective:{1:< 12g} Scale:{2:< 12g}\r'.format(iteration, fnow, beta),
+        print '\r',
+        print 'Iteration: {0:>5g} Objective:{1:> 12e} Scale:{2:> 12e}'.format(iteration, fnow, beta),
 #        print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
         sys.stdout.flush()
 
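The display change above moves the carriage return to the front of the status line: print `'\r'` first so the cursor jumps back to column 0, end the print statement with a trailing comma to suppress the newline, then flush. The same idiom in isolation (a sketch under Python 2, matching the print-statement syntax used here):

```python
import sys
import time

# Each pass overwrites the previous status line in place.
for i in range(1, 6):
    print '\r',
    print 'Iteration: {0:>5g} Objective:{1:> 12e}'.format(i, 1. / i),
    sys.stdout.flush()
    time.sleep(0.2)
print  # leave the final status visible and move to a fresh line
```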
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 447819c1..414a911f 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -52,12 +52,14 @@ class kern(parameterised):
 
         parameterised.__init__(self)
 
-    def plot_ARD(self, ax=pb.gca()):
+    def plot_ARD(self, ax=None):
         """
         If an ARD kernel is present, it bar-plots the ARD parameters
         """
+        if ax is None:
+            ax = pb.gca()
         for p in self.parts:
             if hasattr(p, 'ARD') and p.ARD:
                 ax.set_title('ARD parameters, %s kernel' % p.name)
diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index d0491de0..bd56ff12 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -60,12 +60,13 @@ class GPLVM(GP):
         mu, var, upper, lower = self.predict(Xnew)
         pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)
 
-    def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=pb.gca()):
+    def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=None):
         """
         :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
         :param resolution: the resolution of the grid on which to evaluate the predictive variance
         """
-
+        if ax is None:
+            ax = pb.gca()
         util.plot.Tango.reset()
 
         if labels is None:
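Both of the preceding fixes (`plot_ARD` in kern.py and `plot_latent` in GPLVM.py) address the same pitfall: a default argument like `ax=pb.gca()` is evaluated once, when the module is imported, so every later call would draw into whatever axes happened to be current at import time. A minimal before/after illustration (a sketch; the function names are made up):

```python
import pylab as pb

def plot_bad(ax=pb.gca()):    # gca() runs once, at definition time
    ax.plot([0, 1])           # always draws into that original axes object

def plot_good(ax=None):       # default is resolved at call time instead
    if ax is None:
        ax = pb.gca()         # picks up whatever axes is current *now*
    ax.plot([0, 1])
```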
diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index 91cc60e3..d63adaf1 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -11,7 +11,5 @@ from warped_GP import warpedGP
 from sparse_GPLVM import sparse_GPLVM
 from uncollapsed_sparse_GP import uncollapsed_sparse_GP
 from Bayesian_GPLVM import Bayesian_GPLVM
-import mrd
-MRD = mrd.MRD
-del mrd
+from mrd import MRD
 from generalized_FITC import generalized_FITC
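The MRD constructor in the next file unpacks its optional keyword arguments with `has_key()`/`del` pairs and `try`/`except KeyError`. `dict.pop` with a default expresses each pair in one line; an equivalent sketch (not part of the patch, Python 2, with a made-up helper name):

```python
# pop_options mirrors MRD.__init__'s kwargs handling below.
def pop_options(**kwargs):
    debug = kwargs.pop('_debug', False)  # replaces has_key + del + else branch
    init = kwargs.pop('init', "PCA")
    if 'Q' not in kwargs:                # Q stays in kwargs for Bayesian_GPLVM
        raise ValueError("Need Q for MRD")
    return debug, init, kwargs

print pop_options(Q=5, init="single")    # -> (False, 'single', {'Q': 5})
```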
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index bd1c3528..81e46774 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -8,9 +8,8 @@ from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
 import numpy
 from GPy.models.sparse_GP import sparse_GP
 import itertools
-from matplotlib import pyplot
 import pylab
-
+from GPy.util.linalg import PCA
 
 class MRD(model):
     """
@@ -22,7 +21,7 @@ class MRD(model):
     :type Ylist: [np.ndarray]
     :param names: names for different gplvm models
     :type names: [str]
-    :param Q: latent dimensionality
+    :param Q: latent dimensionality (will raise a ValueError if not supplied)
     :type Q: int
     :param init: initialisation method for the latent space
     :type init: 'PCA'|'random'
@@ -40,10 +39,11 @@
     kernel to use
     """
     def __init__(self, *Ylist, **kwargs):
-        self._debug = False
         if kwargs.has_key("_debug"):
            self._debug = kwargs['_debug']
            del kwargs['_debug']
+        else:
+            self._debug = False
         if kwargs.has_key("names"):
             self.names = kwargs['names']
             del kwargs['names']
@@ -55,18 +55,71 @@ class MRD(model):
             del kwargs['kernel']
         else:
             k = lambda: None
-        self.bgplvms = [Bayesian_GPLVM(Y, kernel=k(), **kwargs) for Y in Ylist]
+        if kwargs.has_key('init'):
+            init = kwargs['init']
+            del kwargs['init']
+        else:
+            init = "PCA"
+        try:
+            self.Q = kwargs["Q"]
+        except KeyError:
+            raise ValueError("Need Q for MRD")
+        try:
+            self.M = kwargs["M"]
+        except KeyError:
+            self.M = 10
+
+        self._init = True
+        X = self._init_X(init, Ylist)
+        Z = numpy.random.permutation(X.copy())[:self.M]  # inducing inputs: random subset of the initial X
+        self.bgplvms = [Bayesian_GPLVM(Y, kernel=k(), X=X, Z=Z, **kwargs) for Y in Ylist]
+        del self._init
+
         self.gref = self.bgplvms[0]
         nparams = numpy.array([0] + [sparse_GP._get_params(g).size - g.Z.size for g in self.bgplvms])
         self.nparams = nparams.cumsum()
 
-        self.Q = self.gref.Q
+
         self.N = self.gref.N
         self.NQ = self.N * self.Q
-        self.M = self.gref.M
         self.MQ = self.M * self.Q
 
         model.__init__(self) # @UndefinedVariable
 
+    @property
+    def X(self):
+        return self.gref.X
+    @X.setter
+    def X(self, X):
+        try:
+            self.propagate_param(X=X)
+        except AttributeError:
+            if not self._init:
+                raise AttributeError("bgplvm list not initialized")
+    @property
+    def Ylist(self):
+        return [g.likelihood.Y for g in self.bgplvms]
+    @Ylist.setter
+    def Ylist(self, Ylist):
+        for g, Y in itertools.izip(self.bgplvms, Ylist):
+            g.likelihood.Y = Y
+
+    @property
+    def auto_scale_factor(self):
+        """
+        set auto_scale_factor for all gplvms
+        :param b: auto_scale_factor
+        :type b: bool
+        """
+        return self.gref.auto_scale_factor
+    @auto_scale_factor.setter
+    def auto_scale_factor(self, b):
+        self.propagate_param(auto_scale_factor=b)
+
+    def propagate_param(self, **kwargs):
+        for key, val in kwargs.iteritems():
+            for g in self.bgplvms:
+                g.__setattr__(key, val)
+
     def _get_param_names(self):
 #        X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
 #        S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
@@ -87,9 +140,16 @@ class MRD(model):
        | mu | S | Z || theta1 | theta2 | .. | thetaN |
        =================================================================
        """
-        X = self.gref.X.flatten()
-        X_var = self.gref.X_variance.flatten()
-        Z = self.gref.Z.flatten()
+        X = self.gref.X.ravel()
+        X_var = self.gref.X_variance.ravel()
+        Z = self.gref.Z.ravel()
+
+        if self._debug:
+            for g in self.bgplvms:
+                assert numpy.allclose(g.X.ravel(), X)
+                assert numpy.allclose(g.X_variance.ravel(), X_var)
+                assert numpy.allclose(g.Z.ravel(), Z)
+
        thetas = [sparse_GP._get_params(g)[g.Z.size:] for g in self.bgplvms]
        params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)])
        return params
@@ -105,26 +165,36 @@
     def _set_params(self, x):
         start = 0; end = self.NQ
-        X = x[start:end].reshape(self.N, self.Q).copy()
+        X = x[start:end]
         start = end; end += start
-        X_var = x[start:end].reshape(self.N, self.Q).copy()
+        X_var = x[start:end]
         start = end; end += self.MQ
-        Z = x[start:end].reshape(self.M, self.Q).copy()
+        Z = x[start:end]
         thetas = x[end:]
 
-        # set params for all others:
+        if self._debug:
+            for g in self.bgplvms:
+                assert numpy.allclose(g.X, self.gref.X)
+                assert numpy.allclose(g.X_variance, self.gref.X_variance)
+                assert numpy.allclose(g.Z, self.gref.Z)
+
+        # set params for all:
         for g, s, e in itertools.izip(self.bgplvms, self.nparams, self.nparams[1:]):
-            self._set_var_params(g, X, X_var, Z)
-            self._set_kern_params(g, thetas[s:e].copy())
-            g._compute_kernel_matrices()
-            g._computations()
+            g._set_params(numpy.hstack([X, X_var, Z, thetas[s:e]]))
+#             self._set_var_params(g, X, X_var, Z)
+#             self._set_kern_params(g, thetas[s:e].copy())
+#             g._compute_kernel_matrices()
+#             if self.auto_scale_factor:
+#                 g.scale_factor = numpy.sqrt(g.psi2.sum(0).mean() * g.likelihood.precision)
+#                 # self.scale_factor = numpy.sqrt(self.psi2.sum(0).mean() * self.likelihood.precision)
+#             g._computations()
 
     def log_likelihood(self):
-        ll = +self.gref.KL_divergence()
+        ll = -self.gref.KL_divergence()
         for g in self.bgplvms:
-            ll -= sparse_GP.log_likelihood(g)
-        return -ll
+            ll += sparse_GP.log_likelihood(g)
+        return ll
 
     def _log_likelihood_gradients(self):
         dLdmu, dLdS = reduce(lambda a, b: [a[0] + b[0], a[1] + b[1]], (g.dL_dmuS() for g in self.bgplvms))
@@ -141,6 +211,43 @@
                                             partial=g.partial_for_likelihood)]) \
                     for g in self.bgplvms])))
 
+    def _init_X(self, init='PCA', Ylist=None):
+        if Ylist is None:
+            Ylist = self.Ylist
+        if init in ("PCA", "single", "PCA_single"):  # PCA per view, each on its own block of latent dims
+            X = numpy.zeros((Ylist[0].shape[0], self.Q))
+            for qs, Y in itertools.izip(numpy.array_split(numpy.arange(self.Q), len(Ylist)), Ylist):
+                X[:, qs] = PCA(Y, len(qs))[0]
+        elif init in ("concat", "PCA_concat"):  # one PCA on the concatenation of all views
+            X = PCA(numpy.hstack(Ylist), self.Q)[0]
+        else: # init == 'random':
+            X = numpy.random.randn(Ylist[0].shape[0], self.Q)
+        self.X = X
+        return X
+
+    def plot_X_1d(self, colors=None):
+        if colors is None:
+            colors = pylab.gca()._get_lines.color_cycle
+        fig = pylab.figure(num="MRD X 1d", figsize=(4 * len(self.bgplvms), (2 * self.X.shape[1])))
+        fig.clf()
+        ax1 = fig.add_subplot(self.X.shape[1], 1, 1)
+        ax1.plot(self.X, c='k', alpha=.3)
+        plots = ax1.plot(self.X.T[0], c=colors.next())
+        for i in range(self.X.shape[1] - 1):
+            ax = fig.add_subplot(self.X.shape[1], 1, i + 2)
+            ax.plot(self.X, c='k', alpha=.3)
+            plots.extend(ax.plot(self.X.T[i + 1], c=colors.next()))
+            if i < self.X.shape[1] - 2:
+                ax.set_xticklabels('')
+        ax1.set_xticklabels('')
+        ax1.legend(plots, [r"$\mathbf{{X_{}}}$".format(i + 1) for i in range(self.X.shape[1])],
+                   bbox_to_anchor=(0., 1 + .01 * self.X.shape[1],
+                                   1., 1. + .01 * self.X.shape[1]), loc=3,
+                   ncol=self.X.shape[1], mode="expand", borderaxespad=0.)
+        pylab.draw()
+        fig.tight_layout(h_pad=.01, rect=(0, 0, 1, .95))
+        return fig
+
     def plot_X(self):
         fig = pylab.figure("MRD X", figsize=(4 * len(self.bgplvms), 3))
         fig.clf()
@@ -163,6 +270,7 @@
 
     def plot_scales(self, *args, **kwargs):
         fig = pylab.figure("MRD Scales", figsize=(4 * len(self.bgplvms), 3))
+        fig.clf()
         for i, g in enumerate(self.bgplvms):
             ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
             g.kern.plot_ARD(ax=ax, *args, **kwargs)
@@ -172,9 +280,27 @@
 
     def plot_latent(self, *args, **kwargs):
         fig = pylab.figure("MRD Latent Spaces", figsize=(4 * len(self.bgplvms), 3))
+        fig.clf()
         for i, g in enumerate(self.bgplvms):
             ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
             g.plot_latent(ax=ax, *args, **kwargs)
         pylab.draw()
         fig.tight_layout()
         return fig
+
+    def _debug_plot(self):
+        self.plot_X()
+        self.plot_X_1d()
+        self.plot_latent()
+        self.plot_scales()
+
+    def _debug_optimize(self, opt='scg', maxiters=500, itersteps=10):
+        iters = 0
+        optstep = lambda: self.optimize(opt, messages=1, max_f_eval=itersteps)
+        self._debug_plot()
+        raw_input("enter to start debug")
+        while iters < maxiters:
+            optstep()
+            self._debug_plot()
+            iters += itersteps
+
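End to end, the interface added in mrd.py can be driven the same way mrd_simulation does above; a minimal sketch (data sizes, Q, M and iteration counts are arbitrary; it assumes this branch):

```python
import numpy as np
import GPy
from GPy.models import MRD

# Two noisy linear views of a common 3d latent signal (toy data).
X_true = np.random.randn(100, 3)
Y1 = X_true.dot(np.random.randn(3, 6)) + .1 * np.random.randn(100, 6)
Y2 = X_true.dot(np.random.randn(3, 7)) + .1 * np.random.randn(100, 7)

# One ARD kernel over the Q latent dimensions, as in the example script.
Q = 5
k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)

m = MRD(Y1, Y2, Q=Q, M=15, kernel=k, init="PCA_concat")
m.ensure_default_constraints()
m.optimize('scg', messages=1, max_f_eval=200)

m.plot_scales()   # per-view ARD weights: shared vs. private dimensions
m.plot_latent()   # per-view latent embeddings
```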