diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py
index b85540ce..fb7d93e7 100644
--- a/GPy/models/gplvm.py
+++ b/GPy/models/gplvm.py
@@ -5,7 +5,6 @@
 import numpy as np
 import pylab as pb
 from .. import kern
-from ..util.linalg import PCA
 from ..core import GP, Param
 from ..likelihoods import Gaussian
 from .. import util
@@ -29,9 +28,11 @@ class GPLVM(GP):
         """
         if X is None:
             from ..util.initialization import initialize_latent
-            X = initialize_latent(init, input_dim, Y)
+            X, fracs = initialize_latent(init, input_dim, Y)
+        else:
+            fracs = np.ones(input_dim)
         if kernel is None:
-            kernel = kern.RBF(input_dim, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
+            kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
 
         likelihood = Gaussian()
 
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index ac2ef9cd..177ddc19 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -6,12 +6,12 @@
 import itertools
 import pylab
 from ..core import Model
-from ..util.linalg import PCA
 from ..kern import Kern
 from ..core.parameterization.variational import NormalPosterior, NormalPrior
 from ..core.parameterization import Param, Parameterized
 from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC
 from ..likelihoods import Gaussian
+from ..util.initialization import initialize_latent
 
 class MRD(Model):
     """
@@ -71,7 +71,7 @@ class MRD(Model):
         self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
 
         if X_variance is None:
-            X_variance = np.random.uniform(0, .2, X.shape)
+            X_variance = np.random.uniform(0, .1, X.shape)
 
         self.variational_prior = NormalPrior()
         self.X = NormalPosterior(X, X_variance)
@@ -147,11 +147,11 @@ class MRD(Model):
         if Ylist is None:
             Ylist = self.Ylist
         if init in "PCA_concat":
-            X = PCA(np.hstack(Ylist), self.input_dim)[0]
+            X = initialize_latent('PCA', self.input_dim, np.hstack(Ylist))[0]
         elif init in "PCA_single":
             X = np.zeros((Ylist[0].shape[0], self.input_dim))
             for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
-                X[:, qs] = PCA(Y, len(qs))[0]
+                X[:, qs] = initialize_latent('PCA', len(qs), Y)[0]
         else: # init == 'random':
             X = np.random.randn(Ylist[0].shape[0], self.input_dim)
         return X
diff --git a/GPy/util/initialization.py b/GPy/util/initialization.py
index 24194b41..86efa3f0 100644
--- a/GPy/util/initialization.py
+++ b/GPy/util/initialization.py
@@ -5,13 +5,14 @@
 Created on 24 Feb 2014
 '''
 import numpy as np
-from linalg import PCA
+from .pca import pca
 
 def initialize_latent(init, input_dim, Y):
     Xr = np.random.randn(Y.shape[0], input_dim)
+    fracs = np.ones(input_dim)
     if init == 'PCA':
-        PC = PCA(Y, input_dim)[0]
+        p = pca(Y)
+        PC = p.project(Y, min(input_dim, Y.shape[1]))
+        fracs[:PC.shape[1]] = p.fracs[:PC.shape[1]]
         Xr[:PC.shape[0], :PC.shape[1]] = PC
-    else:
-        pass
-    return Xr
\ No newline at end of file
+    return Xr, fracs
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 4745c4aa..b204f813 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -580,26 +580,3 @@ def backsub_both_sides(L, X, transpose='left'):
         tmp, _ = dtrtrs(L, X, lower=1, trans=0)
         return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T
 
-def PCA(Y, input_dim):
-    """
-    Principal component analysis: maximum likelihood solution by SVD
-
-    :param Y: NxD np.array of data
-    :param input_dim: int, dimension of projection
-
-    :rval X:
-        Nxinput_dim np.array of dimensionality reduced data
-    :rval W:
-        input_dimxD mapping from X to Y
-
-    """
-    if not np.allclose(Y.mean(axis=0), 0.0):
-        print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"
-        # Y -= Y.mean(axis=0)
-
-    Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False)
-    [X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]]
-    v = X.std(axis=0)
-    X /= v;
-    W *= v;
-    return X, W.T
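
For context, a minimal usage sketch of the refactored initialisation (not part of the patch; `Y` and `input_dim` are illustrative toy values). It mirrors what `GPLVM.__init__` does above: `initialize_latent` now returns the PCA eigenvalue fractions alongside the latent points, and those fractions seed the ARD lengthscales of the RBF kernel.

    # Illustrative sketch only, assuming the patched GPy tree.
    import numpy as np
    from GPy import kern
    from GPy.util.initialization import initialize_latent

    Y = np.random.randn(100, 20)   # toy N x D data matrix
    input_dim = 3                  # latent dimensionality
    X, fracs = initialize_latent('PCA', input_dim, Y)
    kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=input_dim > 1) \
             + kern.Bias(input_dim, np.exp(-2))
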
diff --git a/GPy/util/pca.py b/GPy/util/pca.py
new file mode 100644
index 00000000..6c548b3d
--- /dev/null
+++ b/GPy/util/pca.py
@@ -0,0 +1,123 @@
+'''
+Created on 10 Sep 2012
+
+@author: Max Zwiessele
+@copyright: Max Zwiessele 2012
+'''
+import numpy
+import pylab
+import matplotlib
+
+class pca(object):
+    """
+    pca module with automatic primal/dual determination.
+    """
+    def __init__(self, X):
+        self.mu = X.mean(0)
+        self.sigma = X.std(0)
+
+        X = self.center(X)
+
+        # solve the cheaper eigenproblem: D x D (primal) or N x N (dual)
+        if X.shape[0] >= X.shape[1]:
+            # N >= D: using primal
+            self.eigvals, self.eigvectors = self._primal_eig(X)
+        else:
+            # N < D: using dual
+            self.eigvals, self.eigvectors = self._dual_eig(X)
+        self.sort = numpy.argsort(self.eigvals)[::-1]
+        self.eigvals = self.eigvals[self.sort]
+        self.eigvectors = self.eigvectors[:, self.sort]
+        self.fracs = self.eigvals / self.eigvals.sum()
+        self.Q = self.eigvals.shape[0]
+
+    def center(self, X):
+        """
+        Center `X` in pca space.
+        """
+        X = X - self.mu
+        X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma)
+        return X
+
+    def _primal_eig(self, X):
+        return numpy.linalg.eigh(numpy.einsum('ji,jk->ik', X, X))
+
+    def _dual_eig(self, X):
+        dual_eigvals, dual_eigvects = numpy.linalg.eigh(numpy.einsum('ij,kj->ik', X, X))
+        relevant_dimensions = numpy.argsort(numpy.abs(dual_eigvals))[-X.shape[1]:]
+        eigvals = dual_eigvals[relevant_dimensions]
+        eigvects = dual_eigvects[:, relevant_dimensions]
+        eigvects = (1. / numpy.sqrt(X.shape[0] * numpy.abs(eigvals))) * X.T.dot(eigvects)
+        eigvects /= numpy.sqrt(numpy.diag(eigvects.T.dot(eigvects)))
+        return eigvals, eigvects
+
+    def project(self, X, Q=None):
+        """
+        Project X into pca space, defined by the Q highest eigenvalues.
+        Y = X dot V
+        """
+        if Q is None:
+            Q = self.Q
+        if Q > X.shape[1]:
+            raise IndexError("requested dimension larger than input dimension")
+        X = self.center(X)
+        return X.dot(self.eigvectors[:, :Q])
+
+    def plot_fracs(self, Q=None, ax=None, fignum=None):
+        """
+        Plot fractions of eigenvalues sorted in descending order.
+        """
+        if ax is None:
+            fig = pylab.figure(fignum)
+            ax = fig.add_subplot(111)
+        if Q is None:
+            Q = self.Q
+        ticks = numpy.arange(Q)
+        bar = ax.bar(ticks - .4, self.fracs[:Q])
+        ax.set_xticks(ticks)
+        ax.set_xticklabels([r"${}$".format(i) for i in ticks + 1])
+        ax.set_ylabel("Eigenvalue fraction")
+        ax.set_xlabel("PC")
+        ax.set_ylim(0, ax.get_ylim()[1])
+        ax.set_xlim(ticks.min() - .5, ticks.max() + .5)
+        try:
+            pylab.tight_layout()
+        except:
+            pass
+        return bar
+
+    def plot_2d(self, X, labels=None, s=20, marker='o',
+                dimensions=(0, 1), ax=None, colors=None,
+                fignum=None, cmap=matplotlib.cm.jet, # @UndefinedVariable
+                **kwargs):
+        """
+        Plot dimensions `dimensions` of the projected data against each other,
+        coloured by the given labels. Labels can be any sequence of length
+        X.shape[0]; a legend can be drawn with a subsequent call to legend().
+        """
+        if ax is None:
+            fig = pylab.figure(fignum)
+            ax = fig.add_subplot(111)
+        if labels is None:
+            labels = numpy.zeros(X.shape[0])
+        labels = numpy.asarray(labels)
+        ulabels = []
+        for lab in labels:
+            if lab not in ulabels:
+                ulabels.append(lab)
+        nlabels = len(ulabels)
+        if colors is None:
+            colors = [cmap(float(i) / nlabels) for i in range(nlabels)]
+        X_ = self.project(X, self.Q)[:, dimensions]
+        kwargs.update(dict(s=s))
+        plots = list()
+        for i, l in enumerate(ulabels):
+            kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)]))
+            plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs))
+        ax.set_xlabel(r"PC$_{%i}$" % (dimensions[0] + 1))
+        ax.set_ylabel(r"PC$_{%i}$" % (dimensions[1] + 1))
+        try:
+            pylab.tight_layout()
+        except:
+            pass
+        return plots
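
Similarly, a minimal sketch of the new `GPy.util.pca.pca` helper in isolation, using only the attributes and methods defined in the file above (the data is made up for illustration):

    # Illustrative sketch only: fit the helper, inspect variance fractions, project.
    import numpy as np
    from GPy.util.pca import pca

    Y = np.random.randn(50, 10)    # N x D; N >= D selects the primal branch
    p = pca(Y)                     # eigendecomposition happens in __init__
    print(p.fracs[:2])             # variance fraction of the top two PCs
    X2 = p.project(Y, 2)           # N x 2 projection onto the first two PCs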