finally added pca package again

This commit is contained in:
Max Zwiessele 2014-03-24 11:16:57 +00:00
parent ed2aaab4bb
commit 321a75100c
5 changed files with 134 additions and 33 deletions

View file

@ -5,7 +5,6 @@
import numpy as np import numpy as np
import pylab as pb import pylab as pb
from .. import kern from .. import kern
from ..util.linalg import PCA
from ..core import GP, Param from ..core import GP, Param
from ..likelihoods import Gaussian from ..likelihoods import Gaussian
from .. import util from .. import util
@ -29,9 +28,11 @@ class GPLVM(GP):
""" """
if X is None: if X is None:
from ..util.initialization import initialize_latent from ..util.initialization import initialize_latent
X = initialize_latent(init, input_dim, Y) X, fracs = initialize_latent(init, input_dim, Y)
else:
fracs = np.ones(input_dim)
if kernel is None: if kernel is None:
kernel = kern.RBF(input_dim, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2)) kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
likelihood = Gaussian() likelihood = Gaussian()

View file

@ -6,12 +6,12 @@ import itertools
import pylab import pylab
from ..core import Model from ..core import Model
from ..util.linalg import PCA
from ..kern import Kern from ..kern import Kern
from ..core.parameterization.variational import NormalPosterior, NormalPrior from ..core.parameterization.variational import NormalPosterior, NormalPrior
from ..core.parameterization import Param, Parameterized from ..core.parameterization import Param, Parameterized
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC
from ..likelihoods import Gaussian from ..likelihoods import Gaussian
from GPy.util.initialization import initialize_latent
class MRD(Model): class MRD(Model):
""" """
@ -71,7 +71,7 @@ class MRD(Model):
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
if X_variance is None: if X_variance is None:
X_variance = np.random.uniform(0, .2, X.shape) X_variance = np.random.uniform(0, .1, X.shape)
self.variational_prior = NormalPrior() self.variational_prior = NormalPrior()
self.X = NormalPosterior(X, X_variance) self.X = NormalPosterior(X, X_variance)
@ -147,11 +147,11 @@ class MRD(Model):
if Ylist is None: if Ylist is None:
Ylist = self.Ylist Ylist = self.Ylist
if init in "PCA_concat": if init in "PCA_concat":
X = PCA(np.hstack(Ylist), self.input_dim)[0] X = initialize_latent('PCA', np.hstack(Ylist), self.input_dim)
elif init in "PCA_single": elif init in "PCA_single":
X = np.zeros((Ylist[0].shape[0], self.input_dim)) X = np.zeros((Ylist[0].shape[0], self.input_dim))
for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist): for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
X[:, qs] = PCA(Y, len(qs))[0] X[:, qs] = initialize_latent('PCA', Y, len(qs))
else: # init == 'random': else: # init == 'random':
X = np.random.randn(Ylist[0].shape[0], self.input_dim) X = np.random.randn(Ylist[0].shape[0], self.input_dim)
return X return X

View file

@ -5,13 +5,14 @@ Created on 24 Feb 2014
''' '''
import numpy as np import numpy as np
from linalg import PCA from GPy.util.pca import pca
def initialize_latent(init, input_dim, Y):
    """
    Initialize a latent space X for `Y` [NxD].

    :param init: initialization method; 'PCA' projects Y onto its principal
        components, anything else leaves the random initialization untouched
    :param input_dim: int, dimensionality of the latent space
    :param Y: NxD np.array of observed data
    :returns: (Xr, fracs) where Xr is an Nxinput_dim latent array and fracs
        holds the fraction of variance explained per latent dimension
        (ones when no PCA information is available)
    """
    Xr = np.random.randn(Y.shape[0], input_dim)
    if init == 'PCA':
        p = pca(Y)
        # PCA can supply at most Y.shape[1] components; remaining latent
        # dimensions keep their random initialization
        PC = p.project(Y, min(input_dim, Y.shape[1]))
        Xr[:PC.shape[0], :PC.shape[1]] = PC
        fracs = p.fracs[:input_dim]
    else:
        # BUG FIX: `p` was unbound on this branch, so the return statement
        # raised NameError for any non-PCA init; fall back to uniform fracs
        fracs = np.ones(input_dim)
    return Xr, fracs

View file

@ -580,26 +580,3 @@ def backsub_both_sides(L, X, transpose='left'):
tmp, _ = dtrtrs(L, X, lower=1, trans=0) tmp, _ = dtrtrs(L, X, lower=1, trans=0)
return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T
def PCA(Y, input_dim):
    """
    Principal component analysis: maximum likelihood solution by SVD.

    :param Y: NxD np.array of data
    :param input_dim: int, dimension of projection
    :rval X: Nxinput_dim np.array of dimensionality reduced data
    :rval W: input_dimxD mapping from X to Y
    """
    if not np.allclose(Y.mean(axis=0), 0.0):
        # the caller's array is never modified; we only center a copy below
        print("Y is not zero mean, centering it locally (GPy.util.linalg.PCA)")
    # economy SVD of the centered data; np.linalg.svd returns (U, s, Vt)
    Z = np.linalg.svd(Y - Y.mean(axis=0), full_matrices=False)
    X = Z[0][:, :input_dim]
    W = np.dot(np.diag(Z[1]), Z[2]).T[:, :input_dim]
    # rescale so every latent dimension has unit standard deviation,
    # compensating in W so that X.dot(W.T) still reconstructs centered Y;
    # guard zero-variance components against division by zero
    v = X.std(axis=0)
    v[v == 0.0] = 1.0
    X /= v
    W *= v
    return X, W.T

122
GPy/util/pca.py Normal file
View file

@ -0,0 +1,122 @@
'''
Created on 10 Sep 2012
@author: Max Zwiessele
@copyright: Max Zwiessele 2012
'''
import numpy
import pylab
import matplotlib
from numpy.linalg.linalg import LinAlgError
class pca(object):
    """
    PCA module with automatic primal/dual determination.

    Eigendecomposes X^T X (primal) when N >= D, or X X^T (dual) when N < D,
    whichever matrix is smaller for the given data shape. Input data are
    standardized (zero mean, unit variance per feature) before decomposition.
    """
    def __init__(self, X):
        # per-feature statistics of the training data, reused by center()
        self.mu = X.mean(0)
        self.sigma = X.std(0)
        X = self.center(X)
        if X.shape[0] >= X.shape[1]:
            # N >= D: primal, eigendecompose the D x D covariance-like matrix
            self.eigvals, self.eigvectors = self._primal_eig(X)
        else:
            # N < D: dual, eigendecompose the N x N Gram matrix instead
            self.eigvals, self.eigvectors = self._dual_eig(X)
        # order components by decreasing eigenvalue
        self.sort = numpy.argsort(self.eigvals)[::-1]
        self.eigvals = self.eigvals[self.sort]
        self.eigvectors = self.eigvectors[:, self.sort]
        # fraction of total variance explained by each component
        self.fracs = self.eigvals / self.eigvals.sum()
        # number of available components
        self.Q = self.eigvals.shape[0]

    def center(self, X):
        """
        Standardize `X` using the training mean and standard deviation.

        Zero-variance features are divided by 1e-30 instead of 0 to avoid
        a divide-by-zero (centered training data are 0 there anyway).
        """
        X = X - self.mu
        X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma)
        return X

    def _primal_eig(self, X):
        # eigh of X^T X (einsum avoids the explicit transpose copy)
        return numpy.linalg.eigh(numpy.einsum('ji,jk->ik', X, X))

    def _dual_eig(self, X):
        # eigh of X X^T, then map the N-dimensional eigenvectors back into
        # the D-dimensional feature space and renormalize to unit length
        dual_eigvals, dual_eigvects = numpy.linalg.eigh(numpy.einsum('ij,kj->ik', X, X))
        relevant_dimensions = numpy.argsort(numpy.abs(dual_eigvals))[-X.shape[1]:]
        eigvals = dual_eigvals[relevant_dimensions]
        eigvects = dual_eigvects[:, relevant_dimensions]
        eigvects = (1. / numpy.sqrt(X.shape[0] * numpy.abs(eigvals))) * X.T.dot(eigvects)
        eigvects /= numpy.sqrt(numpy.diag(eigvects.T.dot(eigvects)))
        return eigvals, eigvects

    def project(self, X, Q=None):
        """
        Project X into pca space, defined by the Q highest eigenvalues.

        Y = X dot V

        :param X: data to project; standardized with the training statistics
        :param Q: number of leading components to keep (defaults to all)
        :raises IndexError: if Q exceeds the input dimensionality
        """
        if Q is None:
            Q = self.Q
        if Q > X.shape[1]:
            raise IndexError("requested dimension larger than input dimension")
        X = self.center(X)
        return X.dot(self.eigvectors[:, :Q])

    def plot_fracs(self, Q=None, ax=None, fignum=None):
        """
        Plot fractions of Eigenvalues sorted in descending order.

        :param Q: number of components to show (defaults to all)
        :param ax: existing matplotlib axis to draw into (optional)
        :param fignum: figure number used when a new figure is created
        """
        if ax is None:
            fig = pylab.figure(fignum)
            ax = fig.add_subplot(111)
        if Q is None:
            Q = self.Q
        ticks = numpy.arange(Q)
        bar = ax.bar(ticks - .4, self.fracs[:Q])
        # BUG FIX: set_xticks' second positional argument is `minor` in older
        # matplotlib (and map() is a lazy iterator on Python 3) -- set the
        # labels explicitly via set_xticklabels instead
        ax.set_xticks(ticks)
        ax.set_xticklabels([r"${}$".format(x) for x in ticks + 1])
        ax.set_ylabel("Eigenvalue fraction")
        ax.set_xlabel("PC")
        ax.set_ylim(0, ax.get_ylim()[1])
        ax.set_xlim(ticks.min() - .5, ticks.max() + .5)
        try:
            pylab.tight_layout()
        except:
            # tight_layout is best-effort; older matplotlib may lack it
            pass
        return bar

    def plot_2d(self, X, labels=None, s=20, marker='o',
                dimensions=(0, 1), ax=None, colors=None,
                fignum=None, cmap=None,
                ** kwargs):
        """
        Plot dimensions `dimensions` with given labels against each other in
        PC space. Labels can be any sequence of labels of dimensions X.shape[0].
        Labels can be drawn with a subsequent call to legend()

        :param cmap: colormap for label colors; defaults to matplotlib's jet
            colormap, resolved lazily so matplotlib is only required when
            this method is actually called
        """
        if cmap is None:
            cmap = matplotlib.cm.jet  # @UndefinedVariable
        if ax is None:
            fig = pylab.figure(fignum)
            ax = fig.add_subplot(111)
        if labels is None:
            labels = numpy.zeros(X.shape[0])
        # collect unique labels in order of first appearance
        ulabels = []
        for lab in labels:
            if not lab in ulabels:
                ulabels.append(lab)
        nlabels = len(ulabels)
        if colors is None:
            colors = [cmap(float(i) / nlabels) for i in range(nlabels)]
        X_ = self.project(X, self.Q)[:, dimensions]
        kwargs.update(dict(s=s))
        plots = list()
        # one scatter call per label so legend() picks them up separately
        for i, l in enumerate(ulabels):
            kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)]))
            plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs))
        ax.set_xlabel(r"PC$_1$")
        ax.set_ylabel(r"PC$_2$")
        try:
            pylab.tight_layout()
        except:
            pass
        return plots