mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-14 22:42:37 +02:00
finally added pca package again
This commit is contained in:
parent
ed2aaab4bb
commit
321a75100c
5 changed files with 134 additions and 33 deletions
|
|
@ -5,7 +5,6 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pylab as pb
|
import pylab as pb
|
||||||
from .. import kern
|
from .. import kern
|
||||||
from ..util.linalg import PCA
|
|
||||||
from ..core import GP, Param
|
from ..core import GP, Param
|
||||||
from ..likelihoods import Gaussian
|
from ..likelihoods import Gaussian
|
||||||
from .. import util
|
from .. import util
|
||||||
|
|
@ -29,9 +28,11 @@ class GPLVM(GP):
|
||||||
"""
|
"""
|
||||||
if X is None:
|
if X is None:
|
||||||
from ..util.initialization import initialize_latent
|
from ..util.initialization import initialize_latent
|
||||||
X = initialize_latent(init, input_dim, Y)
|
X, fracs = initialize_latent(init, input_dim, Y)
|
||||||
|
else:
|
||||||
|
fracs = np.ones(input_dim)
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.RBF(input_dim, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
|
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
|
||||||
|
|
||||||
likelihood = Gaussian()
|
likelihood = Gaussian()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,12 +6,12 @@ import itertools
|
||||||
import pylab
|
import pylab
|
||||||
|
|
||||||
from ..core import Model
|
from ..core import Model
|
||||||
from ..util.linalg import PCA
|
|
||||||
from ..kern import Kern
|
from ..kern import Kern
|
||||||
from ..core.parameterization.variational import NormalPosterior, NormalPrior
|
from ..core.parameterization.variational import NormalPosterior, NormalPrior
|
||||||
from ..core.parameterization import Param, Parameterized
|
from ..core.parameterization import Param, Parameterized
|
||||||
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC
|
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC
|
||||||
from ..likelihoods import Gaussian
|
from ..likelihoods import Gaussian
|
||||||
|
from GPy.util.initialization import initialize_latent
|
||||||
|
|
||||||
class MRD(Model):
|
class MRD(Model):
|
||||||
"""
|
"""
|
||||||
|
|
@ -71,7 +71,7 @@ class MRD(Model):
|
||||||
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
|
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
|
||||||
|
|
||||||
if X_variance is None:
|
if X_variance is None:
|
||||||
X_variance = np.random.uniform(0, .2, X.shape)
|
X_variance = np.random.uniform(0, .1, X.shape)
|
||||||
|
|
||||||
self.variational_prior = NormalPrior()
|
self.variational_prior = NormalPrior()
|
||||||
self.X = NormalPosterior(X, X_variance)
|
self.X = NormalPosterior(X, X_variance)
|
||||||
|
|
@ -147,11 +147,11 @@ class MRD(Model):
|
||||||
if Ylist is None:
|
if Ylist is None:
|
||||||
Ylist = self.Ylist
|
Ylist = self.Ylist
|
||||||
if init in "PCA_concat":
|
if init in "PCA_concat":
|
||||||
X = PCA(np.hstack(Ylist), self.input_dim)[0]
|
X = initialize_latent('PCA', np.hstack(Ylist), self.input_dim)
|
||||||
elif init in "PCA_single":
|
elif init in "PCA_single":
|
||||||
X = np.zeros((Ylist[0].shape[0], self.input_dim))
|
X = np.zeros((Ylist[0].shape[0], self.input_dim))
|
||||||
for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
|
for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
|
||||||
X[:, qs] = PCA(Y, len(qs))[0]
|
X[:, qs] = initialize_latent('PCA', Y, len(qs))
|
||||||
else: # init == 'random':
|
else: # init == 'random':
|
||||||
X = np.random.randn(Ylist[0].shape[0], self.input_dim)
|
X = np.random.randn(Ylist[0].shape[0], self.input_dim)
|
||||||
return X
|
return X
|
||||||
|
|
|
||||||
|
|
@ -5,13 +5,14 @@ Created on 24 Feb 2014
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from linalg import PCA
|
from GPy.util.pca import pca
|
||||||
|
|
||||||
def initialize_latent(init, input_dim, Y):
|
def initialize_latent(init, input_dim, Y):
|
||||||
Xr = np.random.randn(Y.shape[0], input_dim)
|
Xr = np.random.randn(Y.shape[0], input_dim)
|
||||||
if init == 'PCA':
|
if init == 'PCA':
|
||||||
PC = PCA(Y, input_dim)[0]
|
p = pca(Y)
|
||||||
|
PC = p.project(Y, min(input_dim, Y.shape[1]))
|
||||||
Xr[:PC.shape[0], :PC.shape[1]] = PC
|
Xr[:PC.shape[0], :PC.shape[1]] = PC
|
||||||
else:
|
else:
|
||||||
pass
|
pass
|
||||||
return Xr
|
return Xr, p.fracs[:input_dim]
|
||||||
|
|
@ -580,26 +580,3 @@ def backsub_both_sides(L, X, transpose='left'):
|
||||||
tmp, _ = dtrtrs(L, X, lower=1, trans=0)
|
tmp, _ = dtrtrs(L, X, lower=1, trans=0)
|
||||||
return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T
|
return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T
|
||||||
|
|
||||||
def PCA(Y, input_dim):
|
|
||||||
"""
|
|
||||||
Principal component analysis: maximum likelihood solution by SVD
|
|
||||||
|
|
||||||
:param Y: NxD np.array of data
|
|
||||||
:param input_dim: int, dimension of projection
|
|
||||||
|
|
||||||
|
|
||||||
:rval X: - Nxinput_dim np.array of dimensionality reduced data
|
|
||||||
:rval W: - input_dimxD mapping from X to Y
|
|
||||||
|
|
||||||
"""
|
|
||||||
if not np.allclose(Y.mean(axis=0), 0.0):
|
|
||||||
print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"
|
|
||||||
|
|
||||||
# Y -= Y.mean(axis=0)
|
|
||||||
|
|
||||||
Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False)
|
|
||||||
[X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]]
|
|
||||||
v = X.std(axis=0)
|
|
||||||
X /= v;
|
|
||||||
W *= v;
|
|
||||||
return X, W.T
|
|
||||||
|
|
|
||||||
122
GPy/util/pca.py
Normal file
122
GPy/util/pca.py
Normal file
|
|
@ -0,0 +1,122 @@
|
||||||
|
'''
|
||||||
|
Created on 10 Sep 2012
|
||||||
|
|
||||||
|
@author: Max Zwiessele
|
||||||
|
@copyright: Max Zwiessele 2012
|
||||||
|
'''
|
||||||
|
import numpy
|
||||||
|
import pylab
|
||||||
|
import matplotlib
|
||||||
|
from numpy.linalg.linalg import LinAlgError
|
||||||
|
|
||||||
|
class pca(object):
|
||||||
|
"""
|
||||||
|
pca module with automatic primal/dual determination.
|
||||||
|
"""
|
||||||
|
def __init__(self, X):
|
||||||
|
self.mu = X.mean(0)
|
||||||
|
self.sigma = X.std(0)
|
||||||
|
|
||||||
|
X = self.center(X)
|
||||||
|
|
||||||
|
# self.X = input
|
||||||
|
if X.shape[0] >= X.shape[1]:
|
||||||
|
# print "N >= D: using primal"
|
||||||
|
self.eigvals, self.eigvectors = self._primal_eig(X)
|
||||||
|
else:
|
||||||
|
# print "N < D: using dual"
|
||||||
|
self.eigvals, self.eigvectors = self._dual_eig(X)
|
||||||
|
self.sort = numpy.argsort(self.eigvals)[::-1]
|
||||||
|
self.eigvals = self.eigvals[self.sort]
|
||||||
|
self.eigvectors = self.eigvectors[:, self.sort]
|
||||||
|
self.fracs = self.eigvals / self.eigvals.sum()
|
||||||
|
self.Q = self.eigvals.shape[0]
|
||||||
|
|
||||||
|
def center(self, X):
|
||||||
|
"""
|
||||||
|
Center `X` in pca space.
|
||||||
|
"""
|
||||||
|
X = X - self.mu
|
||||||
|
X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma)
|
||||||
|
return X
|
||||||
|
|
||||||
|
def _primal_eig(self, X):
|
||||||
|
return numpy.linalg.eigh(numpy.einsum('ji,jk->ik',X,X))
|
||||||
|
|
||||||
|
def _dual_eig(self, X):
|
||||||
|
dual_eigvals, dual_eigvects = numpy.linalg.eigh(numpy.einsum('ij,kj->ik',X,X))
|
||||||
|
relevant_dimensions = numpy.argsort(numpy.abs(dual_eigvals))[-X.shape[1]:]
|
||||||
|
eigvals = dual_eigvals[relevant_dimensions]
|
||||||
|
eigvects = dual_eigvects[:, relevant_dimensions]
|
||||||
|
eigvects = (1. / numpy.sqrt(X.shape[0] * numpy.abs(eigvals))) * X.T.dot(eigvects)
|
||||||
|
eigvects /= numpy.sqrt(numpy.diag(eigvects.T.dot(eigvects)))
|
||||||
|
return eigvals, eigvects
|
||||||
|
|
||||||
|
def project(self, X, Q=None):
|
||||||
|
"""
|
||||||
|
Project X into pca space, defined by the Q highest eigenvalues.
|
||||||
|
Y = X dot V
|
||||||
|
"""
|
||||||
|
if Q is None:
|
||||||
|
Q = self.Q
|
||||||
|
if Q > X.shape[1]:
|
||||||
|
raise IndexError("requested dimension larger then input dimension")
|
||||||
|
X = self.center(X)
|
||||||
|
return X.dot(self.eigvectors[:, :Q])
|
||||||
|
|
||||||
|
def plot_fracs(self, Q=None, ax=None, fignum=None):
|
||||||
|
"""
|
||||||
|
Plot fractions of Eigenvalues sorted in descending order.
|
||||||
|
"""
|
||||||
|
if ax is None:
|
||||||
|
fig = pylab.figure(fignum)
|
||||||
|
ax = fig.add_subplot(111)
|
||||||
|
if Q is None:
|
||||||
|
Q = self.Q
|
||||||
|
ticks = numpy.arange(Q)
|
||||||
|
bar = ax.bar(ticks - .4, self.fracs[:Q])
|
||||||
|
ax.set_xticks(ticks, map(lambda x: r"${}$".format(x), ticks + 1))
|
||||||
|
ax.set_ylabel("Eigenvalue fraction")
|
||||||
|
ax.set_xlabel("PC")
|
||||||
|
ax.set_ylim(0, ax.get_ylim()[1])
|
||||||
|
ax.set_xlim(ticks.min() - .5, ticks.max() + .5)
|
||||||
|
try:
|
||||||
|
pylab.tight_layout()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return bar
|
||||||
|
|
||||||
|
def plot_2d(self, X, labels=None, s=20, marker='o',
|
||||||
|
dimensions=(0, 1), ax=None, colors=None,
|
||||||
|
fignum=None, cmap=matplotlib.cm.jet, # @UndefinedVariable
|
||||||
|
** kwargs):
|
||||||
|
"""
|
||||||
|
Plot dimensions `dimensions` with given labels against each other in
|
||||||
|
PC space. Labels can be any sequence of labels of dimensions X.shape[0].
|
||||||
|
Labels can be drawn with a subsequent call to legend()
|
||||||
|
"""
|
||||||
|
if ax is None:
|
||||||
|
fig = pylab.figure(fignum)
|
||||||
|
ax = fig.add_subplot(111)
|
||||||
|
if labels is None:
|
||||||
|
labels = numpy.zeros(X.shape[0])
|
||||||
|
ulabels = []
|
||||||
|
for lab in labels:
|
||||||
|
if not lab in ulabels:
|
||||||
|
ulabels.append(lab)
|
||||||
|
nlabels = len(ulabels)
|
||||||
|
if colors is None:
|
||||||
|
colors = [cmap(float(i) / nlabels) for i in range(nlabels)]
|
||||||
|
X_ = self.project(X, self.Q)[:,dimensions]
|
||||||
|
kwargs.update(dict(s=s))
|
||||||
|
plots = list()
|
||||||
|
for i, l in enumerate(ulabels):
|
||||||
|
kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)]))
|
||||||
|
plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs))
|
||||||
|
ax.set_xlabel(r"PC$_1$")
|
||||||
|
ax.set_ylabel(r"PC$_2$")
|
||||||
|
try:
|
||||||
|
pylab.tight_layout()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return plots
|
||||||
Loading…
Add table
Add a link
Reference in a new issue