mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-11 15:15:15 +02:00
[pca] missing data is now handled as mean
This commit is contained in:
parent
a63278c234
commit
d954829a40
1 changed file with 12 additions and 3 deletions
|
|
@ -11,14 +11,16 @@ try:
|
|||
except:
|
||||
pass
|
||||
from numpy.linalg.linalg import LinAlgError
|
||||
from operator import setitem
|
||||
import itertools
|
||||
|
||||
class pca(object):
|
||||
"""
|
||||
pca module with automatic primal/dual determination.
|
||||
"""
|
||||
def __init__(self, X):
|
||||
self.mu = X.mean(0)
|
||||
self.sigma = X.std(0)
|
||||
self.mu = None
|
||||
self.sigma = None
|
||||
|
||||
X = self.center(X)
|
||||
|
||||
|
|
@ -39,6 +41,13 @@ class pca(object):
|
|||
"""
|
||||
Center `X` in pca space.
|
||||
"""
|
||||
X = X.copy()
|
||||
inan = numpy.isnan(X)
|
||||
if self.mu is None:
|
||||
X_ = numpy.ma.masked_array(X, inan)
|
||||
self.mu = X_.mean(0).base
|
||||
self.sigma = X_.std(0).base
|
||||
reduce(lambda y,x: setitem(x[0], x[1], x[2]), itertools.izip(X.T, inan.T, self.mu), None)
|
||||
X = X - self.mu
|
||||
X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma)
|
||||
return X
|
||||
|
|
@ -94,7 +103,7 @@ class pca(object):
|
|||
fignum=None, cmap=None, # @UndefinedVariable
|
||||
** kwargs):
|
||||
"""
|
||||
Plot dimensions `dimensions` with given labels against each other in
|
||||
Plot dimensions `dimensions` with given labels against each other in
|
||||
PC space. Labels can be any sequence of labels of dimensions X.shape[0].
|
||||
Labels can be drawn with a subsequent call to legend()
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue