[pca] missing data (NaN) is now replaced by the per-dimension mean

This commit is contained in:
Max Zwiessele 2014-10-09 08:40:15 +01:00
parent a63278c234
commit d954829a40

View file

@ -11,14 +11,16 @@ try:
except:
pass
from numpy.linalg.linalg import LinAlgError
from operator import setitem
import itertools
class pca(object):
"""
pca module with automatic primal/dual determination.
"""
def __init__(self, X):
self.mu = X.mean(0)
self.sigma = X.std(0)
self.mu = None
self.sigma = None
X = self.center(X)
@ -39,6 +41,13 @@ class pca(object):
"""
Center `X` in pca space.
"""
X = X.copy()
inan = numpy.isnan(X)
if self.mu is None:
X_ = numpy.ma.masked_array(X, inan)
self.mu = X_.mean(0).base
self.sigma = X_.std(0).base
reduce(lambda y,x: setitem(x[0], x[1], x[2]), itertools.izip(X.T, inan.T, self.mu), None)
X = X - self.mu
X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma)
return X
@ -94,7 +103,7 @@ class pca(object):
fignum=None, cmap=None, # @UndefinedVariable
** kwargs):
"""
Plot dimensions `dimensions` with given labels against each other in
Plot dimensions `dimensions` with given labels against each other in
PC space. Labels can be any sequence of labels of dimensions X.shape[0].
Labels can be drawn with a subsequent call to legend()
"""