From d954829a40748b108653c96b8283fb5432ce001a Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 9 Oct 2014 08:40:15 +0100 Subject: [PATCH] [pca] missing data is now handled as mean --- GPy/util/pca.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/GPy/util/pca.py b/GPy/util/pca.py index 046f47d7..996e6f48 100644 --- a/GPy/util/pca.py +++ b/GPy/util/pca.py @@ -11,14 +11,16 @@ try: except: pass from numpy.linalg.linalg import LinAlgError +from operator import setitem +import itertools class pca(object): """ pca module with automatic primal/dual determination. """ def __init__(self, X): - self.mu = X.mean(0) - self.sigma = X.std(0) + self.mu = None + self.sigma = None X = self.center(X) @@ -39,6 +41,13 @@ class pca(object): """ Center `X` in pca space. """ + X = X.copy() + inan = numpy.isnan(X) + if self.mu is None: + X_ = numpy.ma.masked_array(X, inan) + self.mu = X_.mean(0).base + self.sigma = X_.std(0).base + reduce(lambda y,x: setitem(x[0], x[1], x[2]), itertools.izip(X.T, inan.T, self.mu), None) X = X - self.mu X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma) return X @@ -94,7 +103,7 @@ class pca(object): fignum=None, cmap=None, # @UndefinedVariable ** kwargs): """ - Plot dimensions `dimensions` with given labels against each other in + Plot dimensions `dimensions` with given labels against each other in PC space. Labels can be any sequence of labels of dimensions X.shape[0]. Labels can be drawn with a subsequent call to legend() """