Merge remote-tracking branch 'origin/devel' into feature-multioutput

2026-07-14 16:32:15 +02:00 · 2018-06-24 12:43:49 +03:00 · 2018-06-24 12:43:49 +03:00 · afe9206656
commit afe9206656
parent 8e69db51a2 d85c9d5379
19 changed files with 642 additions and 142 deletions
--- a/GPy/version.py
+++ b/GPy/version.py
@ -1 +1 @@
-__version__ = "1.8.5"
+__version__ = "1.9.2"
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -282,10 +282,12 @@ class GP(Model):
            mu += self.mean_function.f(Xnew)
        return mu, var

-    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None, include_likelihood=True):
+    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, 
+                likelihood=None, include_likelihood=True):
        """
-        Predict the function(s) at the new point(s) Xnew. This includes the likelihood
-        variance added to the predicted underlying function (usually referred to as f).
+        Predict the function(s) at the new point(s) Xnew. This includes the
+        likelihood variance added to the predicted underlying function
+        (usually referred to as f).

        In order to predict without adding in the likelihood give
        `include_likelihood=False`, or refer to self.predict_noiseless().
@ -295,33 +297,49 @@ class GP(Model):
        :param full_cov: whether to return the full covariance matrix, or just
                         the diagonal
        :type full_cov: bool
-        :param Y_metadata: metadata about the predicting point to pass to the likelihood
+        :param Y_metadata: metadata about the predicting point to pass to the
+                           likelihood
        :param kern: The kernel to use for prediction (defaults to the model
                     kern). this is useful for examining e.g. subprocesses.
-        :param bool include_likelihood: Whether or not to add likelihood noise to the predicted underlying latent function f.
+        :param include_likelihood: Whether or not to add likelihood noise to
+                                   the predicted underlying latent function f.
+        :type include_likelihood: bool

        :returns: (mean, var):
            mean: posterior mean, a Numpy array, Nnew x self.input_dim
-            var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+            var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False,
+                 Nnew x Nnew otherwise

-           If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
-           This is to allow for different normalizations of the output dimensions.
+            If full_cov and self.output_dim > 1, the return shape of var is
+            Nnew x Nnew x self.output_dim. If self.output_dim == 1, the return
+            shape is Nnew x Nnew. This is to allow for different normalizations
+            of the output dimensions.

-        Note: If you want the predictive quantiles (e.g. 95% confidence interval) use :py:func:"~GPy.core.gp.GP.predict_quantiles".
+        Note: If you want the predictive quantiles (e.g. 95% confidence
+        interval) use :py:func:`~GPy.core.gp.GP.predict_quantiles`.
        """
-        #predict the latent function values
-        mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
+
+        # Predict the latent function values
+        mean, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)

        if include_likelihood:
            # now push through likelihood
            if likelihood is None:
                likelihood = self.likelihood
-            mu, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
+            mean, var = likelihood.predictive_values(mean, var, full_cov,
+                                                     Y_metadata=Y_metadata)

        if self.normalizer is not None:
-            mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
+            mean = self.normalizer.inverse_mean(mean)

-        return mu, var
+            # We need to create a 3d array for the full covariance matrix
+            # when there are multiple outputs.
+            if full_cov & (mean.shape[1] > 1):
+                var = self.normalizer.inverse_covariance(var)
+            else:
+                var = self.normalizer.inverse_variance(var)
+
+        return mean, var

    def predict_noiseless(self,  Xnew, full_cov=False, Y_metadata=None, kern=None):
        """
@ -376,13 +394,16 @@ class GP(Model):

    def predictive_gradients(self, Xnew, kern=None):
        """
-        Compute the derivatives of the predicted latent function with respect to X*
+        Compute the derivatives of the predicted latent function with respect
+        to X*

        Given a set of points at which to predict X* (size [N*,Q]), compute the
        derivatives of the mean and variance. Resulting arrays are sized:
-         dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
+            dmu_dX* -- [N*, Q ,D], where D is the number of outputs in this GP
+            (usually one).

-        Note that this is not the same as computing the mean and variance of the derivative of the function!
+        Note that this is not the same as computing the mean and variance of
+        the derivative of the function!

         dv_dX*  -- [N*, Q],    (since all outputs have the same variance)
        :param X: The points at which to get the predictive gradients
@ -393,25 +414,32 @@ class GP(Model):
        """
        if kern is None:
            kern = self.kern
-        mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
+        mean_jac = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))

        for i in range(self.output_dim):
-            mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)
+            mean_jac[:, :, i] = kern.gradients_X(
+                self.posterior.woodbury_vector[:, i:i+1].T, Xnew,
+                self._predictive_variable)

-        # gradients wrt the diagonal part k_{xx}
-        dv_dX = kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)
-        #grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
+        # Gradients wrt the diagonal part k_{xx}
+        dv_dX = kern.gradients_X_diag(np.ones(Xnew.shape[0]), Xnew)
+
+        # Grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
        if self.posterior.woodbury_inv.ndim == 3:
-            tmp = np.empty(dv_dX.shape + (self.posterior.woodbury_inv.shape[2],))
-            tmp[:] = dv_dX[:,:,None]
+            var_jac = np.empty(dv_dX.shape +
+                               (self.posterior.woodbury_inv.shape[2],))
+            var_jac[:] = dv_dX[:, :, None]
            for i in range(self.posterior.woodbury_inv.shape[2]):
-                alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv[:, :, i])
-                tmp[:, :, i] += kern.gradients_X(alpha, Xnew, self._predictive_variable)
+                alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable),
+                                   self.posterior.woodbury_inv[:, :, i])
+                var_jac[:, :, i] += kern.gradients_X(alpha, Xnew,
+                                                     self._predictive_variable)
        else:
-            tmp = dv_dX
-            alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv)
-            tmp += kern.gradients_X(alpha, Xnew, self._predictive_variable)
-        return mean_jac, tmp
+            var_jac = dv_dX
+            alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable),
+                               self.posterior.woodbury_inv)
+            var_jac += kern.gradients_X(alpha, Xnew, self._predictive_variable)
+        return mean_jac, var_jac

    def predict_jacobian(self, Xnew, kern=None, full_cov=False):
        """
@ -564,7 +592,7 @@ class GP(Model):
        if self.output_dim == 1:
            return sim_one_dim(m, v)
        else:
-            fsim = np.empty((self.output_dim, self.num_data, size))
+            fsim = np.empty((self.output_dim, X.shape[0], size))
            for d in range(self.output_dim):
                if full_cov and v.ndim == 3:
                    fsim[d] = sim_one_dim(m[:, d], v[:, :, d])
--- a/GPy/core/parameterization/priors.py
+++ b/GPy/core/parameterization/priors.py
@ -288,9 +288,17 @@ class Gamma(Prior):
        cls._instances.append(weakref.ref(o))
        return cls._instances[-1]()

+    @property
+    def a(self):
+        return self._a
+
+    @property
+    def b(self):
+        return self._b
+
    def __init__(self, a, b):
-        self.a = float(a)
-        self.b = float(b)
+        self._a = float(a)
+        self._b = float(b)
        self.constant = -gammaln(self.a) + a * np.log(b)

    def __str__(self):
@ -333,8 +341,8 @@ class Gamma(Prior):
        return self.a, self.b

    def __setstate__(self, state):
-        self.a = state[0]
-        self.b = state[1]
+        self._a = state[0]
+        self._b = state[1]
        self.constant = -gammaln(self.a) + self.a * np.log(self.b)

 class InverseGamma(Gamma):
@ -360,8 +368,8 @@ class InverseGamma(Gamma):
        return cls._instances[-1]()

    def __init__(self, a, b):
-        self.a = float(a)
-        self.b = float(b)
+        self._a = float(a)
+        self._b = float(b)
        self.constant = -gammaln(self.a) + a * np.log(b)

    def __str__(self):
--- a/GPy/examples/state_space.py
+++ b/GPy/examples/state_space.py
@ -4,23 +4,26 @@ import matplotlib.pyplot as plt

 import GPy.models.state_space_model as SS_model

-X = np.linspace(0, 10, 2000)[:, None]
-Y = np.sin(X) + np.random.randn(*X.shape)*0.1
+def state_space_example():
+    X = np.linspace(0, 10, 2000)[:, None]
+    Y = np.sin(X) + np.random.randn(*X.shape)*0.1

-kernel1 = GPy.kern.Matern32(X.shape[1])
-m1  = GPy.models.GPRegression(X,Y, kernel1)
+    kernel1 = GPy.kern.Matern32(X.shape[1])
+    m1  = GPy.models.GPRegression(X,Y, kernel1)

-print(m1)
-m1.optimize(optimizer='bfgs',messages=True)
+    print(m1)
+    m1.optimize(optimizer='bfgs',messages=True)

-print(m1)
+    print(m1)

-kernel2 = GPy.kern.sde_Matern32(X.shape[1])
-#m2  = SS_model.StateSpace(X,Y, kernel2)
-m2 = GPy.models.StateSpace(X,Y, kernel2)
-print(m2)
+    kernel2 = GPy.kern.sde_Matern32(X.shape[1])
+    #m2  = SS_model.StateSpace(X,Y, kernel2)
+    m2 = GPy.models.StateSpace(X,Y, kernel2)
+    print(m2)

-m2.optimize(optimizer='bfgs',messages=True)
+    m2.optimize(optimizer='bfgs',messages=True)

-print(m2)
+    print(m2)
+
+    return m1, m2

--- a/GPy/inference/latent_function_inference/expectation_propagation.py
+++ b/GPy/inference/latent_function_inference/expectation_propagation.py
@ -132,6 +132,13 @@ class posteriorParamsDTC(posteriorParamsBase):
        self.mu += (delta_v-delta_tau*self.mu[i])*si
        #mu = np.dot(Sigma, v_tilde)

+    def to_dict(self):
+        return { "mu": self.mu.tolist(), "Sigma_diag": self.Sigma_diag.tolist()}
+
+    @staticmethod
+    def from_dict(input_dict):
+        return posteriorParamsDTC(np.array(input_dict["mu"]), np.array(input_dict["Sigma_diag"]))
+
    @staticmethod
    def _recompute(LLT0, Kmn, ga_approx):
        LLT = LLT0 + np.dot(Kmn*ga_approx.tau[None,:],Kmn.T)
@ -533,3 +540,35 @@ class EPDTC(EPBase, VarDTC):
            #Posterior distribution parameters update
            if self.parallel_updates == False:
                post_params._update_rank1(LLT, Kmn, delta_v, delta_tau, i)
+
+
+    def to_dict(self):
+        input_dict = super(EPDTC, self)._to_dict()
+        input_dict["class"] = "GPy.inference.latent_function_inference.expectation_propagation.EPDTC"
+        if self.ga_approx_old is not  None:
+            input_dict["ga_approx_old"] = self.ga_approx_old.to_dict()
+        if self._ep_approximation is not  None:
+            input_dict["_ep_approximation"] = {}
+            input_dict["_ep_approximation"]["post_params"] = self._ep_approximation[0].to_dict()
+            input_dict["_ep_approximation"]["ga_approx"] = self._ep_approximation[1].to_dict()
+            input_dict["_ep_approximation"]["cav_params"] = self._ep_approximation[2].to_dict()
+            input_dict["_ep_approximation"]["log_Z_tilde"] = self._ep_approximation[3].tolist()
+
+        return input_dict
+
+    @staticmethod
+    def _from_dict(inference_class, input_dict):
+        ga_approx_old = input_dict.pop('ga_approx_old', None)
+        if ga_approx_old is not None:
+            ga_approx_old = gaussianApproximation.from_dict(ga_approx_old)
+        _ep_approximation_dict = input_dict.pop('_ep_approximation', None)
+        _ep_approximation = []
+        if _ep_approximation is not None:
+            _ep_approximation.append(posteriorParamsDTC.from_dict(_ep_approximation_dict["post_params"]))
+            _ep_approximation.append(gaussianApproximation.from_dict(_ep_approximation_dict["ga_approx"]))
+            _ep_approximation.append(cavityParams.from_dict(_ep_approximation_dict["cav_params"]))
+            _ep_approximation.append(np.array(_ep_approximation_dict["log_Z_tilde"]))
+        ee = EPDTC(**input_dict)
+        ee.ga_approx_old = ga_approx_old
+        ee._ep_approximation = _ep_approximation
+        return ee
--- a/GPy/kern/src/prod.py
+++ b/GPy/kern/src/prod.py
@ -31,13 +31,16 @@ class Prod(CombinationKernel):

    """
    def __init__(self, kernels, name='mul'):
-        for i, kern in enumerate(kernels[:]):
+        _newkerns = []
+        for kern in kernels:
            if isinstance(kern, Prod):
-                del kernels[i]
-                for part in kern.parts[::-1]:
-                    kern.unlink_parameter(part)
-                    kernels.insert(i, part)
-        super(Prod, self).__init__(kernels, name)
+                for part in kern.parts:
+                    #kern.unlink_parameter(part)
+                    _newkerns.append(part.copy())
+            else:
+                _newkerns.append(kern.copy())
+
+        super(Prod, self).__init__(_newkerns, name)

    def to_dict(self):
        input_dict = super(Prod, self)._to_dict()
--- a/GPy/plotting/gpy_plot/gp_plots.py
+++ b/GPy/plotting/gpy_plot/gp_plots.py
@ -337,7 +337,7 @@ def plot(self, plot_limits=None, fixed_inputs=None,
        plot_data = False
    plots = {}
    if hasattr(self, 'Z') and plot_inducing:
-        plots.update(_plot_inducing(self, canvas, visible_dims, projection, 'Inducing'))
+        plots.update(_plot_inducing(self, canvas, free_dims, projection, 'Inducing'))
    if plot_data:
        plots.update(_plot_data(self, canvas, which_data_rows, which_data_ycols, free_dims, projection, "Data"))
        plots.update(_plot_data_error(self, canvas, which_data_rows, which_data_ycols, free_dims, projection, "Data Error"))
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@ -118,6 +118,51 @@ class MiscTests(unittest.TestCase):
        from scipy.stats import norm
        np.testing.assert_allclose((mu+(norm.ppf(qs/100.)*np.sqrt(var))).flatten(), np.array(q95).flatten())

+    def test_multioutput_regression_with_normalizer(self):
+        """
+        Test that normalizing works in multi-output case
+        """
+
+        # Create test inputs
+        X = self.X
+        Y1 = np.sin(X) + np.random.randn(*X.shape) * 0.2
+        Y2 = -np.sin(X) + np.random.randn(*X.shape) * 0.05
+        Y = np.hstack((Y1, Y2))
+
+        mu, std = Y.mean(0), Y.std(0)
+        m = GPy.models.GPRegression(X, Y, normalizer=True)
+        m.optimize(messages=True)
+        assert(m.checkgrad())
+        k = GPy.kern.RBF(1)
+        m2 = GPy.models.GPRegression(X, (Y-mu)/std, normalizer=False)
+        m2[:] = m[:]
+
+        mu1, var1 = m.predict(m.X, full_cov=True)
+        mu2, var2 = m2.predict(m2.X, full_cov=True)
+        np.testing.assert_allclose(mu1, (mu2*std)+mu)
+        np.testing.assert_allclose(var1, var2[:, :, None]*std[None, None, :]**2)
+
+        mu1, var1 = m.predict(m.X, full_cov=False)
+        mu2, var2 = m2.predict(m2.X, full_cov=False)
+
+        np.testing.assert_allclose(mu1, (mu2*std)+mu)
+        np.testing.assert_allclose(var1, var2*std[None, :]**2)
+
+        q50n = m.predict_quantiles(m.X, (50,))
+        q50 = m2.predict_quantiles(m2.X, (50,))
+
+        np.testing.assert_allclose(q50n[0], (q50[0]*std)+mu)
+
+        # Test variance component:
+        qs = np.array([2.5, 97.5])
+        # The quantiles get computed before unnormalization
+        # And transformed using the mean transformation:
+        c = np.random.choice(X.shape[0])
+        q95 = m2.predict_quantiles(X[[c]], qs)
+        mu, var = m2.predict(X[[c]])
+        from scipy.stats import norm
+        np.testing.assert_allclose((mu.T+(norm.ppf(qs/100.)*np.sqrt(var))).T.flatten(), np.array(q95).flatten())
+
    def check_jacobian(self):
        try:
            import autograd.numpy as np, autograd as ag, GPy, matplotlib.pyplot as plt
--- a/GPy/testing/serialization_tests.py
+++ b/GPy/testing/serialization_tests.py
@ -116,11 +116,45 @@ class Test(unittest.TestCase):
        np.testing.assert_array_equal(e1._ep_approximation[2].v[:], e1_r._ep_approximation[2].v[:])
        np.testing.assert_array_equal(e1._ep_approximation[3][:], e1_r._ep_approximation[3][:])

+
+        e1 = GPy.inference.latent_function_inference.expectation_propagation.EPDTC(ep_mode="nested")
+        e1.ga_approx_old = GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10),np.random.rand(10))
+        e1._ep_approximation = []
+        e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.posteriorParamsDTC(np.random.rand(10),np.random.rand(10)))
+        e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10),np.random.rand(10)))
+        e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.cavityParams(10))
+        e1._ep_approximation[-1].v = np.random.rand(10)
+        e1._ep_approximation[-1].tau = np.random.rand(10)
+        e1._ep_approximation.append(np.random.rand(10))
+        e1_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e1.to_dict())
+
+
+        assert type(e1) == type(e1_r)
+        assert e1.epsilon==e1_r.epsilon
+        assert e1.eta==e1_r.eta
+        assert e1.delta==e1_r.delta
+        assert e1.always_reset==e1_r.always_reset
+        assert e1.max_iters==e1_r.max_iters
+        assert e1.ep_mode==e1_r.ep_mode
+        assert e1.parallel_updates==e1_r.parallel_updates
+
+        np.testing.assert_array_equal(e1.ga_approx_old.tau[:], e1_r.ga_approx_old.tau[:])
+        np.testing.assert_array_equal(e1.ga_approx_old.v[:], e1_r.ga_approx_old.v[:])
+        np.testing.assert_array_equal(e1._ep_approximation[0].mu[:], e1_r._ep_approximation[0].mu[:])
+        np.testing.assert_array_equal(e1._ep_approximation[0].Sigma_diag[:], e1_r._ep_approximation[0].Sigma_diag[:])
+        np.testing.assert_array_equal(e1._ep_approximation[1].tau[:], e1_r._ep_approximation[1].tau[:])
+        np.testing.assert_array_equal(e1._ep_approximation[1].v[:], e1_r._ep_approximation[1].v[:])
+        np.testing.assert_array_equal(e1._ep_approximation[2].tau[:], e1_r._ep_approximation[2].tau[:])
+        np.testing.assert_array_equal(e1._ep_approximation[2].v[:], e1_r._ep_approximation[2].v[:])
+        np.testing.assert_array_equal(e1._ep_approximation[3][:], e1_r._ep_approximation[3][:])
+
+
        e2 = GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference()
        e2_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e2.to_dict())

        assert type(e2) == type(e2_r)

+
    def test_serialize_deserialize_model(self):
        np.random.seed(fixed_seed)
        N = 20
--- a/GPy/testing/util_tests.py
+++ b/GPy/testing/util_tests.py
@ -28,7 +28,8 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #===============================================================================

-import unittest, numpy as np
+import unittest
+import numpy as np
 import GPy

 class TestDebug(unittest.TestCase):
@ -225,3 +226,17 @@ class TestUnivariateGaussian(unittest.TestCase):
        for i in range(len(pySols)):
          diff += abs(derivLogCdfNormal(self.zz[i]) - pySols[i])
        self.assertTrue(diff  < 1e-8)
+
+class TestStandardize(unittest.TestCase):
+    def setUp(self):
+        self.normalizer = GPy.util.normalizer.Standardize()
+        y = np.stack([np.random.randn(10), 2*np.random.randn(10)], axis=1)
+        self.normalizer.scale_by(y)
+    
+    def test_inverse_covariance(self):
+        """
+        Test inverse covariance outputs correct size
+        """
+        covariance = np.random.rand(100, 100)
+        output = self.normalizer.inverse_covariance(covariance)
+        self.assertTrue(output.shape == (100, 100, 2))
--- a/GPy/util/normalizer.py
+++ b/GPy/util/normalizer.py
@ -3,30 +3,46 @@ Created on Aug 27, 2014

@author: Max Zwiessele
 '''
-import logging
 import numpy as np

+
 class _Norm(object):
    def __init__(self):
        pass
+
    def scale_by(self, Y):
        """
        Use data matrix Y as normalization space to work in.
        """
        raise NotImplementedError
+
    def normalize(self, Y):
        """
        Project Y into normalized space
        """
        if not self.scaled():
            raise AttributeError("Norm object not initialized yet, try calling scale_by(data) first.")
+
    def inverse_mean(self, X):
        """
        Project the normalized object X into space of Y
        """
        raise NotImplementedError
+
    def inverse_variance(self, var):
        return var
+
+    def inverse_covariance(self, covariance):
+        """
+        Convert scaled covariance to unscaled.
+        Args:
+            covariance - numpy array of shape (n, n)
+        Returns:
+            covariance - numpy array of shape (n, n, m) where m is number of
+                         outputs
+        """
+        raise NotImplementedError
+
    def scaled(self):
        """
        Whether this Norm object has been initialized.
@ -57,17 +73,25 @@ class _Norm(object):
 class Standardize(_Norm):
    def __init__(self):
        self.mean = None
+
    def scale_by(self, Y):
        Y = np.ma.masked_invalid(Y, copy=False)
        self.mean = Y.mean(0).view(np.ndarray)
        self.std = Y.std(0).view(np.ndarray)
+
    def normalize(self, Y):
        super(Standardize, self).normalize(Y)
        return (Y-self.mean)/self.std
+
    def inverse_mean(self, X):
        return (X*self.std)+self.mean
+
    def inverse_variance(self, var):
        return (var*(self.std**2))
+
+    def inverse_covariance(self, covariance):
+        return (covariance[..., np.newaxis]*(self.std**2))
+
    def scaled(self):
        return self.mean is not None

@ -87,29 +111,3 @@ class Standardize(_Norm):
        if "std" in input_dict:
            s.std = np.array(input_dict["std"])
        return s
-
-# Inverse variance to be implemented, disabling for now
-# If someone in the future want to implement this,
-# we need to implement the inverse variance for
-# normalization. This means, we need to know the factor
-# for the variance to be multiplied to the variance in
-# normalized space. This is easy to compute for standardization
-# (see above) but gets tricky here.
-# class Normalize(_Norm):
-#     def __init__(self):
-#         self.ymin = None
-#         self.ymax = None
-#     def scale_by(self, Y):
-#         Y = np.ma.masked_invalid(Y, copy=False)
-#         self.ymin = Y.min(0).view(np.ndarray)
-#         self.ymax = Y.max(0).view(np.ndarray)
-#     def normalize(self, Y):
-#         super(Normalize, self).normalize(Y)
-#         return (Y - self.ymin) / (self.ymax - self.ymin) - .5
-#     def inverse_mean(self, X):
-#         return (X + .5) * (self.ymax - self.ymin) + self.ymin
-#     def inverse_variance(self, var):
-#
-#         return (var*(self.std**2))
-#     def scaled(self):
-#         return (self.ymin is not None) and (self.ymax is not None)