From ccfcfa1a85a97163bdc42bb5287ccf1b017647e0 Mon Sep 17 00:00:00 2001
From: Mark Pullin
Date: Mon, 5 Feb 2018 11:21:02 +0000
Subject: [PATCH] Allow calculation of full predictive covariance matrices
 with multiple outputs and normalization

---
 GPy/core/gp.py             | 46 +++++++++++++++++++++++----------
 GPy/testing/model_tests.py | 18 +++++++++++++
 GPy/testing/util_tests.py  | 17 ++++++++++++-
 GPy/util/normalizer.py     | 52 ++++++++++++++++++--------------------
 4 files changed, 91 insertions(+), 42 deletions(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index d6f42c1c..b7a9d1d3 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -282,10 +282,12 @@ class GP(Model):
             mu += self.mean_function.f(Xnew)
         return mu, var
 
-    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None, include_likelihood=True):
+    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None,
+                likelihood=None, include_likelihood=True):
         """
-        Predict the function(s) at the new point(s) Xnew. This includes the likelihood
-        variance added to the predicted underlying function (usually referred to as f).
+        Predict the function(s) at the new point(s) Xnew. This includes the
+        likelihood variance added to the predicted underlying function
+        (usually referred to as f).
 
         In order to predict without adding in the likelihood give
         `include_likelihood=False`, or refer to self.predict_noiseless().
@@ -295,33 +297,49 @@ class GP(Model):
         :param full_cov: whether to return the full covariance matrix, or just
                          the diagonal
         :type full_cov: bool
-        :param Y_metadata: metadata about the predicting point to pass to the likelihood
+        :param Y_metadata: metadata about the predicting point to pass to the
+                           likelihood
        :param kern: The kernel to use for prediction (defaults to the model
                     kern). this is useful for examining e.g. subprocesses.
-        :param bool include_likelihood: Whether or not to add likelihood noise to the predicted underlying latent function f.
+        :param include_likelihood: Whether or not to add likelihood noise to
+                                   the predicted underlying latent function f.
+        :type include_likelihood: bool
 
        :returns: (mean, var):
            mean: posterior mean, a Numpy array, Nnew x self.input_dim
-           var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+           var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False,
+                Nnew x Nnew otherwise
 
-           If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
-           This is to allow for different normalizations of the output dimensions.
+           If full_cov and self.output_dim > 1, the return shape of var is
+           Nnew x Nnew x self.output_dim. If self.output_dim == 1, the return
+           shape is Nnew x Nnew. This is to allow for different normalizations
+           of the output dimensions.
 
-        Note: If you want the predictive quantiles (e.g. 95% confidence interval) use :py:func:"~GPy.core.gp.GP.predict_quantiles".
+        Note: If you want the predictive quantiles (e.g. 95% confidence
+        interval) use :py:func:`~GPy.core.gp.GP.predict_quantiles`.
""" - #predict the latent function values - mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern) + + # Predict the latent function values + mean, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern) if include_likelihood: # now push through likelihood if likelihood is None: likelihood = self.likelihood - mu, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata) + mean, var = likelihood.predictive_values(mean, var, full_cov, + Y_metadata=Y_metadata) if self.normalizer is not None: - mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var) + mean = self.normalizer.inverse_mean(mean) - return mu, var + # We need to create 3d array for the full covariance matrix with + # multiple outputs. + if full_cov & (mean.shape[1] > 1): + var = self.normalizer.inverse_covariance(var) + else: + var = self.normalizer.inverse_variance(var) + + return mean, var def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None): """ diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index c8d097a3..3a0cdcfa 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -118,6 +118,24 @@ class MiscTests(unittest.TestCase): from scipy.stats import norm np.testing.assert_allclose((mu+(norm.ppf(qs/100.)*np.sqrt(var))).flatten(), np.array(q95).flatten()) + def test_multioutput_regression_with_normalizer(self): + """ + Test that normalizing works in multi-output case + """ + + # Create test inputs + X1 = (np.random.rand(50, 1) * 8) + Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05 + Y2 = -np.sin(X1) + np.random.randn(*X1.shape) * 0.05 + Y = np.hstack((Y1, Y2)) + + m = GPy.models.GPRegression(X1, Y, normalizer=True) + + n_test_point = 10 + mean, covaraince = m.predict(np.random.rand(n_test_point, 1), + full_cov=True) + self.assertTrue(covaraince.shape == (n_test_point, n_test_point, 2)) + def check_jacobian(self): try: import autograd.numpy as np, autograd as ag, GPy, matplotlib.pyplot as plt diff --git a/GPy/testing/util_tests.py b/GPy/testing/util_tests.py index 5cd275c2..bdab63e8 100644 --- a/GPy/testing/util_tests.py +++ b/GPy/testing/util_tests.py @@ -28,7 +28,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #=============================================================================== -import unittest, numpy as np +import unittest +import numpy as np import GPy class TestDebug(unittest.TestCase): @@ -225,3 +226,17 @@ class TestUnivariateGaussian(unittest.TestCase): for i in range(len(pySols)): diff += abs(derivLogCdfNormal(self.zz[i]) - pySols[i]) self.assertTrue(diff < 1e-8) + +class TestStandardize(unittest.TestCase): + def setUp(self): + self.normalizer = GPy.util.normalizer.Standardize() + y = np.stack([np.random.randn(10), 2*np.random.randn(10)], axis=1) + self.normalizer.scale_by(y) + + def test_inverse_covariance(self): + """ + Test inverse covariance outputs correct size + """ + covariance = np.random.rand(100, 100) + output = self.normalizer.inverse_covariance(covariance) + self.assertTrue(output.shape == (100, 100, 2)) \ No newline at end of file diff --git a/GPy/util/normalizer.py b/GPy/util/normalizer.py index b62ac35b..7a3ee020 100644 --- a/GPy/util/normalizer.py +++ b/GPy/util/normalizer.py @@ -3,30 +3,46 @@ Created on Aug 27, 2014 @author: Max Zwiessele ''' -import logging import numpy as np + class _Norm(object): def __init__(self): pass + def scale_by(self, Y): """ Use data matrix Y as normalization space to work in. 
""" raise NotImplementedError + def normalize(self, Y): """ Project Y into normalized space """ if not self.scaled(): raise AttributeError("Norm object not initialized yet, try calling scale_by(data) first.") + def inverse_mean(self, X): """ Project the normalized object X into space of Y """ raise NotImplementedError + def inverse_variance(self, var): return var + + def inverse_covariance(self, covariance): + """ + Convert scaled covariance to unscaled. + Args: + covariance - numpy array of shape (n, n) + Returns: + covariance - numpy array of shape (n, n, m) where m is number of + outputs + """ + raise NotImplementedError + def scaled(self): """ Whether this Norm object has been initialized. @@ -57,17 +73,25 @@ class _Norm(object): class Standardize(_Norm): def __init__(self): self.mean = None + def scale_by(self, Y): Y = np.ma.masked_invalid(Y, copy=False) self.mean = Y.mean(0).view(np.ndarray) self.std = Y.std(0).view(np.ndarray) + def normalize(self, Y): super(Standardize, self).normalize(Y) return (Y-self.mean)/self.std + def inverse_mean(self, X): return (X*self.std)+self.mean + def inverse_variance(self, var): return (var*(self.std**2)) + + def inverse_covariance(self, covariance): + return (covariance[..., np.newaxis]*(self.std**2)) + def scaled(self): return self.mean is not None @@ -87,29 +111,3 @@ class Standardize(_Norm): if "std" in input_dict: s.std = np.array(input_dict["std"]) return s - -# Inverse variance to be implemented, disabling for now -# If someone in the future want to implement this, -# we need to implement the inverse variance for -# normalization. This means, we need to know the factor -# for the variance to be multiplied to the variance in -# normalized space. This is easy to compute for standardization -# (see above) but gets tricky here. -# class Normalize(_Norm): -# def __init__(self): -# self.ymin = None -# self.ymax = None -# def scale_by(self, Y): -# Y = np.ma.masked_invalid(Y, copy=False) -# self.ymin = Y.min(0).view(np.ndarray) -# self.ymax = Y.max(0).view(np.ndarray) -# def normalize(self, Y): -# super(Normalize, self).normalize(Y) -# return (Y - self.ymin) / (self.ymax - self.ymin) - .5 -# def inverse_mean(self, X): -# return (X + .5) * (self.ymax - self.ymin) + self.ymin -# def inverse_variance(self, var): -# -# return (var*(self.std**2)) -# def scaled(self): -# return (self.ymin is not None) and (self.ymax is not None)