Added multioutput_gp model and tests for it

This commit is contained in:
Siivola Eero 2018-09-05 16:37:40 +03:00
parent 101e93b73e
commit 0daad96da1
3 changed files with 216 additions and 0 deletions


@@ -30,3 +30,4 @@ from .gp_grid_regression import GPRegressionGrid
from .gp_multiout_regression import GPMultioutRegression
from .gp_multiout_regression_md import GPMultioutRegressionMD
from .tp_regression import TPRegression
from .multioutput_gp import MultioutputGP


@@ -0,0 +1,154 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .. import likelihoods
from ..likelihoods.gaussian import Gaussian
from .. import kern
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from ..util.multioutput import index_to_slices
from .. import util
from paramz import ObsAr
from ..core.gp import GP
import logging

logger = logging.getLogger("GP")

class MultioutputGP(GP):
    """
    Gaussian process model for multiple outputs, where each output has its own
    input observations, kernel and likelihood.

    :param X_list: list of input observations, one array per output
    :param Y_list: list of output observations, one array per output
    :param kernel_list: list of GPy kernels, one per output
    :param likelihood_list: list of GPy likelihoods, one per output
    :param kernel_cross_covariances: dict of cross-covariances between the kernels
    :param inference_method: the :class:`~GPy.inference.latent_function_inference.LatentFunctionInference`
        inference method to use; defaults to exact inference if all likelihoods
        are Gaussian and to expectation propagation otherwise
    :rtype: model object
    """
    def __init__(self, X_list, Y_list, kernel_list, likelihood_list, name='multioutputgp', kernel_cross_covariances={}, inference_method=None):
        # Stack the per-output inputs and outputs into single arrays, with an
        # index column identifying which output each row belongs to.
        X, Y, self.output_index = util.multioutput.build_XY(X_list, Y_list)

        assert isinstance(kernel_list, list)
        kernel = kern.MultioutputKern(kernels=kernel_list, cross_covariances=kernel_cross_covariances)

        assert isinstance(likelihood_list, list)
        likelihood = likelihoods.MultioutputLikelihood(likelihood_list)

        if inference_method is None:
            if all([isinstance(l, Gaussian) for l in likelihood_list]):
                inference_method = exact_gaussian_inference.ExactGaussianInference()
            else:
                inference_method = expectation_propagation.EP()

        super(MultioutputGP, self).__init__(X, Y, kernel, likelihood,
                                            Y_metadata={'output_index': self.output_index, 'trials': np.ones(self.output_index.shape)},
                                            inference_method=inference_method, name=name)
    def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
        if isinstance(Xnew, list):
            Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
            if Y_metadata is None:
                Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
        return super(MultioutputGP, self).predict_noiseless(Xnew, full_cov, Y_metadata, kern)
    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None, include_likelihood=True):
        if isinstance(Xnew, list):
            Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
            if Y_metadata is None:
                Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
        return super(MultioutputGP, self).predict(Xnew, full_cov, Y_metadata, kern, likelihood, include_likelihood)
    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None, likelihood=None):
        if isinstance(X, list):
            X, _, ind = util.multioutput.build_XY(X, None)
            if Y_metadata is None:
                Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
        return super(MultioutputGP, self).predict_quantiles(X, quantiles, Y_metadata, kern, likelihood)
    def predictive_gradients(self, Xnew, kern=None):
"""
Compute the derivatives of the predicted latent function with respect to X*
Given a set of points at which to predict X* (size [N*,Q]), compute the
derivatives of the mean and variance. Resulting arrays are sized:
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
Note that this is not the same as computing the mean and variance of the derivative of the function!
dv_dX* -- [N*, Q], (since all outputs have the same variance)
:param X: The points at which to get the predictive gradients
:type X: np.ndarray (Xnew x self.input_dim)
:returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ]
"""
        if isinstance(Xnew, list):
            Xnew, _, ind = util.multioutput.build_XY(Xnew, None)

        # Gradients of gradient (DiffKern) observations cannot be predicted.
        slices = index_to_slices(Xnew[:, -1])
        for i in range(len(slices)):
            if (self.kern.kern[i].name == 'diffKern') and len(slices[i]) > 0:
                raise NotImplementedError("It is not (yet) possible to predict gradients of gradient observations.")
        if kern is None:
            kern = self.kern

        # Gradients of the predictive mean, one slice per output dimension.
        mean_jac = np.empty((Xnew.shape[0], Xnew.shape[1]-1, self.output_dim))
        for i in range(self.output_dim):
            mean_jac[:, :, i] = kern.gradients_X(self.posterior.woodbury_vector[:, i:i+1].T, Xnew, self._predictive_variable)[:, 0:-1]

        # Gradients of the variance: first the diagonal part k_{xx} ...
        dv_dX = kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)[:, 0:-1]
        # ... then the 'Schur' part K_{xf} K_{ff}^{-1} K_{fx}.
        if self.posterior.woodbury_inv.ndim == 3:
            tmp = np.empty(dv_dX.shape + (self.posterior.woodbury_inv.shape[2],))
            tmp[:] = dv_dX[:, :, None]
            for i in range(self.posterior.woodbury_inv.shape[2]):
                alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv[:, :, i])
                tmp[:, :, i] += kern.gradients_X(alpha, Xnew, self._predictive_variable)
        else:
            tmp = dv_dX
            alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv)
            tmp += kern.gradients_X(alpha, Xnew, self._predictive_variable)[:, 0:-1]
        return mean_jac, tmp
    def log_predictive_density(self, x_test, y_test, Y_metadata=None):
        if isinstance(x_test, list):
            x_test, y_test, ind = util.multioutput.build_XY(x_test, y_test)
            if Y_metadata is None:
                Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
        return super(MultioutputGP, self).log_predictive_density(x_test, y_test, Y_metadata)
    def set_XY(self, X=None, Y=None):
        if isinstance(X, list):
            X, _, self.output_index = util.multioutput.build_XY(X, None)
        if isinstance(Y, list):
            _, Y, self.output_index = util.multioutput.build_XY(Y, Y)

        self.update_model(False)
        if Y is not None:
            self.Y = ObsAr(Y)
            self.Y_normalized = self.Y
        if X is not None:
            self.X = ObsAr(X)
        self.Y_metadata = {'output_index': self.output_index, 'trials': np.ones(self.output_index.shape)}
        if isinstance(self.inference_method, expectation_propagation.EP):
            self.inference_method.reset()
        self.update_model(True)
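
For reference, a minimal usage sketch of the new model, based only on the constructor and predict signatures above (the toy data and kernel parameters are illustrative, not part of the commit):

import numpy as np
import GPy

# Two toy outputs observed at different inputs (illustrative only).
X1, X2 = np.random.rand(20, 1), np.random.rand(15, 1)
Y1, Y2 = np.sin(6*X1), np.cos(6*X2)

# One kernel and one likelihood per output, given as lists; the list order
# ties inputs, outputs, kernels and likelihoods together.
m = GPy.models.MultioutputGP(X_list=[X1, X2], Y_list=[Y1, Y2],
                             kernel_list=[GPy.kern.RBF(1), GPy.kern.RBF(1)],
                             likelihood_list=[GPy.likelihoods.Gaussian(), GPy.likelihoods.Gaussian()])
m.optimize()

# Prediction also accepts a list with one test array per output; the list is
# stacked internally by util.multioutput.build_XY.
Xt = np.linspace(0, 1, 50)[:, None]
mu, var = m.predict([Xt, Xt])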


@@ -1180,6 +1180,67 @@ class GradientTests(np.testing.TestCase):
        with self.assertRaises(RuntimeError):
            m.posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]]))
    def test_multioutput_model_with_derivative_observations(self):
        f = lambda x: np.sin(x) + 0.1*(x - 2.)**2 - 0.005*x**3
        fd = lambda x: np.cos(x) + 0.2*(x - 2.) - 0.015*x**2
        N = 10
        M = 10
        sigma = 0.05
        sigmader = 0.05
        x = np.array([np.linspace(1, 10, N)]).T
        y = f(x) + np.array(sigma*np.random.normal(0, 1, (N, 1)))
        xd = np.array([np.linspace(2, 8, M)]).T
        yd = fd(xd) + np.array(sigmader*np.random.normal(0, 1, (M, 1)))
        # Squared exponential kernel:
        se = GPy.kern.RBF(input_dim=1, lengthscale=1.5, variance=0.2)
        # The derivative observations need a separate kernel, wrapping the base kernel:
        se_der = GPy.kern.DiffKern(se, 0)
        # One likelihood per output:
        gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
        gauss_der = GPy.likelihoods.Gaussian(variance=sigmader**2)
        # Create the model; everything is given as lists, and the order of the inputs
        # determines the order of the outputs: here the regular observations come first
        # and the derivative observations second, so the kernels and likelihoods must
        # follow the same order.
        m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list=[gauss, gauss_der])
        m.randomize()
        self.assertTrue(m.checkgrad())
        m.optimize(messages=0, ipython_notebook=False)
        self.assertTrue(m.checkgrad())
    def test_multioutput_model_with_ep(self):
        f = lambda x: np.sin(x) + 0.1*(x - 2.)**2 - 0.005*x**3
        fd = lambda x: np.cos(x) + 0.2*(x - 2.) - 0.015*x**2
        N = 10
        sigma = 0.05
        x = np.array([np.linspace(1, 10, N)]).T
        y = f(x) + np.array(sigma*np.random.normal(0, 1, (N, 1)))
        M = 7
        xd = np.array([np.linspace(2, 8, M)]).T
        yd = 2*(fd(xd) > 0) - 1  # sign observations of the derivative
        # Squared exponential kernel:
        se = GPy.kern.RBF(input_dim=1, lengthscale=1.5, variance=0.2)
        # The derivative observations need a separate kernel, wrapping the base kernel:
        se_der = GPy.kern.DiffKern(se, 0)
        # Gaussian likelihood for the function values, scaled probit for the derivative signs:
        gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
        probit = GPy.likelihoods.Binomial(gp_link=GPy.likelihoods.link_functions.ScaledProbit(nu=100))
        # Create the model; everything is given as lists:
        m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list=[gauss, probit], inference_method=GPy.inference.latent_function_inference.EP(ep_mode="nested"))
        self.assertTrue(m.checkgrad())
def _create_missing_data_model(kernel, Q):
    D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3
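
Finally, a short sketch of predictive_gradients on the derivative-observation model from the first test above. This assumes the augmented-input convention used inside the method, where the output index is appended as the last column of Xnew; the shapes follow the docstring:

# Continuing from the model m in test_multioutput_model_with_derivative_observations:
Xt = np.linspace(1, 10, 5)[:, None]
Xt_aug = np.hstack([Xt, np.zeros((5, 1))])  # index 0 = the function-value output
dmu_dX, dv_dX = m.predictive_gradients(Xt_aug)
# Per the docstring, dmu_dX is (N*, Q, D) and dv_dX is (N*, Q); here (5, 1, 1) and (5, 1).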