diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index f7d80c61..3e788927 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -30,3 +30,4 @@ from .gp_grid_regression import GPRegressionGrid
 from .gp_multiout_regression import GPMultioutRegression
 from .gp_multiout_regression_md import GPMultioutRegressionMD
 from .tp_regression import TPRegression
+from .multioutput_gp import MultioutputGP
diff --git a/GPy/models/multioutput_gp.py b/GPy/models/multioutput_gp.py
new file mode 100644
index 00000000..468197cf
--- /dev/null
+++ b/GPy/models/multioutput_gp.py
@@ -0,0 +1,146 @@
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+from paramz import ObsAr
+
+from .. import kern, likelihoods, util
+from ..core.gp import GP
+from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
+from ..likelihoods.gaussian import Gaussian
+from ..util.multioutput import index_to_slices
+
+import logging
+logger = logging.getLogger("GP")
+
+class MultioutputGP(GP):
+    """
+    Gaussian process model for joint inference over multiple outputs, each with
+    its own kernel and likelihood.
+
+    :param X_list: input observations, one array per output
+    :param Y_list: output observations, one array per output
+    :param kernel_list: kernels, one per output; they are combined into a single
+        :class:`~GPy.kern.MultioutputKern`
+    :param likelihood_list: likelihoods, one per output
+    :param kernel_cross_covariances: dict of cross covariances between the kernels
+    :param inference_method: the
+        :class:`~GPy.inference.latent_function_inference.LatentFunctionInference`
+        inference method to use. If None, exact inference is used when all
+        likelihoods are Gaussian, and expectation propagation otherwise.
+    """
+    def __init__(self, X_list, Y_list, kernel_list, likelihood_list, name='multioutputgp', kernel_cross_covariances={}, inference_method=None):
+        # Stack inputs and outputs; build_XY appends an output-index column to X
+        X, Y, self.output_index = util.multioutput.build_XY(X_list, Y_list)
+
+        assert isinstance(kernel_list, list)
+        kernel = kern.MultioutputKern(kernels=kernel_list, cross_covariances=kernel_cross_covariances)
+
+        assert isinstance(likelihood_list, list)
+        likelihood = likelihoods.MultioutputLikelihood(likelihood_list)
+
+        if inference_method is None:
+            if all(isinstance(l, Gaussian) for l in likelihood_list):
+                inference_method = exact_gaussian_inference.ExactGaussianInference()
+            else:
+                inference_method = expectation_propagation.EP()
+
+        super(MultioutputGP, self).__init__(X, Y, kernel, likelihood, name=name,
+                                            Y_metadata={'output_index': self.output_index, 'trials': np.ones(self.output_index.shape)},
+                                            inference_method=inference_method)
+
+    def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
+        if isinstance(Xnew, list):
+            Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
+            if Y_metadata is None:
+                Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
+        return super(MultioutputGP, self).predict_noiseless(Xnew, full_cov, Y_metadata, kern)
+
+    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None, include_likelihood=True):
+        if isinstance(Xnew, list):
+            Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
+            if Y_metadata is None:
+                Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
+        return super(MultioutputGP, self).predict(Xnew, full_cov, Y_metadata, kern, likelihood, include_likelihood)
+
+    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None, likelihood=None):
+        if isinstance(X, list):
+            X, _, ind = util.multioutput.build_XY(X, None)
+            if Y_metadata is None:
+                Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
+        return super(MultioutputGP, self).predict_quantiles(X, quantiles, Y_metadata, kern, likelihood)
+
+    def predictive_gradients(self, Xnew, kern=None):
+        """
+        Compute the derivatives of the predicted latent function with respect to X*.
+
+        Given a set of points at which to predict X* (size [N*,Q]), compute the
+        derivatives of the mean and variance. Resulting arrays are sized:
+
+         dmu_dX* -- [N*, Q, D], where D is the number of outputs in this GP
+           (usually one). Note that this is not the same as computing the mean
+           and variance of the derivative of the function!
+
+         dv_dX* -- [N*, Q], (since all outputs have the same variance)
+
+        :param Xnew: the points at which to get the predictive gradients; either
+            a stacked array with an output-index column, or a list with one array per output
+        :type Xnew: np.ndarray (N* x self.input_dim) or list of np.ndarray
+        :returns: dmu_dX, dv_dX
+        :rtype: [np.ndarray (N*, Q, D), np.ndarray (N*, Q)]
+        """
+        if isinstance(Xnew, list):
+            Xnew, _, _ = util.multioutput.build_XY(Xnew, None)
+
+        # Gradients of DiffKern (gradient observation) outputs are not supported
+        slices = index_to_slices(Xnew[:, -1])
+        for i in range(len(slices)):
+            if self.kern.kern[i].name == 'diffKern' and len(slices[i]) > 0:
+                raise NotImplementedError("Predicting gradients of gradient observations is not (yet) supported")
+
+        if kern is None:
+            kern = self.kern
+        mean_jac = np.empty((Xnew.shape[0], Xnew.shape[1]-1, self.output_dim))
+        for i in range(self.output_dim):
+            mean_jac[:, :, i] = kern.gradients_X(self.posterior.woodbury_vector[:, i:i+1].T, Xnew, self._predictive_variable)[:, 0:-1]
+
+        # gradients wrt the diagonal part k_{xx}
+        dv_dX = kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)[:, 0:-1]
+        # gradients wrt the 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
+        if self.posterior.woodbury_inv.ndim == 3:  # per-output Woodbury inverses
+            tmp = np.empty(dv_dX.shape + (self.posterior.woodbury_inv.shape[2],))
+            tmp[:] = dv_dX[:, :, None]
+            for i in range(self.posterior.woodbury_inv.shape[2]):
+                alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv[:, :, i])
+                tmp[:, :, i] += kern.gradients_X(alpha, Xnew, self._predictive_variable)[:, 0:-1]
+        else:
+            tmp = dv_dX
+            alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv)
+            tmp += kern.gradients_X(alpha, Xnew, self._predictive_variable)[:, 0:-1]
+        return mean_jac, tmp
+
+    def log_predictive_density(self, x_test, y_test, Y_metadata=None):
+        if isinstance(x_test, list):
+            x_test, y_test, ind = util.multioutput.build_XY(x_test, y_test)
+            if Y_metadata is None:
+                Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
+        return super(MultioutputGP, self).log_predictive_density(x_test, y_test, Y_metadata)
+
+    def set_XY(self, X=None, Y=None):
+        if isinstance(X, list):
+            X, _, self.output_index = util.multioutput.build_XY(X, None)
+        if isinstance(Y, list):
+            _, Y, self.output_index = util.multioutput.build_XY(Y, Y)
+
+        self.update_model(False)
+        if Y is not None:
+            self.Y = ObsAr(Y)
+            self.Y_normalized = self.Y
+        if X is not None:
+            self.X = ObsAr(X)
+
+        self.Y_metadata = {'output_index': self.output_index, 'trials': np.ones(self.output_index.shape)}
+        # EP caches per-site approximation state, which is stale after the data change
+        if isinstance(self.inference_method, expectation_propagation.EP):
+            self.inference_method.reset()
+        self.update_model(True)
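For orientation before the tests, here is a minimal usage sketch. It is illustrative rather than part of the patch: the data and the names (x, xd, Xnew, m, ...) are invented, while the API calls are the ones added above.

    import numpy as np
    import GPy

    # Output 0: values of a function; output 1: values of its derivative
    x = np.linspace(1, 10, 20)[:, None]
    y = np.sin(x)
    xd = np.linspace(2, 8, 10)[:, None]
    yd = np.cos(xd)

    se = GPy.kern.RBF(input_dim=1)
    se_der = GPy.kern.DiffKern(se, 0)  # derivative of se w.r.t. input dimension 0
    gauss = GPy.likelihoods.Gaussian(variance=0.05**2)
    gauss_der = GPy.likelihoods.Gaussian(variance=0.05**2)

    m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd],
                                 kernel_list=[se, se_der],
                                 likelihood_list=[gauss, gauss_der])
    m.optimize()

    # Prediction accepts the same list form, one array per output. build_XY
    # stacks the arrays, appends the output index as the last column of X, and
    # Y_metadata is filled in automatically; rows of mu and var follow the
    # stacked order (output-0 points first, then output-1 points).
    Xnew = np.linspace(0, 11, 50)[:, None]
    mu, var = m.predict([Xnew, Xnew])

Keeping the per-output bookkeeping inside build_XY is what lets every predict-style method above reduce to the same pattern: stack, fill Y_metadata, delegate to GP.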
diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py
index 9187f209..bd438569 100644
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@@ -1180,6 +1180,67 @@ class GradientTests(np.testing.TestCase):
         with self.assertRaises(RuntimeError):
             m.posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]]))
 
+    def test_multioutput_model_with_derivative_observations(self):
+        f = lambda x: np.sin(x) + 0.1*(x - 2.)**2 - 0.005*x**3
+        fd = lambda x: np.cos(x) + 0.2*(x - 2.) - 0.015*x**2
+        N = 10
+        M = 10
+        sigma = 0.05
+        sigmader = 0.05
+        x = np.array([np.linspace(1, 10, N)]).T
+        y = f(x) + np.array(sigma*np.random.normal(0, 1, (N, 1)))
+
+        xd = np.array([np.linspace(2, 8, M)]).T
+        yd = fd(xd) + np.array(sigmader*np.random.normal(0, 1, (M, 1)))
+
+        # Squared exponential kernel, plus a separate DiffKern wrapping it for
+        # the derivative observations:
+        se = GPy.kern.RBF(input_dim=1, lengthscale=1.5, variance=0.2)
+        se_der = GPy.kern.DiffKern(se, 0)
+
+        # One likelihood per output:
+        gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
+        gauss_der = GPy.likelihoods.Gaussian(variance=sigmader**2)
+
+        # Create the model. Everything is given in lists, and the order of the
+        # inputs determines the order of the outputs: here regular observations
+        # come first and derivative observations second, so the kernels and the
+        # likelihoods must follow the same order.
+        m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list=[gauss, gauss_der])
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+        m.optimize(messages=0, ipython_notebook=False)
+
+        self.assertTrue(m.checkgrad())
+
+    def test_multioutput_model_with_ep(self):
+        f = lambda x: np.sin(x) + 0.1*(x - 2.)**2 - 0.005*x**3
+        fd = lambda x: np.cos(x) + 0.2*(x - 2.) - 0.015*x**2
+        N = 10
+        sigma = 0.05
+        x = np.array([np.linspace(1, 10, N)]).T
+        y = f(x) + np.array(sigma*np.random.normal(0, 1, (N, 1)))
+
+        M = 7
+        xd = np.array([np.linspace(2, 8, M)]).T
+        # Binary (+1/-1) labels from the sign of the derivative:
+        yd = 2*(fd(xd) > 0) - 1
+
+        # Squared exponential kernel, plus a separate DiffKern wrapping it for
+        # the derivative observations:
+        se = GPy.kern.RBF(input_dim=1, lengthscale=1.5, variance=0.2)
+        se_der = GPy.kern.DiffKern(se, 0)
+
+        # A Gaussian likelihood for the regular observations, a scaled probit
+        # for the binary ones:
+        gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
+        probit = GPy.likelihoods.Binomial(gp_link=GPy.likelihoods.link_functions.ScaledProbit(nu=100))
+
+        # Mixed likelihoods require EP inference:
+        m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list=[gauss, probit], inference_method=GPy.inference.latent_function_inference.EP(ep_mode="nested"))
+
+        self.assertTrue(m.checkgrad())
 
 def _create_missing_data_model(kernel, Q):
     D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3
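A complementary sketch for the new predictive_gradients override (again illustrative, not part of the patch; it assumes the model m from the sketch above). Passing a one-element list queries only the first output, which stays clear of the NotImplementedError guard for DiffKern outputs:

    import numpy as np

    # Gradients of the predictive mean and variance w.r.t. the inputs, output 0.
    # Points routed to a DiffKern output would raise NotImplementedError.
    Xnew = np.linspace(0, 11, 25)[:, None]
    dmu_dX, dv_dX = m.predictive_gradients([Xnew])
    # dmu_dX: shape (25, 1, 1) -- d mean / d input, per output column of Y
    # dv_dX:  shape (25, 1)    -- d variance / d input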