Added multioutput_gp model and tests for it

This commit is contained in:
Siivola Eero 2018-09-05 16:37:40 +03:00
parent 101e93b73e
commit 0daad96da1
3 changed files with 216 additions and 0 deletions


@@ -30,3 +30,4 @@ from .gp_grid_regression import GPRegressionGrid
from .gp_multiout_regression import GPMultioutRegression
from .gp_multiout_regression_md import GPMultioutRegressionMD
from .tp_regression import TPRegression
from .multioutput_gp import MultioutputGP


@@ -0,0 +1,154 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .. import likelihoods
from ..likelihoods.gaussian import Gaussian
from .. import kern
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from ..util.multioutput import index_to_slices
from .. import util
from paramz import ObsAr
from ..core.gp import GP
import logging

logger = logging.getLogger("GP")

class MultioutputGP(GP):
    """
    Gaussian process model for multiple outputs, where each output has its own
    input observations, kernel and likelihood.

    :param X_list: list of input observations, one array per output
    :param Y_list: list of output observations, one array per output
    :param kernel_list: list of GPy kernels, one per output
    :param likelihood_list: list of GPy likelihoods, one per output
    :param kernel_cross_covariances: dict of cross-covariances between the kernels
    :param inference_method: the :class:`~GPy.inference.latent_function_inference.LatentFunctionInference`
        inference method to use; defaults to exact inference if all likelihoods
        are Gaussian and to expectation propagation otherwise
    :rtype: model object
    """
    def __init__(self, X_list, Y_list, kernel_list, likelihood_list, name='multioutputgp', kernel_cross_covariances={}, inference_method=None):
        # Stack the per-output inputs and outputs into single arrays, with an
        # index column identifying which output each row belongs to.
        X, Y, self.output_index = util.multioutput.build_XY(X_list, Y_list)

        assert isinstance(kernel_list, list)
        kernel = kern.MultioutputKern(kernels=kernel_list, cross_covariances=kernel_cross_covariances)

        assert isinstance(likelihood_list, list)
        likelihood = likelihoods.MultioutputLikelihood(likelihood_list)

        if inference_method is None:
            if all([isinstance(l, Gaussian) for l in likelihood_list]):
                inference_method = exact_gaussian_inference.ExactGaussianInference()
            else:
                inference_method = expectation_propagation.EP()

        super(MultioutputGP, self).__init__(X, Y, kernel, likelihood,
                                            Y_metadata={'output_index': self.output_index, 'trials': np.ones(self.output_index.shape)},
                                            inference_method=inference_method, name=name)
    def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
        if isinstance(Xnew, list):
            Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
            if Y_metadata is None:
                Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
        return super(MultioutputGP, self).predict_noiseless(Xnew, full_cov, Y_metadata, kern)
    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None, include_likelihood=True):
        if isinstance(Xnew, list):
            Xnew, _, ind = util.multioutput.build_XY(Xnew, None)
            if Y_metadata is None:
                Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
        return super(MultioutputGP, self).predict(Xnew, full_cov, Y_metadata, kern, likelihood, include_likelihood)
    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None, likelihood=None):
        if isinstance(X, list):
            X, _, ind = util.multioutput.build_XY(X, None)
            if Y_metadata is None:
                Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
        return super(MultioutputGP, self).predict_quantiles(X, quantiles, Y_metadata, kern, likelihood)
    def predictive_gradients(self, Xnew, kern=None):
"""
Compute the derivatives of the predicted latent function with respect to X*
Given a set of points at which to predict X* (size [N*,Q]), compute the
derivatives of the mean and variance. Resulting arrays are sized:
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
Note that this is not the same as computing the mean and variance of the derivative of the function!
dv_dX* -- [N*, Q], (since all outputs have the same variance)
:param X: The points at which to get the predictive gradients
:type X: np.ndarray (Xnew x self.input_dim)
:returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ]
"""
        if isinstance(Xnew, list):
            Xnew, _, ind = util.multioutput.build_XY(Xnew, None)

        # Gradients of gradient (DiffKern) observations cannot be predicted.
        slices = index_to_slices(Xnew[:, -1])
        for i in range(len(slices)):
            if (self.kern.kern[i].name == 'diffKern') and len(slices[i]) > 0:
                raise NotImplementedError("It is not (yet) possible to predict gradients of gradient observations.")
        if kern is None:
            kern = self.kern

        # Gradients of the predictive mean, one slice per output dimension.
        mean_jac = np.empty((Xnew.shape[0], Xnew.shape[1]-1, self.output_dim))
        for i in range(self.output_dim):
            mean_jac[:, :, i] = kern.gradients_X(self.posterior.woodbury_vector[:, i:i+1].T, Xnew, self._predictive_variable)[:, 0:-1]

        # Gradients of the variance: first the diagonal part k_{xx} ...
        dv_dX = kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)[:, 0:-1]
        # ... then the 'Schur' part K_{xf} K_{ff}^{-1} K_{fx}.
        if self.posterior.woodbury_inv.ndim == 3:
            tmp = np.empty(dv_dX.shape + (self.posterior.woodbury_inv.shape[2],))
            tmp[:] = dv_dX[:, :, None]
            for i in range(self.posterior.woodbury_inv.shape[2]):
                alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv[:, :, i])
                tmp[:, :, i] += kern.gradients_X(alpha, Xnew, self._predictive_variable)
        else:
            tmp = dv_dX
            alpha = -2.*np.dot(kern.K(Xnew, self._predictive_variable), self.posterior.woodbury_inv)
            tmp += kern.gradients_X(alpha, Xnew, self._predictive_variable)[:, 0:-1]
        return mean_jac, tmp
    def log_predictive_density(self, x_test, y_test, Y_metadata=None):
        if isinstance(x_test, list):
            x_test, y_test, ind = util.multioutput.build_XY(x_test, y_test)
            if Y_metadata is None:
                Y_metadata = {'output_index': ind, 'trials': np.ones(ind.shape)}
        return super(MultioutputGP, self).log_predictive_density(x_test, y_test, Y_metadata)
    def set_XY(self, X=None, Y=None):
        if isinstance(X, list):
            X, _, self.output_index = util.multioutput.build_XY(X, None)
        if isinstance(Y, list):
            _, Y, self.output_index = util.multioutput.build_XY(Y, Y)

        self.update_model(False)
        if Y is not None:
            self.Y = ObsAr(Y)
            self.Y_normalized = self.Y
        if X is not None:
            self.X = ObsAr(X)
        self.Y_metadata = {'output_index': self.output_index, 'trials': np.ones(self.output_index.shape)}
        if isinstance(self.inference_method, expectation_propagation.EP):
            self.inference_method.reset()
        self.update_model(True)
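
For reference, a minimal usage sketch of the new model, based only on the constructor and predict signatures above (the toy data and kernel parameters are illustrative, not part of the commit):

import numpy as np
import GPy

# Two toy outputs observed at different inputs (illustrative only).
X1, X2 = np.random.rand(20, 1), np.random.rand(15, 1)
Y1, Y2 = np.sin(6*X1), np.cos(6*X2)

# One kernel and one likelihood per output, given as lists; the list order
# ties inputs, outputs, kernels and likelihoods together.
m = GPy.models.MultioutputGP(X_list=[X1, X2], Y_list=[Y1, Y2],
                             kernel_list=[GPy.kern.RBF(1), GPy.kern.RBF(1)],
                             likelihood_list=[GPy.likelihoods.Gaussian(), GPy.likelihoods.Gaussian()])
m.optimize()

# Prediction also accepts a list with one test array per output; the list is
# stacked internally by util.multioutput.build_XY.
Xt = np.linspace(0, 1, 50)[:, None]
mu, var = m.predict([Xt, Xt])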


@@ -1180,6 +1180,67 @@ class GradientTests(np.testing.TestCase):
        with self.assertRaises(RuntimeError):
            m.posterior_covariance_between_points(np.array([[1], [2]]), np.array([[3], [4]]))
    def test_multioutput_model_with_derivative_observations(self):
        f = lambda x: np.sin(x) + 0.1*(x - 2.)**2 - 0.005*x**3
        fd = lambda x: np.cos(x) + 0.2*(x - 2.) - 0.015*x**2
        N = 10
        M = 10
        sigma = 0.05
        sigmader = 0.05
        x = np.array([np.linspace(1, 10, N)]).T
        y = f(x) + np.array(sigma*np.random.normal(0, 1, (N, 1)))
        xd = np.array([np.linspace(2, 8, M)]).T
        yd = fd(xd) + np.array(sigmader*np.random.normal(0, 1, (M, 1)))
        # Squared exponential kernel:
        se = GPy.kern.RBF(input_dim=1, lengthscale=1.5, variance=0.2)
        # The derivative observations need a separate kernel, wrapping the base kernel:
        se_der = GPy.kern.DiffKern(se, 0)
        # One likelihood per output:
        gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
        gauss_der = GPy.likelihoods.Gaussian(variance=sigmader**2)
        # Create the model; everything is given as lists, and the order of the inputs
        # determines the order of the outputs: here the regular observations come first
        # and the derivative observations second, so the kernels and likelihoods must
        # follow the same order.
        m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list=[gauss, gauss_der])
        m.randomize()
        self.assertTrue(m.checkgrad())
        m.optimize(messages=0, ipython_notebook=False)
        self.assertTrue(m.checkgrad())
    def test_multioutput_model_with_ep(self):
        f = lambda x: np.sin(x) + 0.1*(x - 2.)**2 - 0.005*x**3
        fd = lambda x: np.cos(x) + 0.2*(x - 2.) - 0.015*x**2
        N = 10
        sigma = 0.05
        x = np.array([np.linspace(1, 10, N)]).T
        y = f(x) + np.array(sigma*np.random.normal(0, 1, (N, 1)))
        M = 7
        xd = np.array([np.linspace(2, 8, M)]).T
        yd = 2*(fd(xd) > 0) - 1  # sign observations of the derivative
        # Squared exponential kernel:
        se = GPy.kern.RBF(input_dim=1, lengthscale=1.5, variance=0.2)
        # The derivative observations need a separate kernel, wrapping the base kernel:
        se_der = GPy.kern.DiffKern(se, 0)
        # Gaussian likelihood for the function values, scaled probit for the derivative signs:
        gauss = GPy.likelihoods.Gaussian(variance=sigma**2)
        probit = GPy.likelihoods.Binomial(gp_link=GPy.likelihoods.link_functions.ScaledProbit(nu=100))
        # Create the model; everything is given as lists:
        m = GPy.models.MultioutputGP(X_list=[x, xd], Y_list=[y, yd], kernel_list=[se, se_der], likelihood_list=[gauss, probit], inference_method=GPy.inference.latent_function_inference.EP(ep_mode="nested"))
        self.assertTrue(m.checkgrad())
def _create_missing_data_model(kernel, Q):
    D1, D2, D3, N, num_inducing = 13, 5, 8, 400, 3
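
Finally, a short sketch of predictive_gradients on the derivative-observation model from the first test above. This assumes the augmented-input convention used inside the method, where the output index is appended as the last column of Xnew; the shapes follow the docstring:

# Continuing from the model m in test_multioutput_model_with_derivative_observations:
Xt = np.linspace(1, 10, 5)[:, None]
Xt_aug = np.hstack([Xt, np.zeros((5, 1))])  # index 0 = the function-value output
dmu_dX, dv_dX = m.predictive_gradients(Xt_aug)
# Per the docstring, dmu_dX is (N*, Q, D) and dv_dX is (N*, Q); here (5, 1, 1) and (5, 1).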