2014-11-21 11:40:50 +00:00
|
|
|
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
2013-06-05 14:11:49 +01:00
|
|
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
|
|
|
|
|
|
|
|
import numpy as np
|
2014-01-28 13:39:59 +00:00
|
|
|
import sys
|
2014-01-22 15:06:53 +00:00
|
|
|
from .. import kern
|
2015-02-26 07:14:40 +00:00
|
|
|
from .model import Model
|
|
|
|
|
from .parameterization import ObsAr
|
2015-04-01 13:03:48 +01:00
|
|
|
from .mapping import Mapping
|
2013-12-05 15:09:31 -05:00
|
|
|
from .. import likelihoods
|
2014-11-11 10:21:58 +00:00
|
|
|
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
|
2015-02-26 07:14:40 +00:00
|
|
|
from .parameterization.variational import VariationalPosterior
|
2013-06-05 14:11:49 +01:00
|
|
|
|
2014-06-27 16:18:41 -07:00
|
|
|
import logging
|
2015-04-10 15:44:15 +01:00
|
|
|
import warnings
|
2014-08-27 15:47:41 -07:00
|
|
|
from GPy.util.normalizer import MeanNorm
|
2014-06-27 16:18:41 -07:00
|
|
|
logger = logging.getLogger("GP")
|
|
|
|
|
|
2014-01-22 15:06:53 +00:00
|
|
|
class GP(Model):
    """
    General purpose Gaussian process model

    :param X: input observations
    :param Y: output observations
    :param kernel: a GPy kernel, defaults to rbf+white
    :param likelihood: a GPy likelihood
    :param inference_method: The :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference method to use for this GP
    :rtype: model object
    :param Norm normalizer:
        normalize the outputs Y.
        Prediction will be un-normalized using this normalizer.
        If normalizer is True, we will normalize using MeanNorm.
        If normalizer is False (the default), no normalization will be done.

    .. Note:: Multiple independent outputs are allowed using columns of Y

    """
    def __init__(self, X, Y, kernel, likelihood, mean_function=None, inference_method=None, name='gp', Y_metadata=None, normalizer=False):
        super(GP, self).__init__(name)

        # --- inputs ---
        assert X.ndim == 2
        if isinstance(X, (ObsAr, VariationalPosterior)):
            # already an observation array / variational posterior: keep the type
            self.X = X.copy()
        else:
            self.X = ObsAr(X)
        self.num_data, self.input_dim = self.X.shape

        # --- outputs, with optional normalization ---
        assert Y.ndim == 2
        logger.info("initializing Y")

        # normalizer may be True (use MeanNorm), False (no normalization),
        # or a Norm instance supplied by the caller
        if normalizer is True:
            self.normalizer = MeanNorm()
        elif normalizer is False:
            self.normalizer = None
        else:
            self.normalizer = normalizer

        if self.normalizer is not None:
            self.normalizer.scale_by(Y)
            self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
            self.Y = Y
        else:
            self.Y = ObsAr(Y)
            self.Y_normalized = self.Y

        if Y.shape[0] != self.num_data:
            # There can be cases where we want more inputs than outputs, for
            # example if we have multiple latent function values
            warnings.warn("There are more rows in your input data X, \
than in your output data Y, be VERY sure this is what you want")
        _, self.output_dim = self.Y.shape

        assert ((Y_metadata is None) or isinstance(Y_metadata, dict))
        self.Y_metadata = Y_metadata

        assert isinstance(kernel, kern.Kern)
        #assert self.input_dim == kernel.input_dim
        self.kern = kernel

        assert isinstance(likelihood, likelihoods.Likelihood)
        self.likelihood = likelihood

        # handle the mean function
        self.mean_function = mean_function
        if mean_function is not None:
            assert isinstance(self.mean_function, Mapping)
            assert mean_function.input_dim == self.input_dim
            assert mean_function.output_dim == self.output_dim
            self.link_parameter(mean_function)

        # find a sensible inference method
        logger.info("initializing inference method")
        if inference_method is None:
            if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
                inference_method = exact_gaussian_inference.ExactGaussianInference()
            else:
                inference_method = expectation_propagation.EP()
            # report the automatic choice through the module logger instead of
            # printing to stdout, consistent with the rest of this module
            logger.info("defaulting to %s for latent function inference", inference_method)
        self.inference_method = inference_method

        logger.info("adding kernel and likelihood as parameters")
        self.link_parameter(self.kern)
        self.link_parameter(self.likelihood)
        # populated by parameters_changed() after the first inference pass
        self.posterior = None
2015-03-13 09:47:36 +00:00
|
|
|
def set_XY(self, X=None, Y=None, trigger_update=True):
    """
    Set the input / output data of the model

    This is useful if we wish to change our existing data but maintain the same model

    :param X: input observations
    :type X: np.ndarray
    :param Y: output observations
    :type Y: np.ndarray
    """
    if trigger_update:
        self.update_model(False)

    if Y is not None:
        if self.normalizer is None:
            # no normalization: normalized view aliases the raw data
            self.Y = ObsAr(Y)
            self.Y_normalized = self.Y
        else:
            self.normalizer.scale_by(Y)
            self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
            self.Y = Y

    if X is not None:
        if self.X not in self.parameters:
            # plain GP: X is fixed observed data
            self.X = ObsAr(X)
        elif isinstance(self.X, VariationalPosterior):
            # LVM models
            assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
            self.unlink_parameter(self.X)
            self.X = X
            self.link_parameters(self.X)
        else:
            # X is an optimizable parameter (point-estimate latent space)
            self.unlink_parameter(self.X)
            from ..core import Param
            self.X = Param('latent mean',X)
            self.link_parameters(self.X)

    if trigger_update:
        self.update_model(True)
        self._trigger_params_changed()
2015-03-13 09:47:36 +00:00
|
|
|
def set_X(self, X, trigger_update=True):
    """
    Set the input data of the model

    :param X: input observations
    :type X: np.ndarray
    """
    # delegate to set_XY, which knows how to handle parameterized X
    self.set_XY(X=X, Y=None, trigger_update=trigger_update)
2015-03-13 09:47:36 +00:00
|
|
|
def set_Y(self, Y, trigger_update=True):
    """
    Set the output data of the model

    :param Y: output observations
    :type Y: np.ndarray
    """
    # delegate to set_XY, which handles normalization of Y
    self.set_XY(Y=Y, trigger_update=trigger_update)
2013-10-17 14:38:43 +01:00
|
|
|
def parameters_changed(self):
    """
    Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model.
    In particular in the GP class this method reperforms inference, recalculating the posterior, log marginal likelihood and gradients of the model

    .. warning::
        This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call
        this method yourself, there may be unexpected consequences.
    """
    # re-run inference on the (normalized) data
    inference = self.inference_method.inference
    self.posterior, self._log_marginal_likelihood, self.grad_dict = inference(
        self.kern, self.X, self.likelihood, self.Y_normalized, self.mean_function, self.Y_metadata)
    # propagate gradients to the likelihood, kernel, and (optional) mean function
    self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
    self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)
    if self.mean_function is not None:
        self.mean_function.update_gradients(self.grad_dict['dL_dm'], self.X)
2013-06-05 14:11:49 +01:00
|
|
|
def log_likelihood(self):
    r"""
    The log marginal likelihood of the model, :math:`p(\mathbf{y})`, this is the objective function of the model being optimised
    """
    # cached by parameters_changed() after each inference pass
    return self._log_marginal_likelihood
2014-04-30 12:11:41 +01:00
|
|
|
def _raw_predict(self, _Xnew, full_cov=False, kern=None):
    r"""
    For making predictions, does not account for normalization or likelihood

    full_cov is a boolean which defines whether the full covariance matrix
    of the prediction is computed. If full_cov is False (default), only the
    diagonal of the covariance is returned.

    .. math::
        p(f*|X*, X, Y) = \int^{\inf}_{\inf} p(f*|f,X*)p(f|X,Y) df
                       = N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*}
        \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
    """
    predict_kern = self.kern if kern is None else kern

    # cross-covariance between training inputs and test inputs, (N x N*)
    Kx = predict_kern.K(_Xnew, self.X).T
    woodbury_inv_Kx = np.dot(self.posterior.woodbury_inv, Kx)
    # posterior mean: K_{x*x} (K + Sigma)^{-1} y
    mu = np.dot(Kx.T, self.posterior.woodbury_vector)
    if full_cov:
        var = predict_kern.K(_Xnew) - np.dot(Kx.T, woodbury_inv_Kx)
    else:
        var = predict_kern.Kdiag(_Xnew) - np.sum(woodbury_inv_Kx * Kx, 0)
        var = var.reshape(-1, 1)
        # numerical round-off can make the predictive variance slightly negative
        var[var < 0.] = 0.

    # force mu to be a column vector
    if mu.ndim == 1:
        mu = mu[:, None]

    # add the mean function in
    if self.mean_function is not None:
        mu += self.mean_function.f(_Xnew)
    return mu, var
2014-04-30 12:11:41 +01:00
|
|
|
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
    """
    Predict the function(s) at the new point(s) Xnew.

    :param Xnew: The points at which to make a prediction
    :type Xnew: np.ndarray (Nnew x self.input_dim)
    :param full_cov: whether to return the full covariance matrix, or just
                     the diagonal
    :type full_cov: bool
    :param Y_metadata: metadata about the predicting point to pass to the likelihood
    :param kern: The kernel to use for prediction (defaults to the model
                 kern). this is useful for examining e.g. subprocesses.
    :returns: (mean, var):
        mean: posterior mean, a Numpy array, Nnew x self.input_dim
        var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise

        If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
        This is to allow for different normalizations of the output dimensions.

    Note: If you want the predictive quantiles (e.g. 95% confidence interval) use :py:func:"~GPy.core.gp.GP.predict_quantiles".
    """
    # predict the latent function values
    mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)

    # undo any output normalization
    if self.normalizer is not None:
        mu = self.normalizer.inverse_mean(mu)
        var = self.normalizer.inverse_variance(var)

    # now push through likelihood
    return self.likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
2014-03-13 15:35:54 +00:00
|
|
|
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
    """
    Get the predictive quantiles around the prediction at X

    :param X: The points at which to make a prediction
    :type X: np.ndarray (Xnew x self.input_dim)
    :param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
    :type quantiles: tuple
    :returns: list of quantiles for each X and predictive quantiles for interval combination
    :rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
    """
    mean, variance = self._raw_predict(X, full_cov=False)
    # undo any output normalization before pushing through the likelihood
    if self.normalizer is not None:
        mean = self.normalizer.inverse_mean(mean)
        variance = self.normalizer.inverse_variance(variance)
    return self.likelihood.predictive_quantiles(mean, variance, quantiles, Y_metadata=Y_metadata)
2014-08-13 10:36:54 +01:00
|
|
|
def predictive_gradients(self, Xnew):
    """
    Compute the derivatives of the predicted latent function with respect to X*

    Given a set of points at which to predict X* (size [N*,Q]), compute the
    derivatives of the mean and variance. Resulting arrays are sized:
        dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).

    Note that this is not the same as computing the mean and variance of the derivative of the function!

        dv_dX* -- [N*, Q], (since all outputs have the same variance)

    :param X: The points at which to get the predictive gradients
    :type X: np.ndarray (Xnew x self.input_dim)
    :returns: dmu_dX, dv_dX
    :rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ]
    """
    woodbury_vector = self.posterior.woodbury_vector
    dmu_dX = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))
    # one gradient pass per output column
    for d in range(self.output_dim):
        dmu_dX[:, :, d] = self.kern.gradients_X(woodbury_vector[:, d:d + 1].T, Xnew, self.X)

    # gradients wrt the diagonal part k_{xx}
    dv_dX = self.kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)
    # grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
    alpha = -2. * np.dot(self.kern.K(Xnew, self.X), self.posterior.woodbury_inv)
    dv_dX += self.kern.gradients_X(alpha, Xnew, self.X)
    return dmu_dX, dv_dX
|
2014-02-19 17:37:18 +00:00
|
|
|
def posterior_samples_f(self, X, size=10, full_cov=True):
    """
    Samples the posterior GP at the points X.

    :param X: The points at which to take the samples.
    :type X: np.ndarray (Nnew x self.input_dim)
    :param size: the number of a posteriori samples.
    :type size: int.
    :param full_cov: whether to return the full covariance matrix, or just the diagonal.
    :type full_cov: bool.
    :returns: fsim: set of simulations
    :rtype: np.ndarray (N x samples)
    """
    m, v = self._raw_predict(X, full_cov=full_cov)
    if self.normalizer is not None:
        m = self.normalizer.inverse_mean(m)
        v = self.normalizer.inverse_variance(v)
    # flatten a (N x N x D) covariance stack into (N*D x ...) if present
    if v.ndim == 3:
        v = v.reshape(m.size, -1)
    # diagonal-only prediction: promote the variances to a diagonal covariance
    cov = v if full_cov else np.diag(v.flatten())
    return np.random.multivariate_normal(m.flatten(), cov, size).T
2014-03-13 16:44:39 +00:00
|
|
|
def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None):
    """
    Samples the posterior GP at the points X.

    :param X: the points at which to take the samples.
    :type X: np.ndarray (Nnew x self.input_dim.)
    :param size: the number of a posteriori samples.
    :type size: int.
    :param full_cov: whether to return the full covariance matrix, or just the diagonal.
    :type full_cov: bool.
    :returns: Ysim: set of simulations, a Numpy array (N x samples).
    """
    # sample the latent function, then map the samples through the likelihood
    latent_samples = self.posterior_samples_f(X, size, full_cov=full_cov)
    return self.likelihood.samples(latent_samples, Y_metadata=Y_metadata)
|
2014-05-16 11:21:08 +01:00
|
|
|
def plot_f(self, plot_limits=None, which_data_rows='all',
           which_data_ycols='all', fixed_inputs=None,
           levels=20, samples=0, fignum=None, ax=None, resolution=None,
           plot_raw=True,
           linecol=None, fillcol=None, Y_metadata=None, data_symbol='kx',
           apply_link=False):
    """
    Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
    This is a call to plot with plot_raw=True.
    Data will not be plotted in this, as the GP's view of the world
    may live in another space, or units then the data.

    Can plot only part of the data and part of the posterior functions
    using which_data_rowsm which_data_ycols.

    :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
    :type plot_limits: np.array
    :param which_data_rows: which of the training data to plot (default all)
    :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
    :param which_data_ycols: when the data has several columns (independant outputs), only plot these
    :type which_data_ycols: 'all' or a list of integers
    :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. Defaults to an empty list.
    :type fixed_inputs: a list of tuples
    :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
    :type resolution: int
    :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
    :type levels: int
    :param samples: the number of a posteriori samples to plot
    :type samples: int
    :param fignum: figure to plot on.
    :type fignum: figure number
    :param ax: axes to plot on.
    :type ax: axes handle
    :param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
    :type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
    :param fillcol: color of fill [Tango.colorsHex['lightBlue']]
    :type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
    :param Y_metadata: additional data associated with Y which may be needed
    :type Y_metadata: dict
    :param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
    :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
    :param apply_link: if there is a link function of the likelihood, plot the link(f*) rather than f*
    :type apply_link: boolean
    """
    assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
    from ..plotting.matplot_dep import models_plots
    # avoid the mutable-default-argument pitfall: [] as a default is shared
    # across all calls, so use None as the sentinel instead
    if fixed_inputs is None:
        fixed_inputs = []
    kw = {}
    if linecol is not None:
        kw['linecol'] = linecol
    if fillcol is not None:
        kw['fillcol'] = fillcol
    return models_plots.plot_fit(self, plot_limits, which_data_rows,
                                 which_data_ycols, fixed_inputs,
                                 levels, samples, fignum, ax, resolution,
                                 plot_raw=plot_raw, Y_metadata=Y_metadata,
                                 data_symbol=data_symbol, apply_link=apply_link, **kw)
|
|
|
def plot(self, plot_limits=None, which_data_rows='all',
         which_data_ycols='all', fixed_inputs=None,
         levels=20, samples=0, fignum=None, ax=None, resolution=None,
         plot_raw=False,
         linecol=None, fillcol=None, Y_metadata=None, data_symbol='kx', predict_kw=None):
    """
    Plot the posterior of the GP.
    - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
    - In two dimsensions, a contour-plot shows the mean predicted function
    - In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.

    Can plot only part of the data and part of the posterior functions
    using which_data_rowsm which_data_ycols.

    :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
    :type plot_limits: np.array
    :param which_data_rows: which of the training data to plot (default all)
    :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
    :param which_data_ycols: when the data has several columns (independant outputs), only plot these
    :type which_data_ycols: 'all' or a list of integers
    :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. Defaults to an empty list.
    :type fixed_inputs: a list of tuples
    :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
    :type resolution: int
    :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
    :type levels: int
    :param samples: the number of a posteriori samples to plot
    :type samples: int
    :param fignum: figure to plot on.
    :type fignum: figure number
    :param ax: axes to plot on.
    :type ax: axes handle
    :param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
    :type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
    :param fillcol: color of fill [Tango.colorsHex['lightBlue']]
    :type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
    :param Y_metadata: additional data associated with Y which may be needed
    :type Y_metadata: dict
    :param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
    :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
    """
    assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
    from ..plotting.matplot_dep import models_plots
    # avoid the mutable-default-argument pitfall: [] as a default is shared
    # across all calls, so use None as the sentinel instead
    if fixed_inputs is None:
        fixed_inputs = []
    kw = {}
    if linecol is not None:
        kw['linecol'] = linecol
    if fillcol is not None:
        kw['fillcol'] = fillcol
    return models_plots.plot_fit(self, plot_limits, which_data_rows,
                                 which_data_ycols, fixed_inputs,
                                 levels, samples, fignum, ax, resolution,
                                 plot_raw=plot_raw, Y_metadata=Y_metadata,
                                 data_symbol=data_symbol, predict_kw=predict_kw, **kw)
2014-08-25 09:46:20 -07:00
|
|
|
def input_sensitivity(self, summarize=True):
    """
    Returns the sensitivity for each dimension of this model
    """
    # delegate entirely to the kernel, which owns the lengthscale information
    return self.kern.input_sensitivity(summarize=summarize)
2014-05-15 14:06:00 +01:00
|
|
|
def optimize(self, optimizer=None, start=None, **kwargs):
    """
    Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
    kwargs are passed to the optimizer. They can be:

    :param max_f_eval: maximum number of function evaluations
    :type max_f_eval: int
    :messages: whether to display during optimisation
    :type messages: bool
    :param optimizer: which optimizer to use (defaults to self.preferred optimizer), a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs', 'tnc'.
    :type optimizer: string
    """
    # give the inference method a chance to set up (e.g. EP warm starts)
    self.inference_method.on_optimization_start()
    try:
        super(GP, self).optimize(optimizer, start, **kwargs)
    except KeyboardInterrupt:
        # let the inference method clean up before re-raising to the caller
        print("KeyboardInterrupt caught, calling on_optimization_end() to round things up")
        self.inference_method.on_optimization_end()
        raise
2014-11-03 16:04:15 +00:00
|
|
|
def infer_newX(self, Y_new, optimize=True):
    """
    Infer the distribution of X for the new observed data *Y_new*.

    :param Y_new: the new observed data for inference
    :type Y_new: numpy.ndarray
    :param optimize: whether to optimize the location of new X (True by default)
    :type optimize: boolean
    :return: a tuple containing the posterior estimation of X and the model that optimize X
    :rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` or numpy.ndarray, :class:`~GPy.core.model.Model`)
    """
    # imported lazily to avoid a circular import at module load time
    from ..inference.latent_function_inference.inferenceX import infer_newX
    return infer_newX(self, Y_new, optimize=optimize)
|
|
|
|
def log_predictive_density(self, x_test, y_test, Y_metadata=None):
    r"""
    Calculation of the log predictive density

    .. math:
        p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\sigma^{2}_{*})

    :param x_test: test locations (x_{*})
    :type x_test: (Nx1) array
    :param y_test: test observations (y_{*})
    :type y_test: (Nx1) array
    :param Y_metadata: metadata associated with the test points
    """
    # latent posterior at the test locations, then score under the likelihood
    mu_star, var_star = self._raw_predict(x_test)
    return self.likelihood.log_predictive_density(y_test, mu_star, var_star, Y_metadata=Y_metadata)
|
|
|
|
|
def log_predictive_density_sampling(self, x_test, y_test, Y_metadata=None, num_samples=1000):
    r"""
    Calculation of the log predictive density by sampling

    .. math:
        p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\sigma^{2}_{*})

    :param x_test: test locations (x_{*})
    :type x_test: (Nx1) array
    :param y_test: test observations (y_{*})
    :type y_test: (Nx1) array
    :param Y_metadata: metadata associated with the test points
    :param num_samples: number of samples to use in monte carlo integration
    :type num_samples: int
    """
    # latent posterior at the test locations, then Monte-Carlo score
    mu_star, var_star = self._raw_predict(x_test)
    return self.likelihood.log_predictive_density_sampling(
        y_test, mu_star, var_star, Y_metadata=Y_metadata, num_samples=num_samples)