# GPy/GPy/core/gp.py
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import sys
import warnings
import logging

from .. import kern
from .. import likelihoods
from ..util.linalg import dtrtrs
from ..util.normalizer import GaussianNorm
from ..likelihoods.gaussian import Gaussian
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference
from model import Model
from parameterization import ObsAr
from parameterization.variational import VariationalPosterior
from scipy.sparse.base import issparse

logger = logging.getLogger("GP")

class GP(Model):
    """
    General purpose Gaussian process model

    :param X: input observations
    :param Y: output observations
    :param kernel: a GPy kernel
    :type kernel: GPy.kern.Kern
    :param likelihood: a GPy likelihood
    :param inference_method: the inference method to use for this GP
    :type inference_method: GPy.inference.latent_function_inference.LatentFunctionInference
    :param normalizer: normalize the outputs Y.
        Prediction will be un-normalized using this normalizer.
        If normalizer is None, we will normalize using GaussianNorm.
        If normalizer is False, no normalization will be done.
    :type normalizer: Norm
    :rtype: model object

.. Note:: Multiple independent outputs are allowed using columns of Y
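
    Example (a minimal sketch; the exact kernel and likelihood constructor
    names may differ between GPy versions)::

        import numpy as np
        import GPy

        X = np.random.rand(20, 1)
        Y = np.sin(X) + np.random.randn(20, 1) * 0.05
        m = GPy.core.GP(X, Y, kernel=GPy.kern.RBF(1),
                        likelihood=GPy.likelihoods.Gaussian())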
"""
def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', Y_metadata=None, normalizer=False):
        super(GP, self).__init__(name)
        assert X.ndim == 2
        if isinstance(X, (ObsAr, VariationalPosterior)):
            self.X = X.copy()
        else:
            self.X = ObsAr(X)
        self.num_data, self.input_dim = self.X.shape

        assert Y.ndim == 2
        logger.info("initializing Y")
        if normalizer is None:
            self.normalizer = GaussianNorm()
        elif normalizer is False:
            self.normalizer = None
        else:
            self.normalizer = normalizer

        if self.normalizer is not None:
            self.normalizer.scale_by(Y)
            self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
            self.Y = Y
        else:
            self.Y = ObsAr(Y)
            self.Y_normalized = self.Y
        assert Y.shape[0] == self.num_data
        _, self.output_dim = self.Y.shape

        #TODO: check that the type of this is okay
        self.Y_metadata = Y_metadata

        assert isinstance(kernel, kern.Kern)
        #assert self.input_dim == kernel.input_dim
        self.kern = kernel

        assert isinstance(likelihood, likelihoods.Likelihood)
        self.likelihood = likelihood

        #find a sensible inference method
        logger.info("initializing inference method")
        if inference_method is None:
            if isinstance(likelihood, (likelihoods.Gaussian, likelihoods.MixedNoise)):
                inference_method = exact_gaussian_inference.ExactGaussianInference()
            else:
                inference_method = expectation_propagation.EP()
            print "defaulting to", inference_method, "for latent function inference"
        self.inference_method = inference_method

        logger.info("adding kernel and likelihood as parameters")
        self.add_parameter(self.kern)
        self.add_parameter(self.likelihood)

    def parameters_changed(self):
        """
        Update the model after a change to the parameters: recompute the
        posterior, the log marginal likelihood and the gradients, and pass
        the gradients on to the kernel and the likelihood.
        """
        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)

    def log_likelihood(self):
        """
        The log marginal likelihood of the model, as computed (or
        approximated) by the inference method.
        """
        return self._log_marginal_likelihood

def _raw_predict(self, _Xnew, full_cov=False, kern=None):
        """
        Make a prediction of the latent function values; this does not
        account for normalization or the likelihood.

        full_cov is a boolean which defines whether the full covariance matrix
        of the prediction is computed. If full_cov is False (default), only the
        diagonal of the covariance is returned.

        $$
        p(f^*|X^*, X, Y) = \int_{-\infty}^{\infty} p(f^*|f, X^*)\, p(f|X, Y)\, df
                         = N(f^* \mid K_{x^*x}(K_{xx} + \Sigma)^{-1}Y,\ K_{x^*x^*} - K_{x^*x}(K_{xx} + \Sigma)^{-1}K_{xx^*})
        \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
        $$
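
        For the exact-Gaussian case, an illustrative numpy sketch of the
        formula above (the code below instead reuses the cached posterior
        woodbury terms; ``noise_variance``, ``N``, ``Xnew``, ``X`` and ``Y``
        are assumed names)::

            K = kern.K(X) + noise_variance * np.eye(N)
            Kx = kern.K(X, Xnew)
            mu = Kx.T.dot(np.linalg.solve(K, Y))
            var = kern.K(Xnew) - Kx.T.dot(np.linalg.solve(K, Kx))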
"""
if kern is None:
kern = self.kern
Kx = kern.K(_Xnew, self.X).T
        WiKx = np.dot(self.posterior.woodbury_inv, Kx)
        mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = kern.K(_Xnew)
var = Kxx - np.dot(Kx.T, WiKx)
        else:
            Kxx = kern.Kdiag(_Xnew)
            var = Kxx - np.sum(WiKx * Kx, 0)
            var = var.reshape(-1, 1)
        #force mu to be a column vector
        if len(mu.shape) == 1: mu = mu[:, None]
        return mu, var

def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
"""
Predict the function(s) at the new point(s) Xnew.
2013-09-20 17:46:23 +01:00
2013-06-05 14:11:49 +01:00
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
2014-01-28 14:45:00 +00:00
:param full_cov: whether to return the full covariance matrix, or just
the diagonal
2013-06-05 14:11:49 +01:00
:type full_cov: bool
:param Y_metadata: metadata about the predicting point to pass to the likelihood
:param kern: The kernel to use for prediction (defaults to the model
kern). this is useful for examining e.g. subprocesses.
2013-09-20 17:46:23 +01:00
:returns: mean: posterior mean, a Numpy array, Nnew x self.input_dim
2014-01-28 14:45:00 +00:00
:returns: var: posterior variance, a Numpy array, Nnew x 1 if
full_cov=False, Nnew x Nnew otherwise
:returns: lower and upper boundaries of the 95% confidence intervals,
Numpy arrays, Nnew x self.input_dim
2013-06-05 14:11:49 +01:00
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
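
        Usage sketch (``m`` is an assumed fitted model, ``Xnew`` an
        Nnew x input_dim array)::

            mean, var = m.predict(Xnew)
            lower, upper = m.predict_quantiles(Xnew)   # default (2.5, 97.5) quantiles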
"""
        #predict the latent function values
        mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
# now push through likelihood
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
if self.normalizer is not None:
return self.normalizer.inverse_mean(mean), self.normalizer.inverse_variance(var)
else:
return mean, var

    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
        """
        Get the predictive quantiles of the model at the points X.

        :param X: the points at which to make a prediction
        :type X: np.ndarray, Nnew x self.input_dim
        :param quantiles: the desired quantiles, in percent
        :type quantiles: tuple
        :param Y_metadata: metadata about the predicted points to pass to the likelihood
        :returns: the quantiles of the predictive distribution, one array per quantile
        """
        m, v = self._raw_predict(X, full_cov=False)
        return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)

    def predictive_gradients(self, Xnew):
"""
Compute the derivatives of the latent function with respect to X*
Given a set of points at which to predict X* (size [N*,Q]), compute the
derivatives of the mean and variance. Resulting arrays are sized:
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
dv_dX* -- [N*, Q], (since all outputs have the same variance)
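
        A finite-difference sanity check of the returned mean gradient (a
        sketch; assumes a Gaussian likelihood, no normalizer, a fitted model
        ``m`` and a small step ``eps``)::

            dmu, dv = m.predictive_gradients(Xnew)
            Xp = Xnew.copy()
            Xp[0, 0] += eps
            approx = (m.predict(Xp)[0][0] - m.predict(Xnew)[0][0]) / eps
            # approx should be close to dmu[0, 0, :]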
"""
        dmu_dX = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))
        for i in range(self.output_dim):
            dmu_dX[:, :, i] = self.kern.gradients_X(self.posterior.woodbury_vector[:, i:i+1].T, Xnew, self.X)

        # gradients wrt the diagonal part k_{xx}
        dv_dX = self.kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)
        # gradients wrt the 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
        alpha = -2. * np.dot(self.kern.K(Xnew, self.X), self.posterior.woodbury_inv)
        dv_dX += self.kern.gradients_X(alpha, Xnew, self.X)
        return dmu_dX, dv_dX

    def posterior_samples_f(self, X, size=10, full_cov=True):
"""
Samples the posterior GP at the points X.
:param X: The points at which to take the samples.
:type X: np.ndarray, Nnew x self.input_dim.
2014-01-28 13:39:59 +00:00
:param size: the number of a posteriori samples.
2014-01-22 15:06:53 +00:00
:type size: int.
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool.
:returns: Ysim: set of simulations, a Numpy array (N x samples).
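
        Usage sketch (``m`` is an assumed fitted model, ``X`` test inputs)::

            fsim = m.posterior_samples_f(X, size=3)   # draws of the latent function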
"""
m, v = self._raw_predict(X, full_cov=full_cov)
        v = v.reshape(m.size, -1) if len(v.shape) == 3 else v
        if not full_cov:
            Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
        else:
            Ysim = np.random.multivariate_normal(m.flatten(), v, size).T
        return Ysim

    def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None):
"""
Samples the posterior GP at the points X.
:param X: the points at which to take the samples.
:type X: np.ndarray, Nnew x self.input_dim.
2014-01-28 13:39:59 +00:00
:param size: the number of a posteriori samples.
2014-01-22 15:06:53 +00:00
:type size: int.
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool.
:param noise_model: for mixed noise likelihood, the noise model to use in the samples.
:type noise_model: integer.
:returns: Ysim: set of simulations, a Numpy array (N x samples).
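
        Usage sketch (``m`` is an assumed fitted model, ``X`` test inputs)::

            ysim = m.posterior_samples(X, size=3)   # draws of the observed outputs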
"""
Ysim = self.posterior_samples_f(X, size, full_cov=full_cov)
        Ysim = self.likelihood.samples(Ysim, Y_metadata)
return Ysim

    def plot_f(self, plot_limits=None, which_data_rows='all',
               which_data_ycols='all', fixed_inputs=[],
               levels=20, samples=0, fignum=None, ax=None, resolution=None,
               plot_raw=True,
               linecol=None, fillcol=None, Y_metadata=None, data_symbol='kx'):
        """
        Plot the GP's view of the world, where the data is normalized and
        before applying a likelihood. This is a call to plot with
        plot_raw=True. Data will not be plotted here, as the GP's view of the
        world may live in another space, or in other units, than the data.
        """
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
        from ..plotting.matplot_dep import models_plots
        kw = {}
        if linecol is not None:
            kw['linecol'] = linecol
        if fillcol is not None:
            kw['fillcol'] = fillcol
        return models_plots.plot_fit(self, plot_limits, which_data_rows,
                                     which_data_ycols, fixed_inputs,
                                     levels, samples, fignum, ax, resolution,
                                     plot_raw=plot_raw, Y_metadata=Y_metadata,
                                     data_symbol=data_symbol, **kw)

    def plot(self, plot_limits=None, which_data_rows='all',
             which_data_ycols='all', fixed_inputs=[],
             levels=20, samples=0, fignum=None, ax=None, resolution=None,
             plot_raw=False,
             linecol=None, fillcol=None, Y_metadata=None, data_symbol='kx'):
        """
        Plot the posterior of the GP.

        - In one dimension, the function is plotted with a shaded region
          identifying two standard deviations.
        - In two dimensions, a contour-plot shows the mean predicted function.
        - In higher dimensions, use fixed_inputs to plot the GP with some of
          the inputs fixed.

        Can plot only part of the data and part of the posterior functions
        using which_data_rows and which_data_ycols.

        :param plot_limits: The limits of the plot. If 1D [xmin, xmax], if 2D [[xmin, ymin], [xmax, ymax]]. Defaults to data limits
        :type plot_limits: np.array
        :param which_data_rows: which of the training data to plot (default all)
        :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
        :param which_data_ycols: when the data has several columns (independent outputs), only plot these
        :type which_data_ycols: 'all' or a list of integers
        :param fixed_inputs: a list of tuples [(i, v), (i, v)...], specifying that input dimension i should be set to value v
        :type fixed_inputs: a list of tuples
        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
        :type resolution: int
        :param levels: for 2D plotting, the number of contour levels to use
        :type levels: int
        :param samples: the number of a posteriori samples to plot
        :type samples: int
        :param fignum: figure to plot on
        :type fignum: figure number
        :param ax: axes to plot on; if ax is None, a new figure is created
        :type ax: axes handle
        :param linecol: color of the mean line [Tango.colorsHex['darkBlue']]
        :param fillcol: color of the confidence fill [Tango.colorsHex['lightBlue']]
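
        Usage sketch for a model with three inputs, fixing the second and
        third input dimensions at illustrative values::

            m.plot(fixed_inputs=[(1, 0.0), (2, 0.5)])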
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
        kw = {}
        if linecol is not None:
            kw['linecol'] = linecol
        if fillcol is not None:
            kw['fillcol'] = fillcol
        return models_plots.plot_fit(self, plot_limits, which_data_rows,
                                     which_data_ycols, fixed_inputs,
                                     levels, samples, fignum, ax, resolution,
                                     plot_raw=plot_raw, Y_metadata=Y_metadata,
                                     data_symbol=data_symbol, **kw)

    def input_sensitivity(self, summarize=True):
"""
Returns the sensitivity for each dimension of this model
"""
return self.kern.input_sensitivity(summarize=summarize)

    def optimize(self, optimizer=None, start=None, **kwargs):
"""
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
kwargs are passed to the optimizer. They can be:
:param max_f_eval: maximum number of function evaluations
:type max_f_eval: int
:messages: whether to display during optimisation
:type messages: bool
:param optimizer: which optimizer to use (defaults to self.preferred optimizer)
:type optimizer: string
TODO: valid args
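
        Usage sketch (the available optimizer names depend on the installed
        backends; 'scg' is one of GPy's built-in optimizers)::

            m.optimize(optimizer='scg', messages=True, max_f_eval=1000)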
"""
self.inference_method.on_optimization_start()
try:
super(GP, self).optimize(optimizer, start, **kwargs)
except KeyboardInterrupt:
print "KeyboardInterrupt caught, calling on_optimization_end() to round things up"
self.inference_method.on_optimization_end()
raise