# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import logging
import warnings

import numpy as np

from .. import kern
from .probabilistic_model import ProbabilisticModel
from paramz import ObsAr
from .mapping import Mapping
from .. import likelihoods
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from .variational import VariationalPosterior
from GPy.util.normalizer import MeanNorm

logger = logging.getLogger("GP")
class GP(ProbabilisticModel):
    """
    General purpose Gaussian process model

    :param X: input observations
    :param Y: output observations
    :param kernel: a GPy kernel
    :param likelihood: a GPy likelihood
    :param mean_function: an optional GPy Mapping to use as the mean function (defaults to None)
    :param inference_method: The :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference method to use for this GP
    :param Norm normalizer:
        normalize the outputs Y.
        Prediction will be un-normalized using this normalizer.
        If normalizer is True, we will normalize using MeanNorm.
        If normalizer is False (the default), no normalization will be done.
    :rtype: model object

    .. Note:: Multiple independent outputs are allowed using columns of Y
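
    A minimal usage sketch (hedged: most users build a GP through a
    convenience wrapper such as ``GPy.models.GPRegression``, which assembles
    the kernel, likelihood and inference method for you; the data below is
    purely illustrative)::

        import numpy as np
        import GPy

        X = np.random.rand(20, 1)
        Y = np.sin(6 * X) + 0.05 * np.random.randn(20, 1)
        m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))
        m.optimize()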
    """
    def __init__(self, X, Y, kernel, likelihood, mean_function=None, inference_method=None, name='gp', Y_metadata=None, normalizer=False):
        super(GP, self).__init__(name)

        assert X.ndim == 2
        if isinstance(X, (ObsAr, VariationalPosterior)):
            self.X = X.copy()
        else:
            self.X = ObsAr(X)
        self.num_data, self.input_dim = self.X.shape

        assert Y.ndim == 2
        logger.info("initializing Y")

        if normalizer is True:
            self.normalizer = MeanNorm()
        elif normalizer is False:
            self.normalizer = None
        else:
            self.normalizer = normalizer

        if self.normalizer is not None:
            self.normalizer.scale_by(Y)
            self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
            self.Y = Y
        elif isinstance(Y, np.ndarray):
            self.Y = ObsAr(Y)
            self.Y_normalized = self.Y
        else:
            self.Y = Y

        if Y.shape[0] != self.num_data:
            # There can be cases where we want more inputs than outputs, for
            # example if we have multiple latent function values.
            warnings.warn("There are more rows in your input data X than in your output data Y; be VERY sure this is what you want")
        _, self.output_dim = self.Y.shape

        assert (Y_metadata is None) or isinstance(Y_metadata, dict)
        self.Y_metadata = Y_metadata
        assert isinstance(kernel, kern.Kern)
        #assert self.input_dim == kernel.input_dim
        self.kern = kernel

        assert isinstance(likelihood, likelihoods.Likelihood)
        self.likelihood = likelihood

        if self.kern._effective_input_dim != self.X.shape[1]:
            warnings.warn("Your kernel has a different input dimension {} than the given X dimension {}. Be very sure this is what you want and you have not forgotten to set the right input dimension in your kernel".format(self.kern._effective_input_dim, self.X.shape[1]))

        # handle the mean function
        self.mean_function = mean_function
        if mean_function is not None:
            assert isinstance(self.mean_function, Mapping)
            assert mean_function.input_dim == self.input_dim
            assert mean_function.output_dim == self.output_dim
            self.link_parameter(mean_function)
        # find a sensible inference method
        logger.info("initializing inference method")
        if inference_method is None:
            if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
                inference_method = exact_gaussian_inference.ExactGaussianInference()
            else:
                inference_method = expectation_propagation.EP()
                print("defaulting to " + str(inference_method) + " for latent function inference")
        self.inference_method = inference_method

        logger.info("adding kernel and likelihood as parameters")
        self.link_parameter(self.kern)
        self.link_parameter(self.likelihood)
        self.posterior = None
    # The predictive variable to be used for predicting through the posterior
    # object's woodbury_vector and woodbury_inv is defined as
    # _predictive_variable, as long as the posterior has the right woodbury
    # entries. It is the input variable used for the covariance between
    # X_star and the posterior of the GP.
    # This is usually just a link to self.X (full GP) or self.Z (sparse GP).
    # Make sure to name this variable and the predict functions will "just work".
    # In maths the predictive variable appears as:
    #     K_{xx} - K_{xp} W_{pp}^{-1} K_{px}
    #     W_{pp} := \texttt{Woodbury inv}
    #     p := _predictive_variable
    @property
    def _predictive_variable(self):
        return self.X
    def set_XY(self, X=None, Y=None):
        """
        Set the input / output data of the model.
        This is useful if we wish to change our existing data but maintain the same model.

        :param X: input observations
        :type X: np.ndarray
        :param Y: output observations
        :type Y: np.ndarray
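
        Example (a minimal sketch, assuming an existing model ``m`` and new
        arrays ``X_new``, ``Y_new`` of compatible shapes)::

            m.set_XY(X_new, Y_new)  # replace both inputs and outputs
            m.set_Y(Y_new)          # or replace the outputs only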
"""
2015-09-04 16:01:44 +01:00
self . update_model ( False )
2014-11-03 17:26:31 +00:00
if Y is not None :
if self . normalizer is not None :
self . normalizer . scale_by ( Y )
self . Y_normalized = ObsAr ( self . normalizer . normalize ( Y ) )
self . Y = Y
else :
self . Y = ObsAr ( Y )
self . Y_normalized = self . Y
if X is not None :
if self . X in self . parameters :
# LVM models
if isinstance ( self . X , VariationalPosterior ) :
2014-11-03 17:37:33 +00:00
assert isinstance ( X , type ( self . X ) ) , " The given X must have the same type as the X in the model! "
2014-11-03 17:26:31 +00:00
self . unlink_parameter ( self . X )
self . X = X
2015-09-04 16:01:44 +01:00
self . link_parameter ( self . X )
2014-11-03 17:26:31 +00:00
else :
self . unlink_parameter ( self . X )
from . . core import Param
self . X = Param ( ' latent mean ' , X )
2015-09-04 16:01:44 +01:00
self . link_parameter ( self . X )
2014-11-03 17:26:31 +00:00
else :
self . X = ObsAr ( X )
2015-09-04 16:01:44 +01:00
self . update_model ( True )
2014-11-03 17:26:31 +00:00
    def set_X(self, X):
        """
        Set the input data of the model.

        :param X: input observations
        :type X: np.ndarray
        """
        self.set_XY(X=X)
    def set_Y(self, Y):
        """
        Set the output data of the model.

        :param Y: output observations
        :type Y: np.ndarray
        """
        self.set_XY(Y=Y)
    def parameters_changed(self):
        """
        Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model.
        In particular, in the GP class this method re-performs inference, recalculating the posterior, the log marginal likelihood and the gradients of the model.

        .. warning::
            This method is not designed to be called manually; the framework is set up to call it automatically upon changes to parameters. If you call this method yourself, there may be unexpected consequences.
        """
        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.mean_function, self.Y_metadata)
        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)
        if self.mean_function is not None:
            self.mean_function.update_gradients(self.grad_dict['dL_dm'], self.X)
    def log_likelihood(self):
        """
        The log marginal likelihood of the model, :math:`p(\mathbf{y})`; this is the objective function of the model being optimised.
        """
        return self._log_marginal_likelihood
    def _raw_predict(self, Xnew, full_cov=False, kern=None):
        """
        For making predictions; does not account for normalization or likelihood.

        full_cov is a boolean which defines whether the full covariance matrix
        of the prediction is computed. If full_cov is False (default), only the
        diagonal of the covariance is returned.

        .. math::
            p(f^*|X^*, X, Y) = \int_{-\infty}^{\infty} p(f^*|f, X^*) p(f|X, Y) df
                             = N(f^*|K_{x^*x}(K_{xx} + \Sigma)^{-1}Y, K_{x^*x^*} - K_{x^*x}(K_{xx} + \Sigma)^{-1}K_{xx^*})

            \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
        """
        if kern is None:
            kern = self.kern

        Kx = kern.K(self._predictive_variable, Xnew)
        mu = np.dot(Kx.T, self.posterior.woodbury_vector)
        if len(mu.shape) == 1:
            mu = mu.reshape(-1, 1)

        if full_cov:
            Kxx = kern.K(Xnew)
            if self.posterior.woodbury_inv.ndim == 2:
                var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
            elif self.posterior.woodbury_inv.ndim == 3:  # missing data
                var = np.empty((Kxx.shape[0], Kxx.shape[1], self.posterior.woodbury_inv.shape[2]))
                from ..util.linalg import mdot
                for i in range(var.shape[2]):
                    var[:, :, i] = Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx)
        else:
            Kxx = kern.Kdiag(Xnew)
            if self.posterior.woodbury_inv.ndim == 2:
                var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:, None]
            elif self.posterior.woodbury_inv.ndim == 3:  # missing data
                var = np.empty((Kxx.shape[0], self.posterior.woodbury_inv.shape[2]))
                for i in range(var.shape[1]):
                    var[:, i] = Kxx - np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)

        # add in the mean function
        if self.mean_function is not None:
            mu += self.mean_function.f(Xnew)

        return mu, var
    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, likelihood=None):
        """
        Predict the function(s) at the new point(s) Xnew.

        :param Xnew: The points at which to make a prediction
        :type Xnew: np.ndarray (Nnew x self.input_dim)
        :param full_cov: whether to return the full covariance matrix, or just the diagonal
        :type full_cov: bool
        :param Y_metadata: metadata about the predicting point to pass to the likelihood
        :param kern: The kernel to use for prediction (defaults to the model kern). This is useful for examining e.g. subprocesses.
        :returns: (mean, var):
            mean: posterior mean, a Numpy array, Nnew x self.output_dim
            var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise

        If full_cov and self.output_dim > 1, the return shape of var is Nnew x Nnew x self.output_dim. If self.output_dim == 1, the return shape is Nnew x Nnew.
        This is to allow for different normalizations of the output dimensions.

        Note: If you want the predictive quantiles (e.g. 95% confidence interval) use :py:func:`~GPy.core.gp.GP.predict_quantiles`.
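
        Example (a minimal sketch, assuming a trained 1-D regression model
        ``m``)::

            Xnew = np.linspace(0, 1, 100)[:, None]
            mean, var = m.predict(Xnew)                 # Nnew x 1 each
            mean, cov = m.predict(Xnew, full_cov=True)  # cov is Nnew x Nnew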
        """
        # predict the latent function values
        mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
        if self.normalizer is not None:
            mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)

        # now push through the likelihood
        if likelihood is None:
            likelihood = self.likelihood
        mean, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
        return mean, var
    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None, likelihood=None):
        """
        Get the predictive quantiles around the prediction at X.

        :param X: The points at which to make a prediction
        :type X: np.ndarray (Xnew x self.input_dim)
        :param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
        :type quantiles: tuple
        :param kern: optional kernel to use for prediction
        :type kern: GPy.kern.Kern
        :returns: list of quantiles for each X, one array per requested quantile
        :rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
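
        Example (a minimal sketch, assuming a trained model ``m``)::

            lower, upper = m.predict_quantiles(Xnew, quantiles=(2.5, 97.5))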
"""
2015-08-18 08:00:47 +01:00
m , v = self . _raw_predict ( X , full_cov = False , kern = kern )
2014-08-27 13:07:23 -07:00
if self . normalizer is not None :
m , v = self . normalizer . inverse_mean ( m ) , self . normalizer . inverse_variance ( v )
2015-10-02 07:47:57 +01:00
if likelihood is None :
likelihood = self . likelihood
return likelihood . predictive_quantiles ( m , v , quantiles , Y_metadata = Y_metadata )
2013-06-05 14:11:49 +01:00
    def predictive_gradients(self, Xnew, kern=None):
        """
        Compute the derivatives of the predicted latent function with respect to X*.

        Given a set of points at which to predict X* (size [N*, Q]), compute the
        derivatives of the mean and variance. Resulting arrays are sized:

            dmu_dX* -- [N*, Q, D], where D is the number of outputs in this GP (usually one).

        Note that this is not the same as computing the mean and variance of the derivative of the function!

            dv_dX* -- [N*, Q], (since all outputs have the same variance)

        :param Xnew: The points at which to get the predictive gradients
        :type Xnew: np.ndarray (Xnew x self.input_dim)
        :returns: dmu_dX, dv_dX
        :rtype: [np.ndarray (N*, Q, D), np.ndarray (N*, Q)]
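
        Example (a minimal sketch, assuming a trained model ``m``)::

            dmu_dX, dv_dX = m.predictive_gradients(Xnew)
            # dmu_dX[n, q, d] is the derivative of the mean of output d
            # at point n with respect to input dimension q.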
"""
2015-10-05 18:47:54 +01:00
if kern is None :
kern = self . kern
mean_jac = np . empty ( ( Xnew . shape [ 0 ] , Xnew . shape [ 1 ] , self . output_dim ) )
2014-08-13 10:36:54 +01:00
for i in range ( self . output_dim ) :
2015-10-05 18:47:54 +01:00
mean_jac [ : , : , i ] = kern . gradients_X ( self . posterior . woodbury_vector [ : , i : i + 1 ] . T , Xnew , self . _predictive_variable )
2014-08-13 10:36:54 +01:00
# gradients wrt the diagonal part k_{xx}
2015-10-05 18:47:54 +01:00
dv_dX = kern . gradients_X ( np . eye ( Xnew . shape [ 0 ] ) , Xnew )
2014-08-13 10:36:54 +01:00
#grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
2015-10-05 18:47:54 +01:00
alpha = - 2. * np . dot ( kern . K ( Xnew , self . _predictive_variable ) , self . posterior . woodbury_inv )
dv_dX + = kern . gradients_X ( alpha , Xnew , self . _predictive_variable )
return mean_jac , dv_dX
2014-08-13 10:36:54 +01:00
    def predict_jacobian(self, Xnew, kern=None, full_cov=True):
        """
        Compute the derivatives of the posterior of the GP.

        Given a set of points at which to predict X* (size [N*, Q]), compute the
        mean and variance of the derivative. Resulting arrays are sized:

            dL_dX* -- [N*, Q, D], where D is the number of outputs in this GP (usually one).

        Note that this is the mean and variance of the derivative,
        not the derivative of the mean and variance! (See predictive_gradients for that.)

            dv_dX* -- [N*, Q], (since all outputs have the same variance)

        In the missing-data case there will be one output variance per output
        dimension; this is not implemented for now.

        :param Xnew: The points at which to get the predictive gradients.
        :type Xnew: np.ndarray (Xnew x self.input_dim)
        :param kern: The kernel to compute the jacobian for.
        :param boolean full_cov: whether to return the full covariance of the jacobian.
        :returns: dmu_dX, dv_dX
        :rtype: [np.ndarray (N*, Q, D), np.ndarray (N*, Q, (D))]

        Note: We always return the sum over the input_dim gradients, as the
        off-diagonals in the input_dim are not needed for further calculations.
        This is a compromise for an increase in speed; mathematically the
        jacobian would have another dimension in Q.
        """
        if kern is None:
            kern = self.kern
        mean_jac = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))
        for i in range(self.output_dim):
            mean_jac[:, :, i] = kern.gradients_X(self.posterior.woodbury_vector[:, i:i+1].T, Xnew, self._predictive_variable)

        dK_dXnew_full = np.empty((self._predictive_variable.shape[0], Xnew.shape[0], Xnew.shape[1]))
        for i in range(self._predictive_variable.shape[0]):
            dK_dXnew_full[i] = kern.gradients_X([[1.]], Xnew, self._predictive_variable[[i]])

        if full_cov:
            dK2_dXdX = kern.gradients_XX([[1.]], Xnew)
        else:
            dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew)

        def compute_cov_inner(wi):
            if full_cov:
                # full covariance gradients:
                var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
            else:
                var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
            return var_jac

        if self.posterior.woodbury_inv.ndim == 3:  # missing data:
            if full_cov:
                var_jac = np.empty((Xnew.shape[0], Xnew.shape[0], Xnew.shape[1], self.output_dim))
                for d in range(self.posterior.woodbury_inv.shape[2]):
                    var_jac[:, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
            else:
                var_jac = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))
                for d in range(self.posterior.woodbury_inv.shape[2]):
                    var_jac[:, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
        else:
            var_jac = compute_cov_inner(self.posterior.woodbury_inv)
        return mean_jac, var_jac
    def predict_wishard_embedding(self, Xnew, kern=None, mean=True, covariance=True):
        """
        Predict the Wishart embedding G of the GP. This is the density of the
        input of the GP defined by the probabilistic function mapping f:

            G = J_mean.T * J_mean + output_dim * J_cov

        :param array-like Xnew: The points at which to evaluate the magnification.
        :param kern: The :py:class:`~GPy.kern.Kern` kernel to use for the magnification.
            Supplying only a part of the learning kernel gives insights into the density
            of a specific kernel part of the input function. E.g. one can see how dense the
            linear part of a kernel is compared to the non-linear part etc.
        """
        if kern is None:
            kern = self.kern

        mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)
        mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
        Sigma = np.zeros(mumuT.shape)
        if var_jac.ndim == 3:
            Sigma[(slice(None),) + np.diag_indices(Xnew.shape[1], 2)] = var_jac.sum(-1)
        else:
            Sigma[(slice(None),) + np.diag_indices(Xnew.shape[1], 2)] = self.output_dim * var_jac

        G = 0.
        if mean:
            G += mumuT
        if covariance:
            G += Sigma
        return G
    def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True):
        """
        Predict the magnification factor as

            sqrt(det(G))

        for each point n in Xnew, where G is the Wishart embedding computed by
        predict_wishard_embedding.
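
        Example (a minimal sketch, assuming a trained model ``m`` whose input
        space you want to inspect, e.g. a GPLVM-style model)::

            mag = m.predict_magnification(Xnew)  # shape (Nnew,)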
"""
2015-09-03 09:33:07 +01:00
G = self . predict_wishard_embedding ( Xnew , kern , mean , covariance )
2015-09-02 15:46:40 +01:00
from . . util . linalg import jitchol
2015-09-07 16:52:59 +01:00
mag = np . empty ( Xnew . shape [ 0 ] )
for n in range ( Xnew . shape [ 0 ] ) :
try :
mag [ n ] = np . sqrt ( np . exp ( 2 * np . sum ( np . log ( np . diag ( jitchol ( G [ n , : , : ] ) ) ) ) ) )
except :
mag [ n ] = np . sqrt ( np . linalg . det ( G [ n , : , : ] ) )
return mag
2015-09-02 09:06:17 +01:00
    def posterior_samples_f(self, X, size=10, full_cov=True, **predict_kwargs):
        """
        Samples the posterior GP at the points X.

        :param X: The points at which to take the samples.
        :type X: np.ndarray (Nnew x self.input_dim)
        :param size: the number of a posteriori samples.
        :type size: int.
        :param full_cov: whether to return the full covariance matrix, or just the diagonal.
        :type full_cov: bool.
        :returns: fsim: set of simulations
        :rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
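
        Example (a minimal sketch, assuming a trained single-output model
        ``m``)::

            fsim = m.posterior_samples_f(Xnew, size=5)  # Nnew x 5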
"""
2015-10-03 21:14:32 +01:00
m , v = self . _raw_predict ( X , full_cov = full_cov , * * predict_kwargs )
2014-08-27 13:07:23 -07:00
if self . normalizer is not None :
m , v = self . normalizer . inverse_mean ( m ) , self . normalizer . inverse_variance ( v )
2015-10-03 21:14:32 +01:00
def sim_one_dim ( m , v ) :
if not full_cov :
return np . random . multivariate_normal ( m . flatten ( ) , np . diag ( v . flatten ( ) ) , size ) . T
else :
return np . random . multivariate_normal ( m . flatten ( ) , v , size ) . T
2014-01-22 15:06:53 +00:00
2015-10-03 21:14:32 +01:00
if self . output_dim == 1 :
return sim_one_dim ( m , v )
else :
fsim = np . empty ( ( self . output_dim , self . num_data , size ) )
for d in range ( self . output_dim ) :
if full_cov and v . ndim == 3 :
fsim [ d ] = sim_one_dim ( m [ : , d ] , v [ : , : , d ] )
elif ( not full_cov ) and v . ndim == 2 :
fsim [ d ] = sim_one_dim ( m [ : , d ] , v [ : , d ] )
else :
fsim [ d ] = sim_one_dim ( m [ : , d ] , v )
2015-04-10 10:40:18 +01:00
return fsim
2014-01-22 15:06:53 +00:00
    def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None, likelihood=None, **predict_kwargs):
        """
        Samples the posterior GP at the points X, pushing the latent samples
        through the likelihood.

        :param X: the points at which to take the samples.
        :type X: np.ndarray (Nnew x self.input_dim)
        :param size: the number of a posteriori samples.
        :type size: int.
        :param full_cov: whether to return the full covariance matrix, or just the diagonal.
        :type full_cov: bool.
        :param likelihood: the likelihood to use for the samples (defaults to the model likelihood).
        :returns: Ysim: set of simulations
        :rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
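
        Example (a minimal sketch, assuming a trained single-output model
        ``m``)::

            Ysim = m.posterior_samples(Xnew, size=5)  # noisy samples, Nnew x 5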
"""
2015-10-03 21:14:32 +01:00
fsim = self . posterior_samples_f ( X , size , full_cov = full_cov , * * predict_kwargs )
if likelihood is None :
likelihood = self . likelihood
if fsim . ndim == 3 :
for d in range ( fsim . shape [ 0 ] ) :
fsim [ d ] = likelihood . samples ( fsim [ d ] , Y_metadata = Y_metadata )
else :
fsim = likelihood . samples ( fsim , Y_metadata = Y_metadata )
return fsim
2014-01-22 15:06:53 +00:00
    def input_sensitivity(self, summarize=True):
        """
        Returns the sensitivity for each dimension of this model.
        """
        return self.kern.input_sensitivity(summarize=summarize)

    def get_most_significant_input_dimensions(self, which_indices=None):
        return self.kern.get_most_significant_input_dimensions(which_indices)
    def optimize(self, optimizer=None, start=None, **kwargs):
        """
        Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.

        kwargs are passed to the optimizer. They can be:

        :param max_f_eval: maximum number of function evaluations
        :type max_f_eval: int
        :param messages: whether to display during optimisation
        :type messages: bool
        :param optimizer: which optimizer to use (defaults to self.preferred_optimizer); a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs', 'tnc'.
        :type optimizer: string
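
        Example (a minimal sketch, assuming a model ``m``)::

            m.optimize(optimizer='lbfgs', messages=True, max_f_eval=1000)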
"""
self . inference_method . on_optimization_start ( )
2014-05-23 11:11:21 +01:00
try :
super ( GP , self ) . optimize ( optimizer , start , * * kwargs )
except KeyboardInterrupt :
2015-02-26 08:11:11 +00:00
print ( " KeyboardInterrupt caught, calling on_optimization_end() to round things up " )
2014-05-23 11:11:21 +01:00
self . inference_method . on_optimization_end ( )
2014-06-27 15:19:11 -07:00
raise
2014-11-11 10:20:34 +00:00
    def infer_newX(self, Y_new, optimize=True):
        """
        Infer X for the new observed data *Y_new*.

        :param Y_new: the new observed data for inference
        :type Y_new: numpy.ndarray
        :param optimize: whether to optimize the location of the new X (True by default)
        :type optimize: boolean
        :return: a tuple containing the posterior estimation of X and the model that optimizes X
        :rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` or numpy.ndarray, :class:`~GPy.core.probabilistic_model.Model`)
        """
        from ..inference.latent_function_inference.inferenceX import infer_newX
        return infer_newX(self, Y_new, optimize=optimize)
    def log_predictive_density(self, x_test, y_test, Y_metadata=None):
        """
        Calculation of the log predictive density

        .. math::
            p(y_{*}|D) = \int p(y_{*}|f_{*}) p(f_{*}|\mu_{*}, \sigma^{2}_{*}) df_{*}

        :param x_test: test locations (x_{*})
        :type x_test: (Nx1) array
        :param y_test: test observations (y_{*})
        :type y_test: (Nx1) array
        :param Y_metadata: metadata associated with the test points
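
        Example (a minimal sketch, assuming a trained model ``m`` and held-out
        arrays ``x_test``, ``y_test``)::

            lpd = m.log_predictive_density(x_test, y_test)  # one value per test point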
"""
mu_star , var_star = self . _raw_predict ( x_test )
return self . likelihood . log_predictive_density ( y_test , mu_star , var_star , Y_metadata = Y_metadata )
def log_predictive_density_sampling ( self , x_test , y_test , Y_metadata = None , num_samples = 1000 ) :
"""
Calculation of the log predictive density by sampling
. . math :
p ( y_ { * } | D ) = p ( y_ { * } | f_ { * } ) p ( f_ { * } | \mu_ { * } \\sigma ^ { 2 } _ { * } )
: param x_test : test locations ( x_ { * } )
: type x_test : ( Nx1 ) array
: param y_test : test observations ( y_ { * } )
: type y_test : ( Nx1 ) array
: param Y_metadata : metadata associated with the test points
: param num_samples : number of samples to use in monte carlo integration
: type num_samples : int
"""
mu_star , var_star = self . _raw_predict ( x_test )
return self . likelihood . log_predictive_density_sampling ( y_test , mu_star , var_star , Y_metadata = Y_metadata , num_samples = num_samples )