Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-07 19:12:40 +02:00)

Commit daaad3c30c, parent b502efb1c5: "first commit in new structure"
16 changed files with 424 additions and 1750 deletions
@@ -19,7 +19,6 @@ class EP(likelihood):
         self.num_data, self.output_dim = self.data.shape
         self.is_heteroscedastic = True
         self.num_params = 0
-        self._transf_data = self.noise_model._preprocess_values(data)

         #Initial values - Likelihood approximation parameters:
         #p(y|f) = t(f|tau_tilde,v_tilde)
@@ -50,10 +49,26 @@ class EP(likelihood):
         self.VVT_factor = self.V
         self.trYYT = 0.

-    def predictive_values(self,mu,var,full_cov):
+    def predictive_values(self,mu,var,full_cov,**noise_args):
         if full_cov:
             raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
-        return self.noise_model.predictive_values(mu,var)
+        return self.noise_model.predictive_values(mu,var,**noise_args)
+
+    def log_predictive_density(self, y_test, mu_star, var_star):
+        """
+        Calculation of the log predictive density
+
+        .. math:
+            p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
+
+        :param y_test: test observations (y_{*})
+        :type y_test: (Nx1) array
+        :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
+        :type mu_star: (Nx1) array
+        :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
+        :type var_star: (Nx1) array
+        """
+        return self.noise_model.log_predictive_density(y_test, mu_star, var_star)

     def _get_params(self):
         #return np.zeros(0)
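The new log_predictive_density hook simply defers to the noise model. For a Gaussian noise model the deferred quantity has a closed form, since the predictive and noise variances add; a minimal sketch, not part of this commit, assuming a known noise variance sigma2:

import numpy as np

def gaussian_log_predictive_density(y_test, mu_star, var_star, sigma2=1.0):
    # log N(y* | mu*, var* + sigma2), evaluated pointwise on (Nx1) arrays
    v = var_star + sigma2
    return -0.5*np.log(2*np.pi*v) - 0.5*(y_test - mu_star)**2/v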
@@ -134,7 +149,7 @@ class EP(likelihood):
                 self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
                 self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
                 #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
+                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
                 #Site parameters update
                 Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
                 Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
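This hunk, and the two identical ones below for the sparse cases, only changes which targets are fed to moments_match: the raw self.data instead of the preprocessed _transf_data, matching the removal of _preprocess_values in the first hunk. The surrounding arithmetic is the standard power-EP site update; a minimal standalone sketch of one site's update, with the moment matching abstracted away (a hypothetical helper, not the commit's code):

def ep_site_update(mu_i, Sigma_ii, tau_tilde_i, v_tilde_i, mu_hat, sigma2_hat,
                   eta=1.0, delta=1.0):
    # Cavity: divide a power eta of site i out of the posterior marginal
    tau_cav = 1./Sigma_ii - eta*tau_tilde_i
    v_cav = mu_i/Sigma_ii - eta*v_tilde_i
    # Damped (delta) natural-parameter step towards the tilted moments
    Delta_tau = delta/eta*(1./sigma2_hat - 1./Sigma_ii)
    Delta_v = delta/eta*(mu_hat/sigma2_hat - mu_i/Sigma_ii)
    return tau_tilde_i + Delta_tau, v_tilde_i + Delta_v, tau_cav, v_cav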
@@ -233,7 +248,7 @@ class EP(likelihood):
                 self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
                 self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
                 #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
+                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
                 #Site parameters update
                 Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                 Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
@@ -336,7 +351,7 @@ class EP(likelihood):
                 self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
                 self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
                 #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
+                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
                 #Site parameters update
                 Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                 Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])

GPy/inference/laplace.py (new file, 403 lines)
@@ -0,0 +1,403 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#
#Parts of this file were influenced by the Matlab GPML framework written by
#Carl Edward Rasmussen & Hannes Nickisch, however all bugs are our own.
#
#The GPML code is released under the FreeBSD License.
#Copyright (c) 2005-2013 Carl Edward Rasmussen & Hannes Nickisch. All rights reserved.
#
#The code and associated documentation is available from
#http://gaussianprocess.org/gpml/code.

import numpy as np
import scipy as sp
from likelihood import likelihood
from ..util.linalg import mdot, jitchol, pddet, dpotrs
from functools import partial as partial_func
import warnings

class Laplace(likelihood):
    """Laplace approximation to a posterior"""

    def __init__(self, data, noise_model, extra_data=None):
        """
        Laplace Approximation

        Find the moments \hat{f} and the hessian at this point
        (using Newton-Raphson) of the unnormalised posterior

        Compute the GP variables (i.e. generate some Y^{squiggle} and
        z^{squiggle} which makes a gaussian the same as the laplace
        approximation to the posterior, but normalised

        Arguments
        ---------

        :param data: array of data the likelihood function is approximating
        :type data: NxD
        :param noise_model: likelihood function - subclass of noise_model
        :type noise_model: noise_model
        :param extra_data: additional data used by some likelihood functions,
        """
        self.data = data
        self.noise_model = noise_model
        self.extra_data = extra_data

        #Initial values
        self.N, self.D = self.data.shape
        self.is_heteroscedastic = True
        self.Nparams = 0
        self.NORMAL_CONST = ((0.5 * self.N) * np.log(2 * np.pi))

        self.restart()
        likelihood.__init__(self)

    def restart(self):
        """
        Reset likelihood variables to their defaults
        """
        #Initial values for the GP variables
        self.Y = np.zeros((self.N, 1))
        self.covariance_matrix = np.eye(self.N)
        self.precision = np.ones(self.N)[:, None]
        self.Z = 0
        self.YYT = None

        self.old_Ki_f = None
        self.bad_fhat = False

    def predictive_values(self,mu,var,full_cov,**noise_args):
        if full_cov:
            raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
        return self.noise_model.predictive_values(mu,var,**noise_args)

    def log_predictive_density(self, y_test, mu_star, var_star):
        """
        Calculation of the log predictive density

        .. math:
            p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})

        :param y_test: test observations (y_{*})
        :type y_test: (Nx1) array
        :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
        :type mu_star: (Nx1) array
        :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
        :type var_star: (Nx1) array
        """
        return self.noise_model.log_predictive_density(y_test, mu_star, var_star)

    def _get_params(self):
        return np.asarray(self.noise_model._get_params())

    def _get_param_names(self):
        return self.noise_model._get_param_names()

    def _set_params(self, p):
        return self.noise_model._set_params(p)

    def _shared_gradients_components(self):
        d3lik_d3fhat = self.noise_model.d3logpdf_df3(self.f_hat, self.data, extra_data=self.extra_data)
        dL_dfhat = 0.5*(np.diag(self.Ki_W_i)[:, None]*d3lik_d3fhat).T #why isn't this -0.5?
        I_KW_i = np.eye(self.N) - np.dot(self.K, self.Wi_K_i)
        return dL_dfhat, I_KW_i

    def _Kgradients(self):
        """
        Gradients with respect to prior kernel parameters dL_dK to be chained
        with dK_dthetaK to give dL_dthetaK
        :returns: dL_dK matrix
        :rtype: Matrix (1 x num_kernel_params)
        """
        dL_dfhat, I_KW_i = self._shared_gradients_components()
        dlp = self.noise_model.dlogpdf_df(self.f_hat, self.data, extra_data=self.extra_data)

        #Explicit
        #expl_a = np.dot(self.Ki_f, self.Ki_f.T)
        #expl_b = self.Wi_K_i
        #expl = 0.5*expl_a - 0.5*expl_b
        #dL_dthetaK_exp = dK_dthetaK(expl, X)

        #Implicit
        impl = mdot(dlp, dL_dfhat, I_KW_i)

        #No longer required as we are computing these in the gp already
        #otherwise we would take them away and add them back
        #dL_dthetaK_imp = dK_dthetaK(impl, X)
        #dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
        #dL_dK = expl + impl

        #No need to compute explicit as we are computing dZ_dK to account
        #for the difference between the K gradients of a normal GP,
        #and the K gradients including the implicit part
        dL_dK = impl
        return dL_dK

    def _gradients(self, partial):
        """
        Gradients with respect to likelihood parameters (dL_dthetaL)

        :param partial: Not needed by this likelihood
        :type partial: lambda function
        :rtype: array of derivatives (1 x num_likelihood_params)
        """
        dL_dfhat, I_KW_i = self._shared_gradients_components()
        dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = self.noise_model._laplace_gradients(self.f_hat, self.data, extra_data=self.extra_data)

        #len(dlik_dthetaL)
        num_params = len(self._get_param_names())
        # make space for one derivative for each likelihood parameter
        dL_dthetaL = np.zeros(num_params)
        for thetaL_i in range(num_params):
            #Explicit
            dL_dthetaL_exp = ( np.sum(dlik_dthetaL[:, thetaL_i])
                    #- 0.5*np.trace(mdot(self.Ki_W_i, (self.K, np.diagflat(dlik_hess_dthetaL[thetaL_i]))))
                    + np.dot(0.5*np.diag(self.Ki_W_i)[:,None].T, dlik_hess_dthetaL[:, thetaL_i])
                    )

            #Implicit
            dfhat_dthetaL = mdot(I_KW_i, self.K, dlik_grad_dthetaL[:, thetaL_i])
            dL_dthetaL_imp = np.dot(dL_dfhat, dfhat_dthetaL)
            dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp

        return dL_dthetaL

    def _compute_GP_variables(self):
        """
        Generate data Y which would give the normal distribution identical
        to the laplace approximation to the posterior, but normalised

        GPy expects a likelihood to be gaussian, so need to calculate
        the data Y^{\tilde} that makes the posterior match that found
        by a laplace approximation to a non-gaussian likelihood but with
        a gaussian likelihood

        Firstly,
        The hessian of the unnormalised posterior distribution is (K^{-1} + W)^{-1},
        i.e. z*N(f|f^{\hat}, (K^{-1} + W)^{-1}) but this assumes a non-gaussian likelihood,
        we wish to find the hessian \Sigma^{\tilde}
        that has the same curvature but using our new simulated data Y^{\tilde}
        i.e. we do N(Y^{\tilde}|f^{\hat}, \Sigma^{\tilde})N(f|0, K) = z*N(f|f^{\hat}, (K^{-1} + W)^{-1})
        and we wish to find what Y^{\tilde} and \Sigma^{\tilde}
        We find that Y^{\tilde} = W^{-1}(K^{-1} + W)f^{\hat} and \Sigma^{tilde} = W^{-1}

        Secondly,
        GPy optimizes the log marginal log p(y) = -0.5*ln|K+\Sigma^{\tilde}| - 0.5*Y^{\tilde}^{T}(K^{-1} + \Sigma^{tilde})^{-1}Y + lik.Z
        So we can suck up any differences between that and our log marginal likelihood approximation
        p^{\squiggle}(y) = -0.5*f^{\hat}K^{-1}f^{\hat} + log p(y|f^{\hat}) - 0.5*log |K||K^{-1} + W|
        which we want to optimize instead, by equating them and rearranging, the difference is added onto
        the log p(y) that GPy optimizes by default

        Thirdly,
        Since we have gradients that depend on how we move f^{\hat}, we have implicit components
        as well as the explicit dL_dK, we hold these differences in dZ_dK and add them to dL_dK in the
        gp.py code
        """
        Wi = 1.0/self.W
        self.Sigma_tilde = np.diagflat(Wi)

        Y_tilde = Wi*self.Ki_f + self.f_hat

        self.Wi_K_i = self.W12BiW12
        ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
        lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
        y_Wi_K_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)

        Z_tilde = (+ lik
                   - 0.5*self.ln_B_det
                   + 0.5*ln_det_Wi_K
                   - 0.5*self.f_Ki_f
                   + 0.5*y_Wi_K_i_y
                   + self.NORMAL_CONST
                   )

        #Convert to float as its (1, 1) and Z must be a scalar
        self.Z = np.float64(Z_tilde)
        self.Y = Y_tilde
        self.YYT = np.dot(self.Y, self.Y.T)
        self.covariance_matrix = self.Sigma_tilde
        self.precision = 1.0 / np.diag(self.covariance_matrix)[:, None]

        #Compute dZ_dK which is how the approximated distributions gradients differ from the dL_dK computed for other likelihoods
        self.dZ_dK = self._Kgradients()
        #+ 0.5*self.Wi_K_i - 0.5*np.dot(self.Ki_f, self.Ki_f.T) #since we are not adding the K gradients explicit part theres no need to compute this again

    def fit_full(self, K):
        """
        The laplace approximation algorithm, find K and expand hessian
        For nomenclature see Rasmussen & Williams 2006 - modified for numerical stability

        :param K: Prior covariance matrix evaluated at locations X
        :type K: NxN matrix
        """
        self.K = K.copy()

        #Find mode
        self.f_hat = self.rasm_mode(self.K)

        #Compute hessian and other variables at mode
        self._compute_likelihood_variables()

        #Compute fake variables replicating laplace approximation to posterior
        self._compute_GP_variables()

    def _compute_likelihood_variables(self):
        """
        Compute the variables required to compute gaussian Y variables
        """
        #At this point get the hessian matrix (or vector as W is diagonal)
        self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)

        if not self.noise_model.log_concave:
            #print "Under 1e-10: {}".format(np.sum(self.W < 1e-6))
            self.W[self.W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur

        self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))

        self.Ki_f = self.Ki_f
        self.f_Ki_f = np.dot(self.f_hat.T, self.Ki_f)
        self.Ki_W_i = self.K - mdot(self.K, self.W12BiW12, self.K)

    def _compute_B_statistics(self, K, W, a):
        """
        Rasmussen suggests the use of a numerically stable positive definite matrix B
        which has a positive diagonal element and can be easily inverted

        :param K: Prior Covariance matrix evaluated at locations X
        :type K: NxN matrix
        :param W: Negative hessian at a point (diagonal matrix)
        :type W: Vector of diagonal values of hessian (1xN)
        :param a: Matrix to calculate W12BiW12a
        :type a: Matrix NxN
        :returns: (W12BiW12, ln_B_det)
        """
        if not self.noise_model.log_concave:
            #print "Under 1e-10: {}".format(np.sum(W < 1e-6))
            W[W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
                               # If the likelihood is non-log-concave we want to say that there is a negative variance
                               # to cause the posterior to become less certain than the prior and likelihood,
                               # this is a property only held by non-log-concave likelihoods

        #W is diagonal so its sqrt is just the sqrt of the diagonal elements
        W_12 = np.sqrt(W)
        B = np.eye(self.N) + W_12*K*W_12.T
        L = jitchol(B)

        W12BiW12a = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
        ln_B_det = 2*np.sum(np.log(np.diag(L)))
        return W12BiW12a, ln_B_det
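    # Annotation (not part of the file): with B = I + W^{1/2} K W^{1/2}, the value
    # returned above for a = I satisfies W^{1/2} B^{-1} W^{1/2} = (K + W^{-1})^{-1},
    # so downstream code can use it wherever (K^{-1} + W)^{-1}-style terms appear
    # without ever factorising K itself. A quick numpy check on a toy positive
    # definite K (NxN) and positive W (Nx1):
    #     W_12 = np.sqrt(W)
    #     B = np.eye(N) + W_12*K*W_12.T
    #     lhs = W_12*np.linalg.solve(B, W_12*np.eye(N))
    #     assert np.allclose(lhs, np.linalg.inv(K + np.diagflat(1./W)))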
    def rasm_mode(self, K, MAX_ITER=40):
        """
        Rasmussen's numerically stable mode finding
        For nomenclature see Rasmussen & Williams 2006
        Influenced by GPML (BSD) code, all errors are our own

        :param K: Covariance matrix evaluated at locations X
        :type K: NxD matrix
        :param MAX_ITER: Maximum number of iterations of newton-raphson before forcing finish of optimisation
        :type MAX_ITER: scalar
        :returns: f_hat, mode on which to make laplace approximation
        :rtype: NxD matrix
        """
        #old_Ki_f = np.zeros((self.N, 1))

        #Start f's at zero originally, or if we have gone off track, try restarting
        if self.old_Ki_f is None or self.bad_fhat:
            old_Ki_f = np.random.rand(self.N, 1)/50.0
            #old_Ki_f = self.Y
            f = np.dot(K, old_Ki_f)
        else:
            #Start at the old best point
            old_Ki_f = self.old_Ki_f.copy()
            f = self.f_hat.copy()

        new_obj = -np.inf
        old_obj = np.inf

        def obj(Ki_f, f):
            return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)

        difference = np.inf
        epsilon = 1e-7
        #step_size = 1
        #rs = 0
        i = 0

        while difference > epsilon and i < MAX_ITER:
            W = -self.noise_model.d2logpdf_df2(f, self.data, extra_data=self.extra_data)

            W_f = W*f
            grad = self.noise_model.dlogpdf_df(f, self.data, extra_data=self.extra_data)

            b = W_f + grad
            W12BiW12Kb, _ = self._compute_B_statistics(K, W.copy(), np.dot(K, b))

            #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
            full_step_Ki_f = b - W12BiW12Kb
            dKi_f = full_step_Ki_f - old_Ki_f

            f_old = f.copy()
            def inner_obj(step_size, old_Ki_f, dKi_f, K):
                Ki_f = old_Ki_f + step_size*dKi_f
                f = np.dot(K, Ki_f)
                # This is nasty, need to set something within an optimization though
                self.tmp_Ki_f = Ki_f.copy()
                self.tmp_f = f.copy()
                return -obj(Ki_f, f)

            i_o = partial_func(inner_obj, old_Ki_f=old_Ki_f, dKi_f=dKi_f, K=K)
            #Find the stepsize that minimizes the objective function using a brent line search
            #The tolerance and maxiter matter for speed! Seems to be best to keep them low and make more full
            #steps than get this exact then make a step, if B was bigger it might be the other way around though
            #new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':5}).fun
            new_obj = sp.optimize.brent(i_o, tol=1e-4, maxiter=10)
            f = self.tmp_f.copy()
            Ki_f = self.tmp_Ki_f.copy()

            #Optimize without linesearch
            #f_old = f.copy()
            #update_passed = False
            #while not update_passed:
                #Ki_f = old_Ki_f + step_size*dKi_f
                #f = np.dot(K, Ki_f)

                #old_obj = new_obj
                #new_obj = obj(Ki_f, f)
                #difference = new_obj - old_obj
                ##print "difference: ",difference
                #if difference < 0:
                    ##print "Objective function rose", np.float(difference)
                    ##If the objective function isn't rising, restart optimization
                    #step_size *= 0.8
                    ##print "Reducing step-size to {ss:.3} and restarting optimization".format(ss=step_size)
                    ##objective function isn't increasing, try reducing step size
                    #f = f_old.copy() #it's actually faster not to go back to old location and just zigzag across the mode
                    #old_obj = new_obj
                    #rs += 1
                #else:
                    #update_passed = True

            #old_Ki_f = self.Ki_f.copy()

            #difference = abs(new_obj - old_obj)
            #old_obj = new_obj.copy()
            difference = np.abs(np.sum(f - f_old)) + np.abs(np.sum(Ki_f - old_Ki_f))
            #difference = np.abs(np.sum(Ki_f - old_Ki_f))/np.float(self.N)
            old_Ki_f = Ki_f.copy()
            i += 1

        self.old_Ki_f = old_Ki_f.copy()

        #Warn of bad fits
        if difference > epsilon:
            self.bad_fhat = True
            warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
        elif self.bad_fhat:
            self.bad_fhat = False
            warnings.warn("f_hat now perfect again")

        self.Ki_f = Ki_f
        return f
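rasm_mode is a line-searched variant of the standard Newton iteration for the posterior mode (Rasmussen & Williams 2006, Algorithm 3.1). A minimal undamped sketch for a Bernoulli likelihood with logistic link, y in {0,1} (an illustration only; the commit's version works through a noise_model object and a Brent line search):

import numpy as np

def laplace_mode(K, y, max_iter=40, tol=1e-7):
    # Newton-Raphson for f_hat = argmax_f log p(y|f) - 0.5 f^T K^{-1} f
    f = np.zeros(len(y))
    for _ in range(max_iter):
        pi = 1./(1. + np.exp(-f))                  # Bernoulli-logit mean
        W = pi*(1. - pi)                           # -d2 log p(y|f)/df2 (diagonal)
        b = W*f + (y - pi)                         # W f + gradient
        W12 = np.sqrt(W)
        B = np.eye(len(y)) + W12[:, None]*K*W12[None, :]
        # full_step_Ki_f = b - W^{1/2} B^{-1} W^{1/2} K b, as in rasm_mode
        Ki_f = b - W12*np.linalg.solve(B, W12*np.dot(K, b))
        f_new = np.dot(K, Ki_f)
        if np.max(np.abs(f_new - f)) < tol:
            return f_new
        f = f_new
    return f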
@@ -1,7 +0,0 @@
from ep import EP
from ep_mixed_noise import EP_Mixed_Noise
from gaussian import Gaussian
from gaussian_mixed_noise import Gaussian_Mixed_Noise
from noise_model_constructors import *
# TODO: from Laplace import Laplace
@@ -1,385 +0,0 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood

class EP_Mixed_Noise(likelihood):
    def __init__(self,data_list,noise_model_list,epsilon=1e-3,power_ep=[1.,1.]):
        """
        Expectation Propagation

        Arguments
        ---------
        :param data_list: list of outputs
        :param noise_model_list: a list of noise models
        :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations
        :type epsilon: float
        :param power_ep: list of power ep parameters
        """
        assert len(data_list) == len(noise_model_list)
        self.noise_model_list = noise_model_list
        n_list = [data.size for data in data_list]
        self.n_models = len(data_list)
        self.n_params = [noise_model._get_params().size for noise_model in noise_model_list]
        self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.n_models),n_list)])
        self.epsilon = epsilon
        self.eta, self.delta = power_ep
        self.data = np.vstack(data_list)
        self.N, self.output_dim = self.data.shape
        self.is_heteroscedastic = True
        self.num_params = 0 #FIXME
        self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)])
        #TODO non-gaussian index

        #Initial values - Likelihood approximation parameters:
        #p(y|f) = t(f|tau_tilde,v_tilde)
        self.tau_tilde = np.zeros(self.N)
        self.v_tilde = np.zeros(self.N)

        #initial values for the GP variables
        self.Y = np.zeros((self.N,1))
        self.covariance_matrix = np.eye(self.N)
        self.precision = np.ones(self.N)[:,None]
        self.Z = 0
        self.YYT = None
        self.V = self.precision * self.Y
        self.VVT_factor = self.V
        self.trYYT = 0.

    def restart(self):
        self.tau_tilde = np.zeros(self.N)
        self.v_tilde = np.zeros(self.N)
        self.Y = np.zeros((self.N,1))
        self.covariance_matrix = np.eye(self.N)
        self.precision = np.ones(self.N)[:,None]
        self.Z = 0
        self.YYT = None
        self.V = self.precision * self.Y
        self.VVT_factor = self.V
        self.trYYT = 0.

    def predictive_values(self,mu,var,full_cov,noise_model):
        """
        Predicts the output given the GP

        :param mu: GP's mean
        :param var: GP's variance
        :param full_cov: whether to return the full covariance matrix, or just the diagonal
        :type full_cov: False|True
        :param noise_model: noise model to use
        :type noise_model: integer
        """
        if full_cov:
            raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
        #_mu = []
        #_var = []
        #_q1 = []
        #_q2 = []
        #for m,v,o in zip(mu,var,output.flatten()):
        #    a,b,c,d = self.noise_model_list[int(o)].predictive_values(m,v)
        #    _mu.append(a)
        #    _var.append(b)
        #    _q1.append(c)
        #    _q2.append(d)
        #return np.vstack(_mu),np.vstack(_var),np.vstack(_q1),np.vstack(_q2)
        return self.noise_model_list[noise_model].predictive_values(mu,var)

    def _get_params(self):
        return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list])

    def _get_param_names(self):
        names = []
        for noise_model in self.noise_model_list:
            names += noise_model._get_param_names()
        return names

    def _set_params(self,p):
        cs_params = np.cumsum([0]+self.n_params)
        for i in range(len(self.n_params)):
            self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]])

    def _gradients(self,partial):
        #NOTE this is not tested
        return np.hstack([noise_model._gradients(partial) for noise_model in self.noise_model_list])

    def _compute_GP_variables(self):
        #Variables to be called from GP
        mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
        sigma_sum = 1./self.tau_ + 1./self.tau_tilde
        mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
        self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep

        self.Y = mu_tilde[:,None]
        self.YYT = np.dot(self.Y,self.Y.T)
        self.covariance_matrix = np.diag(1./self.tau_tilde)
        self.precision = self.tau_tilde[:,None]
        self.V = self.precision * self.Y
        self.VVT_factor = self.V
        self.trYYT = np.trace(self.YYT)

    def fit_full(self,K):
        """
        The expectation-propagation algorithm.
        For nomenclature see Rasmussen & Williams 2006.
        """
        #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
        mu = np.zeros(self.N)
        Sigma = K.copy()

        """
        Initial values - Cavity distribution parameters:
        q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
        sigma_ = 1./tau_
        mu_ = v_/tau_
        """
        self.tau_ = np.empty(self.N,dtype=float)
        self.v_ = np.empty(self.N,dtype=float)

        #Initial values - Marginal moments
        z = np.empty(self.N,dtype=float)
        self.Z_hat = np.empty(self.N,dtype=float)
        phi = np.empty(self.N,dtype=float)
        mu_hat = np.empty(self.N,dtype=float)
        sigma2_hat = np.empty(self.N,dtype=float)

        #Approximation
        epsilon_np1 = self.epsilon + 1.
        epsilon_np2 = self.epsilon + 1.
        self.iterations = 0
        self.np1 = [self.tau_tilde.copy()]
        self.np2 = [self.v_tilde.copy()]
        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
            update_order = np.random.permutation(self.N)
            for i in update_order:
                #Cavity distribution parameters
                self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
                self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
                #Marginal moments
                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
                #Site parameters update
                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
                self.tau_tilde[i] += Delta_tau
                self.v_tilde[i] += Delta_v
                #Posterior distribution parameters update
                DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
                mu = np.dot(Sigma,self.v_tilde)
                self.iterations += 1
            #Sigma recomputation with Cholesky decomposition
            Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
            B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
            L = jitchol(B)
            V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
            Sigma = K - np.dot(V.T,V)
            mu = np.dot(Sigma,self.v_tilde)
            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
            self.np1.append(self.tau_tilde.copy())
            self.np2.append(self.v_tilde.copy())

        return self._compute_GP_variables()

    def fit_DTC(self, Kmm, Kmn):
        """
        The expectation-propagation algorithm with sparse pseudo-input.
        For nomenclature see ... 2013.
        """
        num_inducing = Kmm.shape[0]

        #TODO: this doesn't work with uncertain inputs!

        """
        Prior approximation parameters:
        q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
        Sigma0 = Qnn = Knm*Kmmi*Kmn
        """
        KmnKnm = np.dot(Kmn,Kmn.T)
        Lm = jitchol(Kmm)
        Lmi = chol_inv(Lm)
        Kmmi = np.dot(Lmi.T,Lmi)
        KmmiKmn = np.dot(Kmmi,Kmn)
        Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
        LLT0 = Kmm.copy()

        #Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
        #KmnKnm = np.dot(Kmn, Kmn.T)
        #KmmiKmn = np.dot(Kmmi,Kmn)
        #Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
        #LLT0 = Kmm.copy()

        """
        Posterior approximation: q(f|y) = N(f| mu, Sigma)
        Sigma = Diag + P*R.T*R*P.T + K
        mu = w + P*Gamma
        """
        mu = np.zeros(self.N)
        LLT = Kmm.copy()
        Sigma_diag = Qnn_diag.copy()

        """
        Initial values - Cavity distribution parameters:
        q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
        sigma_ = 1./tau_
        mu_ = v_/tau_
        """
        self.tau_ = np.empty(self.N,dtype=float)
        self.v_ = np.empty(self.N,dtype=float)

        #Initial values - Marginal moments
        z = np.empty(self.N,dtype=float)
        self.Z_hat = np.empty(self.N,dtype=float)
        phi = np.empty(self.N,dtype=float)
        mu_hat = np.empty(self.N,dtype=float)
        sigma2_hat = np.empty(self.N,dtype=float)

        #Approximation
        epsilon_np1 = 1
        epsilon_np2 = 1
        self.iterations = 0
        np1 = [self.tau_tilde.copy()]
        np2 = [self.v_tilde.copy()]
        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
            update_order = np.random.permutation(self.N)
            for i in update_order:
                #Cavity distribution parameters
                self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
                self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
                #Marginal moments
                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
                #Site parameters update
                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
                self.tau_tilde[i] += Delta_tau
                self.v_tilde[i] += Delta_v
                #Posterior distribution parameters update
                DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
                L = jitchol(LLT)
                #cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
                V,info = dtrtrs(L,Kmn,lower=1)
                Sigma_diag = np.sum(V*V,-2)
                si = np.sum(V.T*V[:,i],-1)
                mu += (Delta_v-Delta_tau*mu[i])*si
                self.iterations += 1
            #Sigma recomputation with Cholesky decomposition
            LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
            L = jitchol(LLT)
            V,info = dtrtrs(L,Kmn,lower=1)
            V2,info = dtrtrs(L.T,V,lower=0)
            Sigma_diag = np.sum(V*V,-2)
            Knmv_tilde = np.dot(Kmn,self.v_tilde)
            mu = np.dot(V2.T,Knmv_tilde)
            epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
            epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
            np1.append(self.tau_tilde.copy())
            np2.append(self.v_tilde.copy())

        self._compute_GP_variables()

    def fit_FITC(self, Kmm, Kmn, Knn_diag):
        """
        The expectation-propagation algorithm with sparse pseudo-input.
        For nomenclature see Naish-Guzman and Holden, 2008.
        """
        num_inducing = Kmm.shape[0]

        """
        Prior approximation parameters:
        q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
        Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
        """
        Lm = jitchol(Kmm)
        Lmi = chol_inv(Lm)
        Kmmi = np.dot(Lmi.T,Lmi)
        P0 = Kmn.T
        KmnKnm = np.dot(P0.T, P0)
        KmmiKmn = np.dot(Kmmi,P0.T)
        Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
        Diag0 = Knn_diag - Qnn_diag
        R0 = jitchol(Kmmi).T

        """
        Posterior approximation: q(f|y) = N(f| mu, Sigma)
        Sigma = Diag + P*R.T*R*P.T + K
        mu = w + P*Gamma
        """
        self.w = np.zeros(self.N)
        self.Gamma = np.zeros(num_inducing)
        mu = np.zeros(self.N)
        P = P0.copy()
        R = R0.copy()
        Diag = Diag0.copy()
        Sigma_diag = Knn_diag
        RPT0 = np.dot(R0,P0.T)

        """
        Initial values - Cavity distribution parameters:
        q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
        sigma_ = 1./tau_
        mu_ = v_/tau_
        """
        self.tau_ = np.empty(self.N,dtype=float)
        self.v_ = np.empty(self.N,dtype=float)

        #Initial values - Marginal moments
        z = np.empty(self.N,dtype=float)
        self.Z_hat = np.empty(self.N,dtype=float)
        phi = np.empty(self.N,dtype=float)
        mu_hat = np.empty(self.N,dtype=float)
        sigma2_hat = np.empty(self.N,dtype=float)

        #Approximation
        epsilon_np1 = 1
        epsilon_np2 = 1
        self.iterations = 0
        self.np1 = [self.tau_tilde.copy()]
        self.np2 = [self.v_tilde.copy()]
        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
            update_order = np.random.permutation(self.N)
            for i in update_order:
                #Cavity distribution parameters
                self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
                self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
                #Marginal moments
                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
                #Site parameters update
                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
                self.tau_tilde[i] += Delta_tau
                self.v_tilde[i] += Delta_v
                #Posterior distribution parameters update
                dtd1 = Delta_tau*Diag[i] + 1.
                dii = Diag[i]
                Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
                pi_ = P[i,:].reshape(1,num_inducing)
                P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
                Rp_i = np.dot(R,pi_.T)
                RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
                R = jitchol(RTR).T
                self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
                self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
                RPT = np.dot(R,P.T)
                Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
                mu = self.w + np.dot(P,self.Gamma)
                self.iterations += 1
            #Sigma recomputation with Cholesky decomposition
            Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
            Diag = Diag0 * Iplus_Dprod_i
            P = Iplus_Dprod_i[:,None] * P0
            safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
            L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
            R,info = dtrtrs(L,R0,lower=1)
            RPT = np.dot(R,P.T)
            Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
            self.w = Diag * self.v_tilde
            self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
            mu = self.w + np.dot(P,self.Gamma)
            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
            self.np1.append(self.tau_tilde.copy())
            self.np2.append(self.v_tilde.copy())

        return self._compute_GP_variables()
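Both fitting loops above lean on DSYR from GPy's linalg utilities, a BLAS symmetric rank-1 update A += alpha*outer(x, x), as the inline comment next to the fit_DTC call says. The fit_full posterior update is therefore a Sherman-Morrison downdate; a plain numpy rendering (a sketch, not the deleted code):

import numpy as np

def sigma_site_downdate(Sigma, i, Delta_tau):
    # Equivalent of DSYR(Sigma, Sigma[:,i], -Delta_tau/(1 + Delta_tau*Sigma[i,i]))
    si = Sigma[:, i].copy()
    return Sigma - (Delta_tau/(1. + Delta_tau*Sigma[i, i]))*np.outer(si, si)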
@@ -1,110 +0,0 @@
import numpy as np
from likelihood import likelihood
from ..util.linalg import jitchol
from ..core.parameter import Param


class Gaussian(likelihood):
    """
    Likelihood class for doing Expectation propagation

    :param data: observed output
    :type data: Nx1 numpy.ndarray
    :param variance: noise parameter
    :param normalize: whether to normalize the data before computing (predictions will be in original scales)
    :type normalize: False|True
    """
    def __init__(self, data, variance=1., normalize=False):
        super(Gaussian, self).__init__('gaussian')
        self.is_heteroscedastic = False
        #self.num_params = 1
        self.Z = 0. # a correction factor which accounts for the approximation made
        N, self.output_dim = data.shape

        # normalization
        if normalize:
            self._offset = data.mean(0)[None, :]
            self._scale = data.std(0)[None, :]
            # Don't scale outputs which have zero variance to zero.
            self._scale[np.nonzero(self._scale == 0.)] = 1.0e-3
        else:
            self._offset = np.zeros((1, self.output_dim))
            self._scale = np.ones((1, self.output_dim))

        self.set_data(data)

        self.variance = Param('variance', variance)
        self.variance.add_observer(self, self.update_variance)
        self.add_parameter(self.variance)

        #self.parameters_changed()
        # self._set_params(np.asarray(variance))

    def set_data(self, data):
        self.data = data
        self.N, D = data.shape
        assert D == self.output_dim
        self.Y = (self.data - self._offset) / self._scale
        if D > self.N:
            self.YYT = np.dot(self.Y, self.Y.T)
            self.trYYT = np.trace(self.YYT)
            self.YYT_factor = jitchol(self.YYT)
        else:
            self.YYT = None
            self.trYYT = np.sum(np.square(self.Y))
            self.YYT_factor = self.Y

    # def _get_params(self):
    #     return np.asarray(self._variance)
    #
    # def _get_param_names(self):
    #     return ["noise_variance"]
    #
    # def _set_params(self, x):
    #     self.variance = x[0]

    def update_variance(self, v):
        if np.all(self.variance == 0.): #special case of zero noise
            self.precision = np.inf
            self.V = None
        else:
            self.precision = (1. / self.variance).squeeze()
            self.V = (self.precision) * self.Y
        self.VVT_factor = self.precision * self.YYT_factor
        self.covariance_matrix = np.eye(self.N) * self.variance
        #self._variance = self.variance.copy()

    # def parameters_changed(self):
    #     if np.any(self._variance != self.variance):
    #         self.update_variance()

    def predictive_values(self, mu, var, full_cov):
        """
        Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval
        """
        mean = mu * self._scale + self._offset
        if full_cov:
            if self.output_dim > 1:
                raise NotImplementedError, "TODO"
                # Note. for output_dim>1, we need to re-normalise all the outputs independently.
                # This will mess up computations of diag(true_var), below.
                # note that the upper, lower quantiles should be the same shape as mean
            # Augment the output variance with the likelihood variance and rescale.
            true_var = (var + np.eye(var.shape[0]) * self.variance) * self._scale ** 2
            _5pc = mean - 2.*np.sqrt(np.diag(true_var))
            _95pc = mean + 2.*np.sqrt(np.diag(true_var))
        else:
            true_var = (var + self.variance) * self._scale ** 2
            _5pc = mean - 2.*np.sqrt(true_var)
            _95pc = mean + 2.*np.sqrt(true_var)
        return mean, true_var, _5pc, _95pc

    def fit_full(self):
        """
        No approximations needed
        """
        pass

    def _gradients(self, partial):
        return np.sum(partial)
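predictive_values above folds the noise variance into the rescaled predictive variance and quotes mean +/- 2 standard deviations for its interval (roughly the central 95% band). The diagonal branch in isolation, as a sketch with scalar inputs (a hypothetical helper, not the deleted code):

import numpy as np

def unnormalised_interval(mu, var, scale, offset, noise_var):
    mean = mu*scale + offset
    true_var = (var + noise_var)*scale**2        # add likelihood noise, undo scaling
    half = 2.*np.sqrt(true_var)                  # ~95% central interval half-width
    return mean, true_var, mean - half, mean + half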
@@ -1,108 +0,0 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood
from . import Gaussian


class Gaussian_Mixed_Noise(likelihood):
    """
    Gaussian Likelihood for multiple outputs

    This is a wrapper around the likelihood.Gaussian class

    :param data_list: data observations
    :type data_list: list of numpy arrays (num_data_output_i x 1), one array per output
    :param noise_params: noise parameters of each output
    :type noise_params: list of floats, one per output
    :param normalize: whether to normalize the data before computing (predictions will be in original scales)
    :type normalize: False|True
    """
    def __init__(self, data_list, noise_params=None, normalize=True):
        self.num_params = len(data_list)
        self.n_list = [data.size for data in data_list]
        self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.num_params),self.n_list)])

        if noise_params is None:
            noise_params = [1.] * self.num_params
        else:
            assert self.num_params == len(noise_params), 'Number of noise parameters does not match the number of noise models.'

        self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)]
        self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list]
        self.data = np.vstack(data_list)
        self.N, self.output_dim = self.data.shape
        self._offset = np.zeros((1, self.output_dim))
        self._scale = np.ones((1, self.output_dim))

        self.is_heteroscedastic = True
        self.Z = 0. # a correction factor which accounts for the approximation made

        self.set_data(data_list)
        self._set_params(np.asarray(noise_params))

        super(Gaussian_Mixed_Noise, self).__init__()

    def set_data(self, data_list):
        self.data = np.vstack(data_list)
        self.N, D = self.data.shape
        assert D == self.output_dim
        self.Y = (self.data - self._offset) / self._scale
        if D > self.N:
            raise NotImplementedError
            #self.YYT = np.dot(self.Y, self.Y.T)
            #self.trYYT = np.trace(self.YYT)
            #self.YYT_factor = jitchol(self.YYT)
        else:
            self.YYT = None
            self.trYYT = np.sum(np.square(self.Y))
            self.YYT_factor = self.Y

    def predictive_values(self,mu,var,full_cov,noise_model):
        """
        Predicts the output given the GP

        :param mu: GP's mean
        :param var: GP's variance
        :param full_cov: whether to return the full covariance matrix, or just the diagonal
        :type full_cov: False|True
        :param noise_model: noise model to use
        :type noise_model: integer
        """
        if full_cov:
            raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
        return self.noise_model_list[noise_model].predictive_values(mu,var,full_cov)

    def _get_params(self):
        return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list])

    def _get_param_names(self):
        if len(self.noise_model_list) == 1:
            names = self.noise_model_list[0]._get_param_names()
        else:
            names = []
            for noise_model,i in zip(self.noise_model_list,range(len(self.n_list))):
                names.append(''.join(noise_model._get_param_names() + ['_%s' %i]))
        return names

    def _set_params(self,p):
        cs_params = np.cumsum([0]+self.n_params)

        for i in range(len(self.n_params)):
            self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]])
        self.precision = np.hstack([np.repeat(noise_model.precision,n) for noise_model,n in zip(self.noise_model_list,self.n_list)])[:,None]

        self.V = self.precision * self.Y
        self.VVT_factor = self.precision * self.YYT_factor
        self.covariance_matrix = np.eye(self.N) * 1./self.precision

    def _gradients(self,partial):
        gradients = []
        aux = np.cumsum([0]+self.n_list)
        for ai,af,noise_model in zip(aux[:-1],aux[1:],self.noise_model_list):
            gradients += [noise_model._gradients(partial[ai:af])]
        return np.hstack(gradients)
@@ -1,43 +0,0 @@
import numpy as np
import copy
from ..core.parameterized import Parameterized

class likelihood(Parameterized):
    """
    The atom for a likelihood class

    This object interfaces the GP and the data. The most basic likelihood
    (Gaussian) inherits directly from this, as does the EP algorithm

    Some things must be defined for this to work properly:

    - self.Y : the effective Gaussian target of the GP
    - self.N, self.D : Y.shape
    - self.covariance_matrix : the effective (noise) covariance of the GP targets
    - self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
    - self.is_heteroscedastic : enables significant computational savings in GP
    - self.precision : a scalar or vector representation of the effective target precision
    - self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
    - self.V : self.precision * self.Y
    """
    def __init__(self, name=None):
        Parameterized.__init__(self, name)

    # def _get_params(self):
    #     raise NotImplementedError
    #
    # def _get_param_names(self):
    #     raise NotImplementedError
    #
    # def _set_params(self, x):
    #     raise NotImplementedError

    def fit(self):
        raise NotImplementedError

    def _gradients(self, partial):
        raise NotImplementedError

    def predictive_values(self, mu, var):
        raise NotImplementedError
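The attribute contract in the docstring is easiest to see in a minimal concrete subclass; a sketch against the base class above (illustrative only, with fixed homoscedastic noise; not part of the commit):

import numpy as np

class MinimalGaussian(likelihood):
    def __init__(self, Y, variance=1.):
        self.Y = Y                                    # effective Gaussian target
        self.N, self.D = Y.shape
        self.is_heteroscedastic = False
        self.precision = 1./variance                  # scalar target precision
        self.covariance_matrix = variance*np.eye(self.N)
        self.Z = 0.                                   # no correction for a Gaussian
        self.YYT = None                               # optional D>N saving, unused
        self.V = self.precision*self.Y
        likelihood.__init__(self)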
@@ -1,88 +0,0 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
import noise_models

def binomial(gp_link=None):
    """
    Construct a binomial likelihood

    :param gp_link: a GPy gp_link function
    """
    if gp_link is None:
        gp_link = noise_models.gp_transformations.Probit()
    #else:
    #    assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'

    if isinstance(gp_link,noise_models.gp_transformations.Probit):
        analytical_mean = True
        analytical_variance = False

    elif isinstance(gp_link,noise_models.gp_transformations.Heaviside):
        analytical_mean = True
        analytical_variance = True

    else:
        analytical_mean = False
        analytical_variance = False

    return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance)

def exponential(gp_link=None):
    """
    Construct an exponential likelihood

    :param gp_link: a GPy gp_link function
    """
    if gp_link is None:
        gp_link = noise_models.gp_transformations.Identity()

    analytical_mean = False
    analytical_variance = False
    return noise_models.exponential_noise.Exponential(gp_link,analytical_mean,analytical_variance)

def gaussian_ep(gp_link=None,variance=1.):
    """
    Construct a gaussian likelihood

    :param gp_link: a GPy gp_link function
    :param variance: scalar
    """
    if gp_link is None:
        gp_link = noise_models.gp_transformations.Identity()
    #else:
    #    assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'

    analytical_mean = False
    analytical_variance = False
    return noise_models.gaussian_noise.Gaussian(gp_link,analytical_mean,analytical_variance,variance)

def poisson(gp_link=None):
    """
    Construct a Poisson likelihood

    :param gp_link: a GPy gp_link function
    """
    if gp_link is None:
        gp_link = noise_models.gp_transformations.Log_ex_1()
    #else:
    #    assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
    analytical_mean = False
    analytical_variance = False
    return noise_models.poisson_noise.Poisson(gp_link,analytical_mean,analytical_variance)

def gamma(gp_link=None,beta=1.):
    """
    Construct a Gamma likelihood

    :param gp_link: a GPy gp_link function
    :param beta: scalar
    """
    if gp_link is None:
        gp_link = noise_models.gp_transformations.Log_ex_1()
    analytical_mean = False
    analytical_variance = False
    return noise_models.gamma_noise.Gamma(gp_link,analytical_mean,analytical_variance,beta)
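Typical use of these constructors, assuming the module is importable directly (a sketch; the binomial case with the default Probit link gets analytical EP moments per the flags above):

import noise_model_constructors as constructors

binomial_noise = constructors.binomial()     # Probit link: analytical mean
poisson_noise = constructors.poisson()       # Log_ex_1 link: numerical moments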
@@ -1,7 +0,0 @@
import noise_distributions
import binomial_noise
import exponential_noise
import gaussian_noise
import gamma_noise
import poisson_noise
import gp_transformations
@@ -1,132 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution

class Binomial(NoiseDistribution):
    """
    Probit likelihood
    Y is expected to take values in {-1,1}
    -----
    $$
    L(x) = \\Phi (Y_i*f_i)
    $$
    """
    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
        super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance)

    def _preprocess_values(self,Y):
        """
        Check if the values of the observations correspond to the values
        assumed by the likelihood function.

        ..Note:: Binary classification algorithm works better with classes {-1,1}
        """
        Y_prep = Y.copy()
        Y1 = Y[Y.flatten()==1].size
        Y2 = Y[Y.flatten()==0].size
        assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
        Y_prep[Y.flatten() == 0] = -1
        return Y_prep

    def _moments_match_analytical(self,data_i,tau_i,v_i):
        """
        Moments match of the marginal approximation in EP algorithm

        :param i: number of observation (int)
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        """
        if isinstance(self.gp_link,gp_transformations.Probit):
            z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
            Z_hat = std_norm_cdf(z)
            phi = std_norm_pdf(z)
            mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
            sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)

        elif isinstance(self.gp_link,gp_transformations.Heaviside):
            a = data_i*v_i/np.sqrt(tau_i)
            Z_hat = std_norm_cdf(a)
            N = std_norm_pdf(a)
            mu_hat = v_i/tau_i + data_i*N/Z_hat/np.sqrt(tau_i)
            sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
            if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])):
                stop

        return Z_hat, mu_hat, sigma2_hat
||||||
|
|
||||||
def _predictive_mean_analytical(self,mu,sigma):
|
|
||||||
if isinstance(self.gp_link,gp_transformations.Probit):
|
|
||||||
return stats.norm.cdf(mu/np.sqrt(1+sigma**2))
|
|
||||||
elif isinstance(self.gp_link,gp_transformations.Heaviside):
|
|
||||||
return stats.norm.cdf(mu/sigma)
|
|
||||||
else:
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
def _predictive_variance_analytical(self,mu,sigma, pred_mean):
|
|
||||||
if isinstance(self.gp_link,gp_transformations.Heaviside):
|
|
||||||
return 0.
|
|
||||||
else:
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
||||||
def _mass(self,gp,obs):
|
|
||||||
#NOTE obs must be in {0,1}
|
|
||||||
p = self.gp_link.transf(gp)
|
|
||||||
return p**obs * (1.-p)**(1.-obs)
|
|
||||||
|
|
||||||
def _nlog_mass(self,gp,obs):
|
|
||||||
p = self.gp_link.transf(gp)
|
|
||||||
return obs*np.log(p) + (1.-obs)*np.log(1-p)
|
|
||||||
|
|
||||||
def _dnlog_mass_dgp(self,gp,obs):
|
|
||||||
p = self.gp_link.transf(gp)
|
|
||||||
dp = self.gp_link.dtransf_df(gp)
|
|
||||||
return obs/p * dp - (1.-obs)/(1.-p) * dp
|
|
||||||
|
|
||||||
def _d2nlog_mass_dgp2(self,gp,obs):
|
|
||||||
p = self.gp_link.transf(gp)
|
|
||||||
return (obs/p + (1.-obs)/(1.-p))*self.gp_link.d2transf_df2(gp) + ((1.-obs)/(1.-p)**2-obs/p**2)*self.gp_link.dtransf_df(gp)
|
|
||||||
|
|
||||||
def _mean(self,gp):
|
|
||||||
"""
|
|
||||||
Mass (or density) function
|
|
||||||
"""
|
|
||||||
return self.gp_link.transf(gp)
|
|
||||||
|
|
||||||
def _dmean_dgp(self,gp):
|
|
||||||
return self.gp_link.dtransf_df(gp)
|
|
||||||
|
|
||||||
def _d2mean_dgp2(self,gp):
|
|
||||||
return self.gp_link.d2transf_df2(gp)
|
|
||||||
|
|
||||||
def _variance(self,gp):
|
|
||||||
"""
|
|
||||||
Mass (or density) function
|
|
||||||
"""
|
|
||||||
p = self.gp_link.transf(gp)
|
|
||||||
return p*(1.-p)
|
|
||||||
|
|
||||||
def _dvariance_dgp(self,gp):
|
|
||||||
return self.gp_link.dtransf_df(gp)*(1. - 2.*self.gp_link.transf(gp))
|
|
||||||
|
|
||||||
def _d2variance_dgp2(self,gp):
|
|
||||||
return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2
|
|
||||||
|
|
||||||
|
|
||||||
def samples(self, gp):
|
|
||||||
"""
|
|
||||||
Returns a set of samples of observations based on a given value of the latent variable.
|
|
||||||
|
|
||||||
:param size: number of samples to compute
|
|
||||||
:param gp: latent variable
|
|
||||||
"""
|
|
||||||
orig_shape = gp.shape
|
|
||||||
gp = gp.flatten()
|
|
||||||
Ysim = np.array([np.random.binomial(1,self.gp_link.transf(gpj),size=1) for gpj in gp])
|
|
||||||
return Ysim.reshape(orig_shape)
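
The probit branch of _moments_match_analytical uses the closed-form moments of the tilted distribution Z = int Phi(y*f) N(f|mu,s2) df. A standalone check (not GPy code) that those formulas agree with numerical integration, with the cavity written in natural parameters tau = 1/s2, v = mu*tau:

import numpy as np
from scipy import stats
from scipy.integrate import quad

y, mu, s2 = 1., 0.3, 2.0                       # observation and cavity moments
tau, v = 1./s2, mu/s2                          # natural parameters of the cavity
tilted = lambda f: stats.norm.cdf(y*f) * stats.norm.pdf(f, mu, np.sqrt(s2))
Z, _  = quad(tilted, -20, 20)                  # zeroth moment
m1, _ = quad(lambda f: f*tilted(f), -20, 20)   # first moment
m2, _ = quad(lambda f: f*f*tilted(f), -20, 20)

z = y*v/np.sqrt(tau**2 + tau)                  # closed forms, as in the code above
Z_hat = stats.norm.cdf(z)
phi = stats.norm.pdf(z)
mu_hat = v/tau + y*phi/(Z_hat*np.sqrt(tau**2 + tau))
s2_hat = 1./tau - (phi/((tau**2 + tau)*Z_hat))*(z + phi/Z_hat)
print(np.allclose([Z, m1/Z, m2/Z - (m1/Z)**2], [Z_hat, mu_hat, s2_hat], atol=1e-6))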
@@ -1,68 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution

class Exponential(NoiseDistribution):
    """
    Exponential likelihood

    Y is expected to take values in (0,+inf)
    -----
    $$
    L(x) = \\exp(-Y_i/\\lambda(f_i))/\\lambda(f_i)
    $$
    """
    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
        super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)

    def _preprocess_values(self,Y):
        return Y

    def _mass(self,gp,obs):
        """
        Mass (or density) function
        """
        return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp)

    def _nlog_mass(self,gp,obs):
        """
        Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
        """
        return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp))

    def _dnlog_mass_dgp(self,gp,obs):
        return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp)

    def _d2nlog_mass_dgp2(self,gp,obs):
        fgp = self.gp_link.transf(gp)
        return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp)

    def _mean(self,gp):
        """
        Mean of the output given the latent function
        """
        return self.gp_link.transf(gp)

    def _dmean_dgp(self,gp):
        return self.gp_link.dtransf_df(gp)

    def _d2mean_dgp2(self,gp):
        return self.gp_link.d2transf_df2(gp)

    def _variance(self,gp):
        """
        Variance of the output given the latent function
        """
        return self.gp_link.transf(gp)**2

    def _dvariance_dgp(self,gp):
        return 2*self.gp_link.transf(gp)*self.gp_link.dtransf_df(gp)

    def _d2variance_dgp2(self,gp):
        return 2 * (self.gp_link.dtransf_df(gp)**2 + self.gp_link.transf(gp)*self.gp_link.d2transf_df2(gp))
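
A standalone sanity check (not GPy code) that _mean and _variance above are the exponential moments E[y] = lambda and V[y] = lambda**2 for the density p(y|lambda) = exp(-y/lambda)/lambda used in _mass:

import numpy as np
from scipy.integrate import quad

lam = 1.7
mass = lambda y: np.exp(-y/lam)/lam
m1, _ = quad(lambda y: y*mass(y), 0, np.inf)
m2, _ = quad(lambda y: y*y*mass(y), 0, np.inf)
print(np.allclose([m1, m2 - m1**2], [lam, lam**2]))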
@@ -1,71 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution

class Gamma(NoiseDistribution):
    """
    Gamma likelihood, with shape alpha = g(f)*beta and rate beta

    Y is expected to take values in (0,+inf)
    -----
    $$
    L(x) = \\beta^{\\alpha} Y_i^{\\alpha-1} \\exp(-\\beta Y_i) / \\Gamma(\\alpha)
    $$
    """
    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
        self.beta = beta
        super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance)

    def _preprocess_values(self,Y):
        return Y

    def _mass(self,gp,obs):
        """
        Mass (or density) function
        """
        #return stats.gamma.pdf(obs,a = self.gp_link.transf(gp)/self.variance,scale=self.variance)
        alpha = self.gp_link.transf(gp)*self.beta
        return obs**(alpha - 1.) * np.exp(-self.beta*obs) * self.beta**alpha / special.gamma(alpha)

    def _nlog_mass(self,gp,obs):
        """
        Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
        """
        alpha = self.gp_link.transf(gp)*self.beta
        return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))

    def _dnlog_mass_dgp(self,gp,obs):
        #d/df of -alpha*log(obs) - alpha*log(beta) + log Gamma(alpha), with alpha = transf(gp)*beta
        dalpha = self.gp_link.dtransf_df(gp)*self.beta
        return dalpha*(special.psi(self.gp_link.transf(gp)*self.beta) - np.log(obs) - np.log(self.beta))

    def _d2nlog_mass_dgp2(self,gp,obs):
        alpha = self.gp_link.transf(gp)*self.beta
        d2alpha = self.gp_link.d2transf_df2(gp)*self.beta
        return d2alpha*(special.psi(alpha) - np.log(obs) - np.log(self.beta)) + special.polygamma(1,alpha)*(self.gp_link.dtransf_df(gp)*self.beta)**2

    def _mean(self,gp):
        """
        Mean of the output given the latent function
        """
        return self.gp_link.transf(gp)

    def _dmean_dgp(self,gp):
        return self.gp_link.dtransf_df(gp)

    def _d2mean_dgp2(self,gp):
        return self.gp_link.d2transf_df2(gp)

    def _variance(self,gp):
        """
        Variance of the output given the latent function
        """
        return self.gp_link.transf(gp)/self.beta

    def _dvariance_dgp(self,gp):
        return self.gp_link.dtransf_df(gp)/self.beta

    def _d2variance_dgp2(self,gp):
        return self.gp_link.d2transf_df2(gp)/self.beta
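
The gradient above carries a -log(beta) term coming from the -alpha*log(beta) factor in _nlog_mass. A standalone finite-difference check (not GPy code), written with the identity link so that alpha = f*beta:

import numpy as np
from scipy import special

beta, obs, f = 2.0, 1.3, 0.9
nlm = lambda x: (1. - x*beta)*np.log(obs) + beta*obs - x*beta*np.log(beta) \
                + np.log(special.gamma(x*beta))
grad = beta*(special.psi(f*beta) - np.log(obs) - np.log(beta))
eps = 1e-6
print(np.allclose(grad, (nlm(f + eps) - nlm(f - eps))/(2.*eps)))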
@@ -1,98 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution

class Gaussian(NoiseDistribution):
    """
    Gaussian likelihood

    :param variance: variance of the Gaussian distribution
    """
    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1.):
        self.variance = variance
        super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)

    def _get_params(self):
        return np.array([self.variance])

    def _get_param_names(self):
        return ['noise_model_variance']

    def _set_params(self,p):
        self.variance = p

    def _gradients(self,partial):
        return np.zeros(1)
        #return np.sum(partial)

    def _preprocess_values(self,Y):
        """
        Check if the values of the observations correspond to the values
        assumed by the likelihood function.
        """
        return Y

    def _moments_match_analytical(self,data_i,tau_i,v_i):
        """
        Moments match of the marginal approximation in EP algorithm

        :param data_i: observed output value (float)
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        """
        sigma2_hat = 1./(1./self.variance + tau_i)
        mu_hat = sigma2_hat*(data_i/self.variance + v_i)
        sum_var = self.variance + 1./tau_i
        Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
        return Z_hat, mu_hat, sigma2_hat

    def _predictive_mean_analytical(self,mu,sigma):
        new_sigma2 = self.predictive_variance(mu,sigma)
        return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)

    def _predictive_variance_analytical(self,mu,sigma):
        return 1./(1./self.variance + 1./sigma**2)

    def _mass(self,gp,obs):
        #return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) )
        return stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance))

    def _nlog_mass(self,gp,obs):
        return .5*((self.gp_link.transf(gp)-obs)**2/self.variance + np.log(2.*np.pi*self.variance))

    def _dnlog_mass_dgp(self,gp,obs):
        return (self.gp_link.transf(gp)-obs)/self.variance * self.gp_link.dtransf_df(gp)

    def _d2nlog_mass_dgp2(self,gp,obs):
        return ((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2)/self.variance

    def _mean(self,gp):
        """
        Mean of the output given the latent function
        """
        return self.gp_link.transf(gp)

    def _dmean_dgp(self,gp):
        return self.gp_link.dtransf_df(gp)

    def _d2mean_dgp2(self,gp):
        return self.gp_link.d2transf_df2(gp)

    def _variance(self,gp):
        """
        Variance of the output given the latent function
        """
        return self.variance

    def _dvariance_dgp(self,gp):
        return 0

    def _d2variance_dgp2(self,gp):
        return 0
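
The Gaussian EP update above is the classic product of two Gaussians: N(f|v/tau, 1/tau) * N(y|f, var) is proportional to N(f|mu_hat, sigma2_hat), with normalizer N(y|v/tau, var + 1/tau). A standalone check (not GPy code):

import numpy as np
from scipy import stats
from scipy.integrate import quad

var, y, tau, v = 0.5, 1.2, 2.0, 0.8
tilted = lambda f: stats.norm.pdf(y, f, np.sqrt(var)) * stats.norm.pdf(f, v/tau, np.sqrt(1./tau))
Z, _  = quad(tilted, -20, 20)
m1, _ = quad(lambda f: f*tilted(f), -20, 20)

s2_hat = 1./(1./var + tau)
mu_hat = s2_hat*(y/var + v)
Z_hat = stats.norm.pdf(y, v/tau, np.sqrt(var + 1./tau))
print(np.allclose([Z, m1/Z], [Z_hat, mu_hat]))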
@@ -1,133 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf

class GPTransformation(object):
    """
    Link function class for non-Gaussian likelihood approximations: maps the
    latent function f to the space of the output's expected value.

    .. note:: The output values allowed depend on the likelihood_function used
    """
    def __init__(self):
        pass

    def transf(self,f):
        """
        Gaussian process transformation function, latent space -> output space
        """
        pass

    def dtransf_df(self,f):
        """
        derivative of transf(f) w.r.t. f
        """
        pass

    def d2transf_df2(self,f):
        """
        second derivative of transf(f) w.r.t. f
        """
        pass

class Identity(GPTransformation):
    """
    .. math::

        g(f) = f

    """
    def transf(self,f):
        return f

    def dtransf_df(self,f):
        return 1.

    def d2transf_df2(self,f):
        return 0


class Probit(GPTransformation):
    """
    Probit link: the transformation is the standard normal cdf,

    .. math::

        g(f) = \\Phi(f)

    """
    def transf(self,f):
        return std_norm_cdf(f)

    def dtransf_df(self,f):
        return std_norm_pdf(f)

    def d2transf_df2(self,f):
        return -f * std_norm_pdf(f)

class Log(GPTransformation):
    """
    Log link (f = \\log\\mu), so the transformation is

    .. math::

        g(f) = \\exp(f)

    """
    def transf(self,f):
        return np.exp(f)

    def dtransf_df(self,f):
        return np.exp(f)

    def d2transf_df2(self,f):
        return np.exp(f)

class Log_ex_1(GPTransformation):
    """
    Softplus link (f = \\log(\\exp(\\mu) - 1)), so the transformation is

    .. math::

        g(f) = \\log(1 + \\exp(f))

    """
    def transf(self,f):
        return np.log(1.+np.exp(f))

    def dtransf_df(self,f):
        return np.exp(f)/(1.+np.exp(f))

    def d2transf_df2(self,f):
        aux = np.exp(f)/(1.+np.exp(f))
        return aux*(1.-aux)

class Reciprocal(GPTransformation):
    def transf(self,f):
        return 1./f

    def dtransf_df(self,f):
        return -1./f**2

    def d2transf_df2(self,f):
        return 2./f**3

class Heaviside(GPTransformation):
    """

    .. math::

        g(f) = I_{\\{f > 0\\}}

    """
    def transf(self,f):
        return np.where(f>0, 1, 0)

    def dtransf_df(self,f):
        raise NotImplementedError, "This function is not differentiable!"

    def d2transf_df2(self,f):
        raise NotImplementedError, "This function is not differentiable!"
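
A standalone finite-difference check (not GPy code) of the Log_ex_1 (softplus) derivatives above: dtransf_df is the logistic sigmoid and d2transf_df2 is sig*(1-sig):

import numpy as np

transf  = lambda f: np.log(1. + np.exp(f))
dtransf = lambda f: np.exp(f)/(1. + np.exp(f))
f, eps = 0.4, 1e-6
print(np.allclose(dtransf(f), (transf(f + eps) - transf(f - eps))/(2.*eps)))
sig = dtransf(f)
print(np.allclose(sig*(1. - sig), (dtransf(f + eps) - dtransf(f - eps))/(2.*eps)))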
@@ -1,425 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats,special
import scipy as sp
import pylab as pb
from GPy.util.plot import gpplot
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations


class NoiseDistribution(object):
    """
    Likelihood class for doing Expectation Propagation

    :param Y: observed output (Nx1 numpy.darray)

    .. note:: Y values allowed depend on the LikelihoodFunction used
    """
    def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
        assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."
        self.gp_link = gp_link
        self.analytical_mean = analytical_mean
        self.analytical_variance = analytical_variance
        if self.analytical_mean:
            self.moments_match = self._moments_match_analytical
            self.predictive_mean = self._predictive_mean_analytical
        else:
            self.moments_match = self._moments_match_numerical
            self.predictive_mean = self._predictive_mean_numerical
        if self.analytical_variance:
            self.predictive_variance = self._predictive_variance_analytical
        else:
            self.predictive_variance = self._predictive_variance_numerical

    def _get_params(self):
        return np.zeros(0)

    def _get_param_names(self):
        return []

    def _set_params(self,p):
        pass

    def _gradients(self,partial):
        return np.zeros(0)

    def _preprocess_values(self,Y):
        """
        In case it is needed, this function assesses the output values or makes any pertinent transformation on them.

        :param Y: observed output
        :type Y: Nx1 numpy.darray
        """
        return Y

    def _product(self,gp,obs,mu,sigma):
        """
        Product between the cavity distribution and a likelihood factor.

        :param gp: latent variable
        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs)

    def _nlog_product_scaled(self,gp,obs,mu,sigma):
        """
        Negative log-product between the cavity distribution and a likelihood factor.

        .. note:: The constant term in the Gaussian distribution is ignored.

        :param gp: latent variable
        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs)

    def _dnlog_product_dgp(self,gp,obs,mu,sigma):
        """
        Derivative wrt the latent variable of the negative log-product between the cavity distribution and a likelihood factor.

        :param gp: latent variable
        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs)

    def _d2nlog_product_dgp2(self,gp,obs,mu,sigma):
        """
        Second derivative wrt the latent variable of the negative log-product between the cavity distribution and a likelihood factor.

        :param gp: latent variable
        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs)

    def _product_mode(self,obs,mu,sigma):
        """
        Newton-CG search for the mode of _product (cavity x likelihood factor).

        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma),disp=False)

    def _moments_match_analytical(self,obs,tau,v):
        """
        If available, this function computes the moments analytically.
        """
        pass

    def _moments_match_numerical(self,obs,tau,v):
        """
        Laplace approximation to calculate the moments.

        :param obs: observed output
        :param tau: cavity distribution 1st natural parameter (precision)
        :param v: cavity distribution 2nd natural parameter (mu*precision)
        """
        mu = v/tau
        mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau))
        sigma2_hat = 1./(tau + self._d2nlog_mass_dgp2(mu_hat,obs))
        Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat)
        return Z_hat,mu_hat,sigma2_hat
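
A standalone sketch (not GPy code) of the Laplace moment matching above, using a Poisson likelihood with exp link; a hand-rolled Newton loop stands in for the fmin_ncg call in _product_mode:

import numpy as np
from scipy import stats
from scipy.integrate import quad

obs, tau, v = 3., 0.8, 0.2
mu, sigma2 = v/tau, 1./tau                 # cavity mean and variance
d1 = lambda f: np.exp(f) - obs             # d/df of -log p(obs|f) with rate exp(f)
d2 = lambda f: np.exp(f)

f_hat = mu                                 # Newton's method on the tilted -log density
for _ in range(50):
    f_hat -= ((f_hat - mu)/sigma2 + d1(f_hat)) / (1./sigma2 + d2(f_hat))

mass = lambda f: stats.poisson.pmf(obs, np.exp(f))
s2_hat = 1./(tau + d2(f_hat))
Z_hat = np.exp(-.5*tau*(f_hat - mu)**2) * mass(f_hat) * np.sqrt(tau*s2_hat)

Z_num, _ = quad(lambda f: mass(f)*stats.norm.pdf(f, mu, np.sqrt(sigma2)), -10., 10.)
print(Z_hat, Z_num)                        # close but not equal: Laplace is approximate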
    def _nlog_conditional_mean_scaled(self,gp,mu,sigma):
        """
        Negative logarithm of the latent variable's predictive distribution times the output's mean given the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation

        .. note:: This function helps computing E(Y_star) = E(E(Y_star|f_star))
        """
        return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp))

    def _dnlog_conditional_mean_dgp(self,gp,mu,sigma):
        """
        Derivative of _nlog_conditional_mean_scaled wrt the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp)

    def _d2nlog_conditional_mean_dgp2(self,gp,mu,sigma):
        """
        Second derivative of _nlog_conditional_mean_scaled wrt the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2

    def _nlog_exp_conditional_variance_scaled(self,gp,mu,sigma):
        """
        Negative logarithm of the latent variable's predictive distribution times the output's variance given the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation

        .. note:: This function helps computing E(V(Y_star|f_star))
        """
        return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp))

    def _dnlog_exp_conditional_variance_dgp(self,gp,mu,sigma):
        """
        Derivative of _nlog_exp_conditional_variance_scaled wrt the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp)

    def _d2nlog_exp_conditional_variance_dgp2(self,gp,mu,sigma):
        """
        Second derivative of _nlog_exp_conditional_variance_scaled wrt the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2

    def _nlog_exp_conditional_mean_sq_scaled(self,gp,mu,sigma):
        """
        Negative logarithm of the latent variable's predictive distribution times the output's squared mean given the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation

        .. note:: This function helps computing E( E(Y_star|f_star)**2 )
        """
        return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp))

    def _dnlog_exp_conditional_mean_sq_dgp(self,gp,mu,sigma):
        """
        Derivative of _nlog_exp_conditional_mean_sq_scaled wrt the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp)

    def _d2nlog_exp_conditional_mean_sq_dgp2(self,gp,mu,sigma):
        """
        Second derivative of _nlog_exp_conditional_mean_sq_scaled wrt the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 )

    def _predictive_mean_analytical(self,mu,sigma):
        """
        If available, this function computes the predictive mean analytically.
        """
        pass

    def _predictive_variance_analytical(self,mu,sigma):
        """
        If available, this function computes the predictive variance analytically.
        """
        pass

    def _predictive_mean_numerical(self,mu,sigma):
        """
        Laplace approximation to the predictive mean: E(Y_star) = E( E(Y_star|f_star) )

        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma),disp=False)
        mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma)
        #debug plotting, left disabled as a string literal:
        """
        pb.figure()
        x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)])
        f = np.array([np.exp(-self._nlog_conditional_mean_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x])
        pb.plot(x,f,'b-')
        sigma2 = 1./self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma)
        f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2)
        k = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2)
        pb.plot(x,f2*mean,'r-')
        pb.vlines(maximum,0,f.max())
        """
        return mean

    def _predictive_mean_sq(self,mu,sigma):
        """
        Laplace approximation to the predictive mean squared: E(Y_star**2) = E( E(Y_star|f_star)**2 )

        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma),disp=False)
        mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma)
        return mean_squared

    def _predictive_variance_numerical(self,mu,sigma,predictive_mean=None):
        """
        Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )

        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        :param predictive_mean: output's predictive mean; if None, the predictive_mean function will be called.
        """
        # E( V(Y_star|f_star) )
        maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma),disp=False)
        exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma))*sigma)

        #debug plotting, left disabled as a string literal:
        """
        pb.figure()
        x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)])
        f = np.array([np.exp(-self._nlog_exp_conditional_variance_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x])
        pb.plot(x,f,'b-')
        sigma2 = 1./self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma)
        f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2)
        k = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2)
        pb.plot(x,f2*exp_var,'r--')
        pb.vlines(maximum,0,f.max())
        """

        #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
        exp_exp2 = self._predictive_mean_sq(mu,sigma)
        if predictive_mean is None:
            predictive_mean = self.predictive_mean(mu,sigma)
        var_exp = exp_exp2 - predictive_mean**2
        return exp_var + var_exp

    def _predictive_percentiles(self,p,mu,sigma):
        """
        Percentiles of the predictive distribution

        :param p: lower tail probability
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        qf = stats.norm.ppf(p,mu,sigma)
        return self.gp_link.transf(qf)
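
No integration is needed in _predictive_percentiles: for a monotone non-decreasing link, quantiles of transf(f) are transf of the latent quantiles, which is what the code exploits. For example (standalone, not GPy code), with the softplus link:

import numpy as np
from scipy import stats

mu, sigma, p = 0.5, 0.8, 0.975
qf = stats.norm.ppf(p, mu, sigma)     # latent 97.5th percentile
qy = np.log(1. + np.exp(qf))          # softplus of it = 97.5th percentile of transf(f)
print(qf, qy)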
    def _nlog_joint_predictive_scaled(self,x,mu,sigma):
        """
        Negative logarithm of the joint predictive distribution (latent variable and output).

        :param x: tuple (latent variable,output)
        :param mu: latent variable's predictive mean
        :param sigma: latent variable's predictive standard deviation
        """
        return self._nlog_product_scaled(x[0],x[1],mu,sigma)

    def _gradient_nlog_joint_predictive(self,x,mu,sigma):
        """
        Gradient of _nlog_joint_predictive_scaled.

        :param x: tuple (latent variable,output)
        :param mu: latent variable's predictive mean
        :param sigma: latent variable's predictive standard deviation

        .. note:: Only available when the output is continuous
        """
        assert not self.discrete, "Gradient not available for discrete outputs."
        return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0])))

    def _hessian_nlog_joint_predictive(self,x,mu,sigma):
        """
        Hessian of _nlog_joint_predictive_scaled.

        :param x: tuple (latent variable,output)
        :param mu: latent variable's predictive mean
        :param sigma: latent variable's predictive standard deviation

        .. note:: Only available when the output is continuous
        """
        assert not self.discrete, "Hessian not available for discrete outputs."
        cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1])
        return np.array((self._d2nlog_product_dgp2(gp=x[0],obs=x[1],mu=mu,sigma=sigma),cross_derivative,cross_derivative,self._d2nlog_mass_dobs2(obs=x[1],gp=x[0]))).reshape(2,2)

    def _joint_predictive_mode(self,mu,sigma):
        """
        Mode of the joint predictive distribution (latent variable and output), found by Newton-CG.

        :param mu: latent variable's predictive mean
        :param sigma: latent variable's predictive standard deviation
        """
        return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma),disp=False)

    def predictive_values(self,mu,var):
        """
        Compute the mean, variance and confidence interval (percentiles 2.5 and 97.5) of the prediction.

        :param mu: mean of the latent variable
        :param var: variance of the latent variable
        """
        if isinstance(mu,float) or isinstance(mu,int):
            mu = [mu]
            var = [var]
        pred_mean = []
        pred_var = []
        q1 = []
        q3 = []
        for m,s in zip(mu,np.sqrt(var)):
            pred_mean.append(self.predictive_mean(m,s))
            pred_var.append(self.predictive_variance(m,s,pred_mean[-1]))
            q1.append(self._predictive_percentiles(.025,m,s))
            q3.append(self._predictive_percentiles(.975,m,s))
        pred_mean = np.vstack(pred_mean)
        pred_var = np.vstack(pred_var)
        q1 = np.vstack(q1)
        q3 = np.vstack(q3)
        return pred_mean, pred_var, q1, q3


    def samples(self, gp):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

        :param gp: latent variable
        """
        pass
@@ -1,69 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution

class Poisson(NoiseDistribution):
    """
    Poisson likelihood

    .. math::
        L(x) = \\exp(-\\lambda) * \\frac{\\lambda^{Y_i}}{Y_i!}

    .. note:: Y is expected to take values in {0,1,2,...}
    """
    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
        super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)

    def _preprocess_values(self,Y): #TODO
        return Y

    def _mass(self,gp,obs):
        """
        Mass (or density) function
        """
        return stats.poisson.pmf(obs,self.gp_link.transf(gp))

    def _nlog_mass(self,gp,obs):
        """
        Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
        """
        return self.gp_link.transf(gp) - obs * np.log(self.gp_link.transf(gp)) + np.log(special.gamma(obs+1))

    def _dnlog_mass_dgp(self,gp,obs):
        return self.gp_link.dtransf_df(gp) * (1. - obs/self.gp_link.transf(gp))

    def _d2nlog_mass_dgp2(self,gp,obs):
        d2_df = self.gp_link.d2transf_df2(gp)
        transf = self.gp_link.transf(gp)
        return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df

    def _mean(self,gp):
        """
        Mean of the output given the latent function
        """
        return self.gp_link.transf(gp)

    def _dmean_dgp(self,gp):
        return self.gp_link.dtransf_df(gp)

    def _d2mean_dgp2(self,gp):
        return self.gp_link.d2transf_df2(gp)

    def _variance(self,gp):
        """
        Variance of the output given the latent function
        """
        return self.gp_link.transf(gp)

    def _dvariance_dgp(self,gp):
        return self.gp_link.dtransf_df(gp)

    def _d2variance_dgp2(self,gp):
        return self.gp_link.d2transf_df2(gp)
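
A standalone finite-difference check (not GPy code) of _dnlog_mass_dgp above, with the softplus link g(f) = log(1 + exp(f)):

import numpy as np
from scipy import special

obs, f, eps = 4., 0.7, 1e-6
g  = lambda x: np.log(1. + np.exp(x))
dg = lambda x: np.exp(x)/(1. + np.exp(x))
nlm = lambda x: g(x) - obs*np.log(g(x)) + np.log(special.gamma(obs + 1.))
grad = dg(f)*(1. - obs/g(f))
print(np.allclose(grad, (nlm(f + eps) - nlm(f - eps))/(2.*eps)))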