mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-18 13:55:14 +02:00
[huge merge] trying to merge old master and master
This commit is contained in:
commit
180650ec85
308 changed files with 27071 additions and 24550 deletions
|
|
@ -1,7 +1,8 @@
|
|||
from ep import EP
|
||||
from laplace import Laplace
|
||||
from ep_mixed_noise import EP_Mixed_Noise
|
||||
from bernoulli import Bernoulli
|
||||
from exponential import Exponential
|
||||
from gaussian import Gaussian
|
||||
from gaussian_mixed_noise import Gaussian_Mixed_Noise
|
||||
import noise_models
|
||||
from noise_model_constructors import *
|
||||
from gamma import Gamma
|
||||
from poisson import Poisson
|
||||
from student_t import StudentT
|
||||
from likelihood import Likelihood
|
||||
from mixed_noise import MixedNoise
|
||||
|
|
|
|||
224
GPy/likelihoods/bernoulli.py
Normal file
224
GPy/likelihoods/bernoulli.py
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
from scipy import stats
|
||||
|
||||
class Bernoulli(Likelihood):
|
||||
"""
|
||||
Bernoulli likelihood
|
||||
|
||||
.. math::
|
||||
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
|
||||
|
||||
.. Note::
|
||||
Y takes values in either {-1, 1} or {0, 1}.
|
||||
link function should have the domain [0, 1], e.g. probit (default) or Heaviside
|
||||
|
||||
.. See also::
|
||||
likelihood.py, for the parent class
|
||||
"""
|
||||
def __init__(self, gp_link=None):
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Probit()
|
||||
|
||||
super(Bernoulli, self).__init__(gp_link, 'Bernoulli')
|
||||
|
||||
if isinstance(gp_link , (link_functions.Heaviside, link_functions.Probit)):
|
||||
self.log_concave = True
|
||||
|
||||
def _preprocess_values(self, Y):
|
||||
"""
|
||||
Check if the values of the observations correspond to the values
|
||||
assumed by the likelihood function.
|
||||
|
||||
..Note:: Binary classification algorithm works better with classes {-1, 1}
|
||||
"""
|
||||
Y_prep = Y.copy()
|
||||
Y1 = Y[Y.flatten()==1].size
|
||||
Y2 = Y[Y.flatten()==0].size
|
||||
assert Y1 + Y2 == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0, 1}.'
|
||||
Y_prep[Y.flatten() == 0] = -1
|
||||
return Y_prep
|
||||
|
||||
def moments_match_ep(self, Y_i, tau_i, v_i):
|
||||
"""
|
||||
Moments match of the marginal approximation in EP algorithm
|
||||
|
||||
:param i: number of observation (int)
|
||||
:param tau_i: precision of the cavity distribution (float)
|
||||
:param v_i: mean/variance of the cavity distribution (float)
|
||||
"""
|
||||
if Y_i == 1:
|
||||
sign = 1.
|
||||
elif Y_i == 0 or Y_i == -1:
|
||||
sign = -1
|
||||
else:
|
||||
raise ValueError("bad value for Bernoulli observation (0, 1)")
|
||||
if isinstance(self.gp_link, link_functions.Probit):
|
||||
z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
|
||||
Z_hat = std_norm_cdf(z)
|
||||
phi = std_norm_pdf(z)
|
||||
mu_hat = v_i/tau_i + sign*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
|
||||
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
|
||||
|
||||
elif isinstance(self.gp_link, link_functions.Heaviside):
|
||||
a = sign*v_i/np.sqrt(tau_i)
|
||||
Z_hat = std_norm_cdf(a)
|
||||
N = std_norm_pdf(a)
|
||||
mu_hat = v_i/tau_i + sign*N/Z_hat/np.sqrt(tau_i)
|
||||
sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
|
||||
else:
|
||||
#TODO: do we want to revert to numerical quadrature here?
|
||||
raise ValueError("Exact moment matching not available for link {}".format(self.gp_link.__name__))
|
||||
|
||||
return Z_hat, mu_hat, sigma2_hat
|
||||
|
||||
def predictive_mean(self, mu, variance, Y_metadata=None):
|
||||
|
||||
if isinstance(self.gp_link, link_functions.Probit):
|
||||
return stats.norm.cdf(mu/np.sqrt(1+variance))
|
||||
|
||||
elif isinstance(self.gp_link, link_functions.Heaviside):
|
||||
return stats.norm.cdf(mu/np.sqrt(variance))
|
||||
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
def predictive_variance(self, mu, variance, pred_mean, Y_metadata=None):
|
||||
|
||||
if isinstance(self.gp_link, link_functions.Heaviside):
|
||||
return 0.
|
||||
else:
|
||||
return np.nan
|
||||
|
||||
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Likelihood function given inverse link of f.
|
||||
|
||||
.. math::
|
||||
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
|
||||
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
|
||||
.. Note:
|
||||
Each y_i must be in {0, 1}
|
||||
"""
|
||||
#objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y))
|
||||
return np.where(y, inv_link_f, 1.-inv_link_f)
|
||||
|
||||
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Log Likelihood function given inverse link of f.
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
|
||||
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
:returns: log likelihood evaluated at points inverse link of f.
|
||||
:rtype: float
|
||||
"""
|
||||
#objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f)
|
||||
p = np.where(y==1, inv_link_f, 1.-inv_link_f)
|
||||
return np.log(np.clip(p, 1e-6 ,np.inf))
|
||||
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f.
|
||||
|
||||
.. math::
|
||||
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
|
||||
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
:returns: gradient of log likelihood evaluated at points inverse link of f.
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
#grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f)
|
||||
#grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f))
|
||||
ff = np.clip(inv_link_f, 1e-6, 1-1e-6)
|
||||
denom = np.where(y, ff, -(1-ff))
|
||||
return 1./denom
|
||||
|
||||
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j
|
||||
i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j.
|
||||
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
|
||||
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f.
|
||||
:rtype: Nx1 array
|
||||
|
||||
.. Note::
|
||||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
|
||||
"""
|
||||
#d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2)
|
||||
#d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f))
|
||||
arg = np.where(y, inv_link_f, 1.-inv_link_f)
|
||||
ret = -1./np.square(np.clip(arg, 1e-3, np.inf))
|
||||
if np.any(np.isinf(ret)):
|
||||
stop
|
||||
return ret
|
||||
|
||||
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Third order derivative log-likelihood function at y given inverse link of f w.r.t inverse link of f
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}}
|
||||
|
||||
:param inv_link_f: latent variables passed through inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in bernoulli
|
||||
:returns: third derivative of log likelihood evaluated at points inverse_link(f)
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
#d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3))
|
||||
state = np.seterr(divide='ignore')
|
||||
# TODO check y \in {0, 1} or {-1, 1}
|
||||
d3logpdf_dlink3 = np.where(y, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3))
|
||||
np.seterr(**state)
|
||||
return d3logpdf_dlink3
|
||||
|
||||
def samples(self, gp, Y_metadata=None):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
:param gp: latent variable
|
||||
"""
|
||||
orig_shape = gp.shape
|
||||
gp = gp.flatten()
|
||||
ns = np.ones_like(gp, dtype=int)
|
||||
Ysim = np.random.binomial(ns, self.gp_link.transf(gp))
|
||||
return Ysim.reshape(orig_shape)
|
||||
|
||||
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
|
||||
pass
|
||||
|
|
@ -1,392 +0,0 @@
|
|||
import numpy as np
|
||||
from scipy import stats
|
||||
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
|
||||
from likelihood import likelihood
|
||||
|
||||
class EP(likelihood):
|
||||
def __init__(self,data,noise_model):
|
||||
"""
|
||||
Expectation Propagation
|
||||
|
||||
:param data: data to model
|
||||
:type data: numpy array
|
||||
:param noise_model: noise distribution
|
||||
:type noise_model: A GPy noise model
|
||||
|
||||
"""
|
||||
self.noise_model = noise_model
|
||||
self.data = data
|
||||
self.num_data, self.output_dim = self.data.shape
|
||||
self.is_heteroscedastic = True
|
||||
self.num_params = 0
|
||||
|
||||
#Initial values - Likelihood approximation parameters:
|
||||
#p(y|f) = t(f|tau_tilde,v_tilde)
|
||||
self.tau_tilde = np.zeros(self.num_data)
|
||||
self.v_tilde = np.zeros(self.num_data)
|
||||
|
||||
#initial values for the GP variables
|
||||
self.Y = np.zeros((self.num_data,1))
|
||||
self.covariance_matrix = np.eye(self.num_data)
|
||||
self.precision = np.ones(self.num_data)[:,None]
|
||||
self.Z = 0
|
||||
self.YYT = None
|
||||
self.V = self.precision * self.Y
|
||||
self.VVT_factor = self.V
|
||||
self.trYYT = 0.
|
||||
|
||||
super(EP, self).__init__()
|
||||
|
||||
def restart(self):
|
||||
self.tau_tilde = np.zeros(self.num_data)
|
||||
self.v_tilde = np.zeros(self.num_data)
|
||||
self.Y = np.zeros((self.num_data,1))
|
||||
self.covariance_matrix = np.eye(self.num_data)
|
||||
self.precision = np.ones(self.num_data)[:,None]
|
||||
self.Z = 0
|
||||
self.YYT = None
|
||||
self.V = self.precision * self.Y
|
||||
self.VVT_factor = self.V
|
||||
self.trYYT = 0.
|
||||
|
||||
def predictive_values(self,mu,var,full_cov,**noise_args):
|
||||
if full_cov:
|
||||
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
|
||||
return self.noise_model.predictive_values(mu,var,**noise_args)
|
||||
|
||||
def log_predictive_density(self, y_test, mu_star, var_star):
|
||||
"""
|
||||
Calculation of the log predictive density
|
||||
|
||||
.. math:
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param y_test: test observations (y_{*})
|
||||
:type y_test: (Nx1) array
|
||||
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||
:type mu_star: (Nx1) array
|
||||
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||
:type var_star: (Nx1) array
|
||||
"""
|
||||
return self.noise_model.log_predictive_density(y_test, mu_star, var_star)
|
||||
|
||||
def _get_params(self):
|
||||
#return np.zeros(0)
|
||||
return self.noise_model._get_params()
|
||||
|
||||
def _get_param_names(self):
|
||||
#return []
|
||||
return self.noise_model._get_param_names()
|
||||
|
||||
def _set_params(self,p):
|
||||
#pass # TODO: the EP likelihood might want to take some parameters...
|
||||
self.noise_model._set_params(p)
|
||||
|
||||
def _gradients(self,partial):
|
||||
#return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
|
||||
return self.noise_model._gradients(partial)
|
||||
|
||||
def _compute_GP_variables(self):
|
||||
#Variables to be called from GP
|
||||
mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
|
||||
sigma_sum = 1./self.tau_ + 1./self.tau_tilde
|
||||
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
|
||||
self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
|
||||
self.Z += 0.5*self.num_data*np.log(2*np.pi)
|
||||
|
||||
self.Y = mu_tilde[:,None]
|
||||
self.YYT = np.dot(self.Y,self.Y.T)
|
||||
self.covariance_matrix = np.diag(1./self.tau_tilde)
|
||||
self.precision = self.tau_tilde[:,None]
|
||||
self.V = self.precision * self.Y
|
||||
self.VVT_factor = self.V
|
||||
self.trYYT = np.trace(self.YYT)
|
||||
|
||||
def fit_full(self, K, epsilon=1e-3,power_ep=[1.,1.]):
|
||||
"""
|
||||
The expectation-propagation algorithm.
|
||||
For nomenclature see Rasmussen & Williams 2006.
|
||||
|
||||
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
|
||||
:type epsilon: float
|
||||
:param power_ep: Power EP parameters
|
||||
:type power_ep: list of floats
|
||||
|
||||
"""
|
||||
self.epsilon = epsilon
|
||||
self.eta, self.delta = power_ep
|
||||
|
||||
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
|
||||
mu = np.zeros(self.num_data)
|
||||
Sigma = K.copy()
|
||||
|
||||
"""
|
||||
Initial values - Cavity distribution parameters:
|
||||
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
|
||||
sigma_ = 1./tau_
|
||||
mu_ = v_/tau_
|
||||
"""
|
||||
self.tau_ = np.empty(self.num_data,dtype=float)
|
||||
self.v_ = np.empty(self.num_data,dtype=float)
|
||||
|
||||
#Initial values - Marginal moments
|
||||
z = np.empty(self.num_data,dtype=float)
|
||||
self.Z_hat = np.empty(self.num_data,dtype=float)
|
||||
phi = np.empty(self.num_data,dtype=float)
|
||||
mu_hat = np.empty(self.num_data,dtype=float)
|
||||
sigma2_hat = np.empty(self.num_data,dtype=float)
|
||||
|
||||
#Approximation
|
||||
epsilon_np1 = self.epsilon + 1.
|
||||
epsilon_np2 = self.epsilon + 1.
|
||||
self.iterations = 0
|
||||
self.np1 = [self.tau_tilde.copy()]
|
||||
self.np2 = [self.v_tilde.copy()]
|
||||
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
|
||||
update_order = np.random.permutation(self.num_data)
|
||||
for i in update_order:
|
||||
#Cavity distribution parameters
|
||||
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
|
||||
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
|
||||
#Marginal moments
|
||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
|
||||
#Site parameters update
|
||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
|
||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
|
||||
self.tau_tilde[i] += Delta_tau
|
||||
self.v_tilde[i] += Delta_v
|
||||
#Posterior distribution parameters update
|
||||
DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
|
||||
mu = np.dot(Sigma,self.v_tilde)
|
||||
self.iterations += 1
|
||||
#Sigma recomptutation with Cholesky decompositon
|
||||
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
|
||||
B = np.eye(self.num_data) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
|
||||
L = jitchol(B)
|
||||
V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
|
||||
Sigma = K - np.dot(V.T,V)
|
||||
mu = np.dot(Sigma,self.v_tilde)
|
||||
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
|
||||
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
|
||||
self.np1.append(self.tau_tilde.copy())
|
||||
self.np2.append(self.v_tilde.copy())
|
||||
|
||||
return self._compute_GP_variables()
|
||||
|
||||
def fit_DTC(self, Kmm, Kmn, epsilon=1e-3,power_ep=[1.,1.]):
|
||||
"""
|
||||
The expectation-propagation algorithm with sparse pseudo-input.
|
||||
For nomenclature see ... 2013.
|
||||
|
||||
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
|
||||
:type epsilon: float
|
||||
:param power_ep: Power EP parameters
|
||||
:type power_ep: list of floats
|
||||
|
||||
"""
|
||||
self.epsilon = epsilon
|
||||
self.eta, self.delta = power_ep
|
||||
|
||||
num_inducing = Kmm.shape[0]
|
||||
|
||||
#TODO: this doesn't work with uncertain inputs!
|
||||
|
||||
"""
|
||||
Prior approximation parameters:
|
||||
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
|
||||
Sigma0 = Qnn = Knm*Kmmi*Kmn
|
||||
"""
|
||||
KmnKnm = np.dot(Kmn,Kmn.T)
|
||||
Lm = jitchol(Kmm)
|
||||
Lmi = chol_inv(Lm)
|
||||
Kmmi = np.dot(Lmi.T,Lmi)
|
||||
KmmiKmn = np.dot(Kmmi,Kmn)
|
||||
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
|
||||
LLT0 = Kmm.copy()
|
||||
|
||||
#Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
|
||||
#KmnKnm = np.dot(Kmn, Kmn.T)
|
||||
#KmmiKmn = np.dot(Kmmi,Kmn)
|
||||
#Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
|
||||
#LLT0 = Kmm.copy()
|
||||
|
||||
"""
|
||||
Posterior approximation: q(f|y) = N(f| mu, Sigma)
|
||||
Sigma = Diag + P*R.T*R*P.T + K
|
||||
mu = w + P*Gamma
|
||||
"""
|
||||
mu = np.zeros(self.num_data)
|
||||
LLT = Kmm.copy()
|
||||
Sigma_diag = Qnn_diag.copy()
|
||||
|
||||
"""
|
||||
Initial values - Cavity distribution parameters:
|
||||
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
|
||||
sigma_ = 1./tau_
|
||||
mu_ = v_/tau_
|
||||
"""
|
||||
self.tau_ = np.empty(self.num_data,dtype=float)
|
||||
self.v_ = np.empty(self.num_data,dtype=float)
|
||||
|
||||
#Initial values - Marginal moments
|
||||
z = np.empty(self.num_data,dtype=float)
|
||||
self.Z_hat = np.empty(self.num_data,dtype=float)
|
||||
phi = np.empty(self.num_data,dtype=float)
|
||||
mu_hat = np.empty(self.num_data,dtype=float)
|
||||
sigma2_hat = np.empty(self.num_data,dtype=float)
|
||||
|
||||
#Approximation
|
||||
epsilon_np1 = 1
|
||||
epsilon_np2 = 1
|
||||
self.iterations = 0
|
||||
np1 = [self.tau_tilde.copy()]
|
||||
np2 = [self.v_tilde.copy()]
|
||||
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
|
||||
update_order = np.random.permutation(self.num_data)
|
||||
for i in update_order:
|
||||
#Cavity distribution parameters
|
||||
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
|
||||
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
|
||||
#Marginal moments
|
||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
|
||||
#Site parameters update
|
||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
|
||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
|
||||
self.tau_tilde[i] += Delta_tau
|
||||
self.v_tilde[i] += Delta_v
|
||||
#Posterior distribution parameters update
|
||||
DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
|
||||
L = jitchol(LLT)
|
||||
#cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
|
||||
V,info = dtrtrs(L,Kmn,lower=1)
|
||||
Sigma_diag = np.sum(V*V,-2)
|
||||
si = np.sum(V.T*V[:,i],-1)
|
||||
mu += (Delta_v-Delta_tau*mu[i])*si
|
||||
self.iterations += 1
|
||||
#Sigma recomputation with Cholesky decompositon
|
||||
LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
|
||||
L = jitchol(LLT)
|
||||
V,info = dtrtrs(L,Kmn,lower=1)
|
||||
V2,info = dtrtrs(L.T,V,lower=0)
|
||||
Sigma_diag = np.sum(V*V,-2)
|
||||
Knmv_tilde = np.dot(Kmn,self.v_tilde)
|
||||
mu = np.dot(V2.T,Knmv_tilde)
|
||||
epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.num_data
|
||||
epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.num_data
|
||||
np1.append(self.tau_tilde.copy())
|
||||
np2.append(self.v_tilde.copy())
|
||||
|
||||
self._compute_GP_variables()
|
||||
|
||||
def fit_FITC(self, Kmm, Kmn, Knn_diag, epsilon=1e-3,power_ep=[1.,1.]):
|
||||
"""
|
||||
The expectation-propagation algorithm with sparse pseudo-input.
|
||||
For nomenclature see Naish-Guzman and Holden, 2008.
|
||||
|
||||
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
|
||||
:type epsilon: float
|
||||
:param power_ep: Power EP parameters
|
||||
:type power_ep: list of floats
|
||||
"""
|
||||
self.epsilon = epsilon
|
||||
self.eta, self.delta = power_ep
|
||||
|
||||
num_inducing = Kmm.shape[0]
|
||||
|
||||
"""
|
||||
Prior approximation parameters:
|
||||
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
|
||||
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
|
||||
"""
|
||||
Lm = jitchol(Kmm)
|
||||
Lmi = chol_inv(Lm)
|
||||
Kmmi = np.dot(Lmi.T,Lmi)
|
||||
P0 = Kmn.T
|
||||
KmnKnm = np.dot(P0.T, P0)
|
||||
KmmiKmn = np.dot(Kmmi,P0.T)
|
||||
Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
|
||||
Diag0 = Knn_diag - Qnn_diag
|
||||
R0 = jitchol(Kmmi).T
|
||||
|
||||
"""
|
||||
Posterior approximation: q(f|y) = N(f| mu, Sigma)
|
||||
Sigma = Diag + P*R.T*R*P.T + K
|
||||
mu = w + P*Gamma
|
||||
"""
|
||||
self.w = np.zeros(self.num_data)
|
||||
self.Gamma = np.zeros(num_inducing)
|
||||
mu = np.zeros(self.num_data)
|
||||
P = P0.copy()
|
||||
R = R0.copy()
|
||||
Diag = Diag0.copy()
|
||||
Sigma_diag = Knn_diag
|
||||
RPT0 = np.dot(R0,P0.T)
|
||||
|
||||
"""
|
||||
Initial values - Cavity distribution parameters:
|
||||
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
|
||||
sigma_ = 1./tau_
|
||||
mu_ = v_/tau_
|
||||
"""
|
||||
self.tau_ = np.empty(self.num_data,dtype=float)
|
||||
self.v_ = np.empty(self.num_data,dtype=float)
|
||||
|
||||
#Initial values - Marginal moments
|
||||
z = np.empty(self.num_data,dtype=float)
|
||||
self.Z_hat = np.empty(self.num_data,dtype=float)
|
||||
phi = np.empty(self.num_data,dtype=float)
|
||||
mu_hat = np.empty(self.num_data,dtype=float)
|
||||
sigma2_hat = np.empty(self.num_data,dtype=float)
|
||||
|
||||
#Approximation
|
||||
epsilon_np1 = 1
|
||||
epsilon_np2 = 1
|
||||
self.iterations = 0
|
||||
self.np1 = [self.tau_tilde.copy()]
|
||||
self.np2 = [self.v_tilde.copy()]
|
||||
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
|
||||
update_order = np.random.permutation(self.num_data)
|
||||
for i in update_order:
|
||||
#Cavity distribution parameters
|
||||
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
|
||||
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
|
||||
#Marginal moments
|
||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
|
||||
#Site parameters update
|
||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
|
||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
|
||||
self.tau_tilde[i] += Delta_tau
|
||||
self.v_tilde[i] += Delta_v
|
||||
#Posterior distribution parameters update
|
||||
dtd1 = Delta_tau*Diag[i] + 1.
|
||||
dii = Diag[i]
|
||||
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
|
||||
pi_ = P[i,:].reshape(1,num_inducing)
|
||||
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
|
||||
Rp_i = np.dot(R,pi_.T)
|
||||
RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
|
||||
R = jitchol(RTR).T
|
||||
self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
|
||||
self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
|
||||
RPT = np.dot(R,P.T)
|
||||
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
|
||||
mu = self.w + np.dot(P,self.Gamma)
|
||||
self.iterations += 1
|
||||
#Sigma recomptutation with Cholesky decompositon
|
||||
Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
|
||||
Diag = Diag0 * Iplus_Dprod_i
|
||||
P = Iplus_Dprod_i[:,None] * P0
|
||||
safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
|
||||
L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
|
||||
R,info = dtrtrs(L,R0,lower=1)
|
||||
RPT = np.dot(R,P.T)
|
||||
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
|
||||
self.w = Diag * self.v_tilde
|
||||
self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
|
||||
mu = self.w + np.dot(P,self.Gamma)
|
||||
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
|
||||
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
|
||||
self.np1.append(self.tau_tilde.copy())
|
||||
self.np2.append(self.v_tilde.copy())
|
||||
|
||||
return self._compute_GP_variables()
|
||||
|
|
@ -1,385 +0,0 @@
|
|||
# Copyright (c) 2013, Ricardo Andrade
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
|
||||
from likelihood import likelihood
|
||||
|
||||
class EP_Mixed_Noise(likelihood):
|
||||
def __init__(self,data_list,noise_model_list,epsilon=1e-3,power_ep=[1.,1.]):
|
||||
"""
|
||||
Expectation Propagation
|
||||
|
||||
Arguments
|
||||
---------
|
||||
:param data_list: list of outputs
|
||||
:param noise_model_list: a list of noise models
|
||||
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations
|
||||
:type epsilon: float
|
||||
:param power_ep: list of power ep parameters
|
||||
"""
|
||||
assert len(data_list) == len(noise_model_list)
|
||||
self.noise_model_list = noise_model_list
|
||||
n_list = [data.size for data in data_list]
|
||||
self.n_models = len(data_list)
|
||||
self.n_params = [noise_model._get_params().size for noise_model in noise_model_list]
|
||||
self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.n_models),n_list)])
|
||||
self.epsilon = epsilon
|
||||
self.eta, self.delta = power_ep
|
||||
self.data = np.vstack(data_list)
|
||||
self.N, self.output_dim = self.data.shape
|
||||
self.is_heteroscedastic = True
|
||||
self.num_params = 0#FIXME
|
||||
self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)])
|
||||
#TODO non-gaussian index
|
||||
|
||||
#Initial values - Likelihood approximation parameters:
|
||||
#p(y|f) = t(f|tau_tilde,v_tilde)
|
||||
self.tau_tilde = np.zeros(self.N)
|
||||
self.v_tilde = np.zeros(self.N)
|
||||
|
||||
#initial values for the GP variables
|
||||
self.Y = np.zeros((self.N,1))
|
||||
self.covariance_matrix = np.eye(self.N)
|
||||
self.precision = np.ones(self.N)[:,None]
|
||||
self.Z = 0
|
||||
self.YYT = None
|
||||
self.V = self.precision * self.Y
|
||||
self.VVT_factor = self.V
|
||||
self.trYYT = 0.
|
||||
|
||||
def restart(self):
|
||||
self.tau_tilde = np.zeros(self.N)
|
||||
self.v_tilde = np.zeros(self.N)
|
||||
self.Y = np.zeros((self.N,1))
|
||||
self.covariance_matrix = np.eye(self.N)
|
||||
self.precision = np.ones(self.N)[:,None]
|
||||
self.Z = 0
|
||||
self.YYT = None
|
||||
self.V = self.precision * self.Y
|
||||
self.VVT_factor = self.V
|
||||
self.trYYT = 0.
|
||||
|
||||
def predictive_values(self,mu,var,full_cov,noise_model):
|
||||
"""
|
||||
Predicts the output given the GP
|
||||
|
||||
:param mu: GP's mean
|
||||
:param var: GP's variance
|
||||
:param full_cov: whether to return the full covariance matrix, or just the diagonal
|
||||
:type full_cov: False|True
|
||||
:param noise_model: noise model to use
|
||||
:type noise_model: integer
|
||||
"""
|
||||
if full_cov:
|
||||
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
|
||||
#_mu = []
|
||||
#_var = []
|
||||
#_q1 = []
|
||||
#_q2 = []
|
||||
#for m,v,o in zip(mu,var,output.flatten()):
|
||||
# a,b,c,d = self.noise_model_list[int(o)].predictive_values(m,v)
|
||||
# _mu.append(a)
|
||||
# _var.append(b)
|
||||
# _q1.append(c)
|
||||
# _q2.append(d)
|
||||
#return np.vstack(_mu),np.vstack(_var),np.vstack(_q1),np.vstack(_q2)
|
||||
return self.noise_model_list[noise_model].predictive_values(mu,var)
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list])
|
||||
|
||||
def _get_param_names(self):
|
||||
names = []
|
||||
for noise_model in self.noise_model_list:
|
||||
names += noise_model._get_param_names()
|
||||
return names
|
||||
|
||||
def _set_params(self,p):
|
||||
cs_params = np.cumsum([0]+self.n_params)
|
||||
for i in range(len(self.n_params)):
|
||||
self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]])
|
||||
|
||||
def _gradients(self,partial):
|
||||
#NOTE this is not tested
|
||||
return np.hstack([noise_model._gradients(partial) for noise_model in self.noise_model_list])
|
||||
|
||||
def _compute_GP_variables(self):
|
||||
#Variables to be called from GP
|
||||
mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
|
||||
sigma_sum = 1./self.tau_ + 1./self.tau_tilde
|
||||
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
|
||||
self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
|
||||
|
||||
self.Y = mu_tilde[:,None]
|
||||
self.YYT = np.dot(self.Y,self.Y.T)
|
||||
self.covariance_matrix = np.diag(1./self.tau_tilde)
|
||||
self.precision = self.tau_tilde[:,None]
|
||||
self.V = self.precision * self.Y
|
||||
self.VVT_factor = self.V
|
||||
self.trYYT = np.trace(self.YYT)
|
||||
|
||||
def fit_full(self,K):
|
||||
"""
|
||||
The expectation-propagation algorithm.
|
||||
For nomenclature see Rasmussen & Williams 2006.
|
||||
"""
|
||||
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
|
||||
mu = np.zeros(self.N)
|
||||
Sigma = K.copy()
|
||||
|
||||
"""
|
||||
Initial values - Cavity distribution parameters:
|
||||
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
|
||||
sigma_ = 1./tau_
|
||||
mu_ = v_/tau_
|
||||
"""
|
||||
self.tau_ = np.empty(self.N,dtype=float)
|
||||
self.v_ = np.empty(self.N,dtype=float)
|
||||
|
||||
#Initial values - Marginal moments
|
||||
z = np.empty(self.N,dtype=float)
|
||||
self.Z_hat = np.empty(self.N,dtype=float)
|
||||
phi = np.empty(self.N,dtype=float)
|
||||
mu_hat = np.empty(self.N,dtype=float)
|
||||
sigma2_hat = np.empty(self.N,dtype=float)
|
||||
|
||||
#Approximation
|
||||
epsilon_np1 = self.epsilon + 1.
|
||||
epsilon_np2 = self.epsilon + 1.
|
||||
self.iterations = 0
|
||||
self.np1 = [self.tau_tilde.copy()]
|
||||
self.np2 = [self.v_tilde.copy()]
|
||||
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
|
||||
update_order = np.random.permutation(self.N)
|
||||
for i in update_order:
|
||||
#Cavity distribution parameters
|
||||
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
|
||||
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
|
||||
#Marginal moments
|
||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
|
||||
#Site parameters update
|
||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
|
||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
|
||||
self.tau_tilde[i] += Delta_tau
|
||||
self.v_tilde[i] += Delta_v
|
||||
#Posterior distribution parameters update
|
||||
DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
|
||||
mu = np.dot(Sigma,self.v_tilde)
|
||||
self.iterations += 1
|
||||
#Sigma recomptutation with Cholesky decompositon
|
||||
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
|
||||
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
|
||||
L = jitchol(B)
|
||||
V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
|
||||
Sigma = K - np.dot(V.T,V)
|
||||
mu = np.dot(Sigma,self.v_tilde)
|
||||
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
|
||||
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
|
||||
self.np1.append(self.tau_tilde.copy())
|
||||
self.np2.append(self.v_tilde.copy())
|
||||
|
||||
return self._compute_GP_variables()
|
||||
|
||||
def fit_DTC(self, Kmm, Kmn):
|
||||
"""
|
||||
The expectation-propagation algorithm with sparse pseudo-input.
|
||||
For nomenclature see ... 2013.
|
||||
"""
|
||||
num_inducing = Kmm.shape[0]
|
||||
|
||||
#TODO: this doesn't work with uncertain inputs!
|
||||
|
||||
"""
|
||||
Prior approximation parameters:
|
||||
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
|
||||
Sigma0 = Qnn = Knm*Kmmi*Kmn
|
||||
"""
|
||||
KmnKnm = np.dot(Kmn,Kmn.T)
|
||||
Lm = jitchol(Kmm)
|
||||
Lmi = chol_inv(Lm)
|
||||
Kmmi = np.dot(Lmi.T,Lmi)
|
||||
KmmiKmn = np.dot(Kmmi,Kmn)
|
||||
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
|
||||
LLT0 = Kmm.copy()
|
||||
|
||||
#Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
|
||||
#KmnKnm = np.dot(Kmn, Kmn.T)
|
||||
#KmmiKmn = np.dot(Kmmi,Kmn)
|
||||
#Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
|
||||
#LLT0 = Kmm.copy()
|
||||
|
||||
"""
|
||||
Posterior approximation: q(f|y) = N(f| mu, Sigma)
|
||||
Sigma = Diag + P*R.T*R*P.T + K
|
||||
mu = w + P*Gamma
|
||||
"""
|
||||
mu = np.zeros(self.N)
|
||||
LLT = Kmm.copy()
|
||||
Sigma_diag = Qnn_diag.copy()
|
||||
|
||||
"""
|
||||
Initial values - Cavity distribution parameters:
|
||||
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
|
||||
sigma_ = 1./tau_
|
||||
mu_ = v_/tau_
|
||||
"""
|
||||
self.tau_ = np.empty(self.N,dtype=float)
|
||||
self.v_ = np.empty(self.N,dtype=float)
|
||||
|
||||
#Initial values - Marginal moments
|
||||
z = np.empty(self.N,dtype=float)
|
||||
self.Z_hat = np.empty(self.N,dtype=float)
|
||||
phi = np.empty(self.N,dtype=float)
|
||||
mu_hat = np.empty(self.N,dtype=float)
|
||||
sigma2_hat = np.empty(self.N,dtype=float)
|
||||
|
||||
#Approximation
|
||||
epsilon_np1 = 1
|
||||
epsilon_np2 = 1
|
||||
self.iterations = 0
|
||||
np1 = [self.tau_tilde.copy()]
|
||||
np2 = [self.v_tilde.copy()]
|
||||
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
|
||||
update_order = np.random.permutation(self.N)
|
||||
for i in update_order:
|
||||
#Cavity distribution parameters
|
||||
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
|
||||
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
|
||||
#Marginal moments
|
||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
|
||||
#Site parameters update
|
||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
|
||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
|
||||
self.tau_tilde[i] += Delta_tau
|
||||
self.v_tilde[i] += Delta_v
|
||||
#Posterior distribution parameters update
|
||||
DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
|
||||
L = jitchol(LLT)
|
||||
#cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
|
||||
V,info = dtrtrs(L,Kmn,lower=1)
|
||||
Sigma_diag = np.sum(V*V,-2)
|
||||
si = np.sum(V.T*V[:,i],-1)
|
||||
mu += (Delta_v-Delta_tau*mu[i])*si
|
||||
self.iterations += 1
|
||||
#Sigma recomputation with Cholesky decompositon
|
||||
LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
|
||||
L = jitchol(LLT)
|
||||
V,info = dtrtrs(L,Kmn,lower=1)
|
||||
V2,info = dtrtrs(L.T,V,lower=0)
|
||||
Sigma_diag = np.sum(V*V,-2)
|
||||
Knmv_tilde = np.dot(Kmn,self.v_tilde)
|
||||
mu = np.dot(V2.T,Knmv_tilde)
|
||||
epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
|
||||
epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
|
||||
np1.append(self.tau_tilde.copy())
|
||||
np2.append(self.v_tilde.copy())
|
||||
|
||||
self._compute_GP_variables()
|
||||
|
||||
def fit_FITC(self, Kmm, Kmn, Knn_diag):
|
||||
"""
|
||||
The expectation-propagation algorithm with sparse pseudo-input.
|
||||
For nomenclature see Naish-Guzman and Holden, 2008.
|
||||
"""
|
||||
num_inducing = Kmm.shape[0]
|
||||
|
||||
"""
|
||||
Prior approximation parameters:
|
||||
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
|
||||
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
|
||||
"""
|
||||
Lm = jitchol(Kmm)
|
||||
Lmi = chol_inv(Lm)
|
||||
Kmmi = np.dot(Lmi.T,Lmi)
|
||||
P0 = Kmn.T
|
||||
KmnKnm = np.dot(P0.T, P0)
|
||||
KmmiKmn = np.dot(Kmmi,P0.T)
|
||||
Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
|
||||
Diag0 = Knn_diag - Qnn_diag
|
||||
R0 = jitchol(Kmmi).T
|
||||
|
||||
"""
|
||||
Posterior approximation: q(f|y) = N(f| mu, Sigma)
|
||||
Sigma = Diag + P*R.T*R*P.T + K
|
||||
mu = w + P*Gamma
|
||||
"""
|
||||
self.w = np.zeros(self.N)
|
||||
self.Gamma = np.zeros(num_inducing)
|
||||
mu = np.zeros(self.N)
|
||||
P = P0.copy()
|
||||
R = R0.copy()
|
||||
Diag = Diag0.copy()
|
||||
Sigma_diag = Knn_diag
|
||||
RPT0 = np.dot(R0,P0.T)
|
||||
|
||||
"""
|
||||
Initial values - Cavity distribution parameters:
|
||||
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
|
||||
sigma_ = 1./tau_
|
||||
mu_ = v_/tau_
|
||||
"""
|
||||
self.tau_ = np.empty(self.N,dtype=float)
|
||||
self.v_ = np.empty(self.N,dtype=float)
|
||||
|
||||
#Initial values - Marginal moments
|
||||
z = np.empty(self.N,dtype=float)
|
||||
self.Z_hat = np.empty(self.N,dtype=float)
|
||||
phi = np.empty(self.N,dtype=float)
|
||||
mu_hat = np.empty(self.N,dtype=float)
|
||||
sigma2_hat = np.empty(self.N,dtype=float)
|
||||
|
||||
#Approximation
|
||||
epsilon_np1 = 1
|
||||
epsilon_np2 = 1
|
||||
self.iterations = 0
|
||||
self.np1 = [self.tau_tilde.copy()]
|
||||
self.np2 = [self.v_tilde.copy()]
|
||||
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
|
||||
update_order = np.random.permutation(self.N)
|
||||
for i in update_order:
|
||||
#Cavity distribution parameters
|
||||
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
|
||||
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
|
||||
#Marginal moments
|
||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
|
||||
#Site parameters update
|
||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
|
||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
|
||||
self.tau_tilde[i] += Delta_tau
|
||||
self.v_tilde[i] += Delta_v
|
||||
#Posterior distribution parameters update
|
||||
dtd1 = Delta_tau*Diag[i] + 1.
|
||||
dii = Diag[i]
|
||||
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
|
||||
pi_ = P[i,:].reshape(1,num_inducing)
|
||||
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
|
||||
Rp_i = np.dot(R,pi_.T)
|
||||
RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
|
||||
R = jitchol(RTR).T
|
||||
self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
|
||||
self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
|
||||
RPT = np.dot(R,P.T)
|
||||
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
|
||||
mu = self.w + np.dot(P,self.Gamma)
|
||||
self.iterations += 1
|
||||
#Sigma recomptutation with Cholesky decompositon
|
||||
Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
|
||||
Diag = Diag0 * Iplus_Dprod_i
|
||||
P = Iplus_Dprod_i[:,None] * P0
|
||||
safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
|
||||
L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
|
||||
R,info = dtrtrs(L,R0,lower=1)
|
||||
RPT = np.dot(R,P.T)
|
||||
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
|
||||
self.w = Diag * self.v_tilde
|
||||
self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
|
||||
mu = self.w + np.dot(P,self.Gamma)
|
||||
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
|
||||
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
|
||||
self.np1.append(self.tau_tilde.copy())
|
||||
self.np2.append(self.v_tilde.copy())
|
||||
|
||||
return self._compute_GP_variables()
|
||||
|
|
@ -1,15 +1,14 @@
|
|||
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||
# Copyright (c) 2012-2014 GPy Authors
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats,special
|
||||
import scipy as sp
|
||||
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||
import gp_transformations
|
||||
from noise_distributions import NoiseDistribution
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
|
||||
class Exponential(NoiseDistribution):
|
||||
class Exponential(Likelihood):
|
||||
"""
|
||||
Expoential likelihood
|
||||
Y is expected to take values in {0,1,2,...}
|
||||
|
|
@ -18,13 +17,12 @@ class Exponential(NoiseDistribution):
|
|||
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
|
||||
$$
|
||||
"""
|
||||
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
|
||||
super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||
def __init__(self,gp_link=None):
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Log()
|
||||
super(Exponential, self).__init__(gp_link, 'ExpLikelihood')
|
||||
|
||||
def _preprocess_values(self,Y):
|
||||
return Y
|
||||
|
||||
def pdf_link(self, link_f, y, extra_data=None):
|
||||
def pdf_link(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Likelihood function given link(f)
|
||||
|
||||
|
|
@ -35,16 +33,15 @@ class Exponential(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in exponential distribution
|
||||
:param Y_metadata: Y_metadata which is not used in exponential distribution
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
log_objective = link_f*np.exp(-y*link_f)
|
||||
return np.exp(np.sum(np.log(log_objective)))
|
||||
#return np.exp(np.sum(-y/link_f - np.log(link_f) ))
|
||||
|
||||
def logpdf_link(self, link_f, y, extra_data=None):
|
||||
def logpdf_link(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Log Likelihood Function given link(f)
|
||||
|
||||
|
|
@ -55,17 +52,16 @@ class Exponential(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in exponential distribution
|
||||
:param Y_metadata: Y_metadata which is not used in exponential distribution
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
log_objective = np.log(link_f) - y*link_f
|
||||
#logpdf_link = np.sum(-np.log(link_f) - y/link_f)
|
||||
return np.sum(log_objective)
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, extra_data=None):
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||
|
||||
|
|
@ -76,7 +72,7 @@ class Exponential(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in exponential distribution
|
||||
:param Y_metadata: Y_metadata which is not used in exponential distribution
|
||||
:returns: gradient of likelihood evaluated at points
|
||||
:rtype: Nx1 array
|
||||
|
||||
|
|
@ -86,7 +82,7 @@ class Exponential(NoiseDistribution):
|
|||
#grad = y/(link_f**2) - 1./link_f
|
||||
return grad
|
||||
|
||||
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
|
||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian at y, given link(f), w.r.t link(f)
|
||||
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
|
||||
|
|
@ -99,7 +95,7 @@ class Exponential(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in exponential distribution
|
||||
:param Y_metadata: Y_metadata which is not used in exponential distribution
|
||||
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
|
||||
:rtype: Nx1 array
|
||||
|
||||
|
|
@ -112,7 +108,7 @@ class Exponential(NoiseDistribution):
|
|||
#hess = -2*y/(link_f**3) + 1/(link_f**2)
|
||||
return hess
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
|
||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
|
|
@ -123,7 +119,7 @@ class Exponential(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in exponential distribution
|
||||
:param Y_metadata: Y_metadata which is not used in exponential distribution
|
||||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
|
|
@ -132,18 +128,6 @@ class Exponential(NoiseDistribution):
|
|||
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
|
||||
return d3lik_dlink3
|
||||
|
||||
def _mean(self,gp):
|
||||
"""
|
||||
Mass (or density) function
|
||||
"""
|
||||
return self.gp_link.transf(gp)
|
||||
|
||||
def _variance(self,gp):
|
||||
"""
|
||||
Mass (or density) function
|
||||
"""
|
||||
return self.gp_link.transf(gp)**2
|
||||
|
||||
def samples(self, gp):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
|
@ -1,15 +1,15 @@
|
|||
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||
# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats,special
|
||||
import scipy as sp
|
||||
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||
import gp_transformations
|
||||
from noise_distributions import NoiseDistribution
|
||||
from ..core.parameterization import Param
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
|
||||
class Gamma(NoiseDistribution):
|
||||
class Gamma(Likelihood):
|
||||
"""
|
||||
Gamma likelihood
|
||||
|
||||
|
|
@ -18,14 +18,16 @@ class Gamma(NoiseDistribution):
|
|||
\\alpha_{i} = \\beta y_{i}
|
||||
|
||||
"""
|
||||
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
|
||||
self.beta = beta
|
||||
super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||
def __init__(self,gp_link=None,beta=1.):
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Log()
|
||||
super(Gamma, self).__init__(gp_link, 'Gamma')
|
||||
|
||||
def _preprocess_values(self,Y):
|
||||
return Y
|
||||
self.beta = Param('beta', beta)
|
||||
self.link_parameter(self.beta)
|
||||
self.beta.fix()#TODO: gradients!
|
||||
|
||||
def pdf_link(self, link_f, y, extra_data=None):
|
||||
def pdf_link(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Likelihood function given link(f)
|
||||
|
||||
|
|
@ -37,7 +39,7 @@ class Gamma(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in poisson distribution
|
||||
:param Y_metadata: Y_metadata which is not used in poisson distribution
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
|
|
@ -47,7 +49,7 @@ class Gamma(NoiseDistribution):
|
|||
objective = (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha)
|
||||
return np.exp(np.sum(np.log(objective)))
|
||||
|
||||
def logpdf_link(self, link_f, y, extra_data=None):
|
||||
def logpdf_link(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Log Likelihood Function given link(f)
|
||||
|
||||
|
|
@ -59,7 +61,7 @@ class Gamma(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in poisson distribution
|
||||
:param Y_metadata: Y_metadata which is not used in poisson distribution
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
|
||||
|
|
@ -71,7 +73,7 @@ class Gamma(NoiseDistribution):
|
|||
log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
|
||||
return np.sum(log_objective)
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, extra_data=None):
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||
|
||||
|
|
@ -83,7 +85,7 @@ class Gamma(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in gamma distribution
|
||||
:param Y_metadata: Y_metadata which is not used in gamma distribution
|
||||
:returns: gradient of likelihood evaluated at points
|
||||
:rtype: Nx1 array
|
||||
|
||||
|
|
@ -94,7 +96,7 @@ class Gamma(NoiseDistribution):
|
|||
#return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
|
||||
return grad
|
||||
|
||||
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
|
||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian at y, given link(f), w.r.t link(f)
|
||||
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
|
||||
|
|
@ -108,7 +110,7 @@ class Gamma(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in gamma distribution
|
||||
:param Y_metadata: Y_metadata which is not used in gamma distribution
|
||||
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
|
||||
:rtype: Nx1 array
|
||||
|
||||
|
|
@ -122,7 +124,7 @@ class Gamma(NoiseDistribution):
|
|||
#return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
|
||||
return hess
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
|
||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
|
|
@ -134,22 +136,10 @@ class Gamma(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in gamma distribution
|
||||
:param Y_metadata: Y_metadata which is not used in gamma distribution
|
||||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
|
||||
return d3lik_dlink3
|
||||
|
||||
def _mean(self,gp):
|
||||
"""
|
||||
Mass (or density) function
|
||||
"""
|
||||
return self.gp_link.transf(gp)
|
||||
|
||||
def _variance(self,gp):
|
||||
"""
|
||||
Mass (or density) function
|
||||
"""
|
||||
return self.gp_link.transf(gp)/self.beta
|
||||
|
|
@ -1,114 +1,318 @@
|
|||
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
#TODO
|
||||
"""
|
||||
A lot of this code assumes that the link function is the identity.
|
||||
|
||||
I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity.
|
||||
|
||||
Furthermore, exact Guassian inference can only be done for the identity link, so we should be asserting so for all calls which relate to that.
|
||||
|
||||
James 11/12/13
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from likelihood import likelihood
|
||||
from ..util.linalg import jitchol
|
||||
from scipy import stats, special
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
from ..core.parameterization import Param
|
||||
from ..core.parameterization.transformations import Logexp
|
||||
from scipy import stats
|
||||
|
||||
|
||||
class Gaussian(likelihood):
|
||||
class Gaussian(Likelihood):
|
||||
"""
|
||||
Likelihood class for doing Expectation propagation
|
||||
Gaussian likelihood
|
||||
|
||||
:param data: observed output
|
||||
:type data: Nx1 numpy.darray
|
||||
:param variance: noise parameter
|
||||
:param normalize: whether to normalize the data before computing (predictions will be in original scales)
|
||||
:type normalize: False|True
|
||||
.. math::
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
|
||||
|
||||
:param variance: variance value of the Gaussian distribution
|
||||
:param N: Number of data points
|
||||
:type N: int
|
||||
"""
|
||||
def __init__(self, data, variance=1., normalize=False):
|
||||
self.is_heteroscedastic = False
|
||||
self.num_params = 1
|
||||
self.Z = 0. # a correction factor which accounts for the approximation made
|
||||
N, self.output_dim = data.shape
|
||||
def __init__(self, gp_link=None, variance=1., name='Gaussian_noise'):
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Identity()
|
||||
|
||||
# normalization
|
||||
if normalize:
|
||||
self._offset = data.mean(0)[None, :]
|
||||
self._scale = data.std(0)[None, :]
|
||||
# Don't scale outputs which have zero variance to zero.
|
||||
self._scale[np.nonzero(self._scale == 0.)] = 1.0e-3
|
||||
else:
|
||||
self._offset = np.zeros((1, self.output_dim))
|
||||
self._scale = np.ones((1, self.output_dim))
|
||||
assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link"
|
||||
|
||||
self.set_data(data)
|
||||
super(Gaussian, self).__init__(gp_link, name=name)
|
||||
|
||||
self._variance = np.asarray(variance) + 1.
|
||||
self._set_params(np.asarray(variance))
|
||||
self.variance = Param('variance', variance, Logexp())
|
||||
self.link_parameter(self.variance)
|
||||
|
||||
super(Gaussian, self).__init__()
|
||||
if isinstance(gp_link, link_functions.Identity):
|
||||
self.log_concave = True
|
||||
|
||||
def set_data(self, data):
|
||||
self.data = data
|
||||
self.N, D = data.shape
|
||||
assert D == self.output_dim
|
||||
self.Y = (self.data - self._offset) / self._scale
|
||||
if D > self.N:
|
||||
self.YYT = np.dot(self.Y, self.Y.T)
|
||||
self.trYYT = np.trace(self.YYT)
|
||||
self.YYT_factor = jitchol(self.YYT)
|
||||
else:
|
||||
self.YYT = None
|
||||
self.trYYT = np.sum(np.square(self.Y))
|
||||
self.YYT_factor = self.Y
|
||||
def betaY(self,Y,Y_metadata=None):
|
||||
#TODO: ~Ricardo this does not live here
|
||||
return Y/self.gaussian_variance(Y_metadata)
|
||||
|
||||
def _get_params(self):
|
||||
return np.asarray(self._variance)
|
||||
def gaussian_variance(self, Y_metadata=None):
|
||||
return self.variance
|
||||
|
||||
def _get_param_names(self):
|
||||
return ["noise_variance"]
|
||||
def update_gradients(self, grad):
|
||||
self.variance.gradient = grad
|
||||
|
||||
def _set_params(self, x):
|
||||
x = np.float64(x)
|
||||
if np.all(self._variance != x):
|
||||
if x == 0.:#special case of zero noise
|
||||
self.precision = np.inf
|
||||
self.V = None
|
||||
else:
|
||||
self.precision = 1. / x
|
||||
self.V = (self.precision) * self.Y
|
||||
self.VVT_factor = self.precision * self.YYT_factor
|
||||
self.covariance_matrix = np.eye(self.N) * x
|
||||
self._variance = x
|
||||
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
|
||||
return dL_dKdiag.sum()
|
||||
|
||||
def predictive_values(self, mu, var, full_cov, **likelihood_args):
|
||||
def _preprocess_values(self, Y):
|
||||
"""
|
||||
Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval
|
||||
Check if the values of the observations correspond to the values
|
||||
assumed by the likelihood function.
|
||||
"""
|
||||
mean = mu * self._scale + self._offset
|
||||
return Y
|
||||
|
||||
def _moments_match_ep(self, data_i, tau_i, v_i):
|
||||
"""
|
||||
Moments match of the marginal approximation in EP algorithm
|
||||
|
||||
:param i: number of observation (int)
|
||||
:param tau_i: precision of the cavity distribution (float)
|
||||
:param v_i: mean/variance of the cavity distribution (float)
|
||||
"""
|
||||
sigma2_hat = 1./(1./self.variance + tau_i)
|
||||
mu_hat = sigma2_hat*(data_i/self.variance + v_i)
|
||||
sum_var = self.variance + 1./tau_i
|
||||
Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
|
||||
return Z_hat, mu_hat, sigma2_hat
|
||||
|
||||
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
|
||||
if full_cov:
|
||||
if self.output_dim > 1:
|
||||
raise NotImplementedError, "TODO"
|
||||
# Note. for output_dim>1, we need to re-normalise all the outputs independently.
|
||||
# This will mess up computations of diag(true_var), below.
|
||||
# note that the upper, lower quantiles should be the same shape as mean
|
||||
# Augment the output variance with the likelihood variance and rescale.
|
||||
true_var = (var + np.eye(var.shape[0]) * self._variance) * self._scale ** 2
|
||||
_5pc = mean - 2.*np.sqrt(np.diag(true_var))
|
||||
_95pc = mean + 2.*np.sqrt(np.diag(true_var))
|
||||
if var.ndim == 2:
|
||||
var += np.eye(var.shape[0])*self.variance
|
||||
if var.ndim == 3:
|
||||
var += np.atleast_3d(np.eye(var.shape[0])*self.variance)
|
||||
else:
|
||||
true_var = (var + self._variance) * self._scale ** 2
|
||||
_5pc = mean - 2.*np.sqrt(true_var)
|
||||
_95pc = mean + 2.*np.sqrt(true_var)
|
||||
return mean, true_var, _5pc, _95pc
|
||||
var += self.variance
|
||||
return mu, var
|
||||
|
||||
def predictive_mean(self, mu, sigma):
|
||||
return mu
|
||||
|
||||
def predictive_variance(self, mu, sigma, predictive_mean=None):
|
||||
return self.variance + sigma**2
|
||||
|
||||
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
|
||||
return [stats.norm.ppf(q/100.)*np.sqrt(var + self.variance) + mu for q in quantiles]
|
||||
|
||||
def pdf_link(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Likelihood function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in gaussian
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
#Assumes no covariance, exp, sum, log for numerical stability
|
||||
return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
|
||||
|
||||
def logpdf_link(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Log likelihood function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in gaussian
|
||||
:returns: log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
N = y.shape[0]
|
||||
ln_det_cov = N*np.log(self.variance)
|
||||
|
||||
return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the pdf at y, given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\sigma^{2}}(y_{i} - \\lambda(f_{i}))
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in gaussian
|
||||
:returns: gradient of log likelihood evaluated at points link(f)
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
s2_i = (1.0/self.variance)
|
||||
grad = s2_i*y - s2_i*link_f
|
||||
return grad
|
||||
|
||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian at y, given link_f, w.r.t link_f.
|
||||
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
|
||||
|
||||
The hessian will be 0 unless i == j
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}f} = -\\frac{1}{\\sigma^{2}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in gaussian
|
||||
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
|
||||
:rtype: Nx1 array
|
||||
|
||||
.. Note::
|
||||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
N = y.shape[0]
|
||||
hess = -(1.0/self.variance)*np.ones((N, 1))
|
||||
return hess
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = 0
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in gaussian
|
||||
:returns: third derivative of log likelihood evaluated at points link(f)
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
N = y.shape[0]
|
||||
d3logpdf_dlink3 = np.zeros((N,1))
|
||||
return d3logpdf_dlink3
|
||||
|
||||
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = -\\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - \\lambda(f_{i}))^{2}}{2\\sigma^{4}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in gaussian
|
||||
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
e = y - link_f
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
N = y.shape[0]
|
||||
dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
|
||||
return np.sum(dlik_dsigma) # Sure about this sum?
|
||||
|
||||
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)}) = \\frac{1}{\\sigma^{4}}(-y_{i} + \\lambda(f_{i}))
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in gaussian
|
||||
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
dlik_grad_dsigma = -s_4*y + s_4*link_f
|
||||
return dlik_grad_dsigma
|
||||
|
||||
def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)}) = \\frac{1}{\\sigma^{4}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in gaussian
|
||||
:returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
N = y.shape[0]
|
||||
d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
|
||||
return d2logpdf_dlink2_dvar
|
||||
|
||||
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
|
||||
return np.asarray([[dlogpdf_dvar]])
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
|
||||
return dlogpdf_dlink_dvar
|
||||
|
||||
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
|
||||
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
|
||||
return d2logpdf_dlink2_dvar
|
||||
|
||||
def _mean(self, gp):
|
||||
"""
|
||||
Expected value of y under the Mass (or density) function p(y|f)
|
||||
|
||||
.. math::
|
||||
E_{p(y|f)}[y]
|
||||
"""
|
||||
return self.gp_link.transf(gp)
|
||||
|
||||
def _variance(self, gp):
|
||||
"""
|
||||
Variance of y under the Mass (or density) function p(y|f)
|
||||
|
||||
.. math::
|
||||
Var_{p(y|f)}[y]
|
||||
"""
|
||||
return self.variance
|
||||
|
||||
def samples(self, gp, Y_metadata=None):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
:param gp: latent variable
|
||||
"""
|
||||
orig_shape = gp.shape
|
||||
gp = gp.flatten()
|
||||
#orig_shape = gp.shape
|
||||
gp = gp.flatten()
|
||||
Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
|
||||
return Ysim.reshape(orig_shape)
|
||||
|
||||
def log_predictive_density(self, y_test, mu_star, var_star):
|
||||
"""
|
||||
Calculation of the log predictive density
|
||||
|
||||
.. math:
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param y_test: test observations (y_{*})
|
||||
:type y_test: (Nx1) array
|
||||
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||
:type mu_star: (Nx1) array
|
||||
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||
:type var_star: (Nx1) array
|
||||
|
||||
.. Note:
|
||||
Works as if each test point was provided individually, i.e. not full_cov
|
||||
assumes independence
|
||||
"""
|
||||
y_rescaled = (y_test - self._offset)/self._scale
|
||||
return -0.5*np.log(2*np.pi) -0.5*np.log(var_star + self._variance) -0.5*(np.square(y_rescaled - mu_star))/(var_star + self._variance)
|
||||
v = var_star + self.variance
|
||||
return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v
|
||||
|
||||
def _gradients(self, partial):
|
||||
return np.sum(partial)
|
||||
|
|
|
|||
|
|
@ -1,108 +0,0 @@
|
|||
# Copyright (c) 2013, Ricardo Andrade
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
|
||||
from likelihood import likelihood
|
||||
from . import Gaussian
|
||||
|
||||
|
||||
class Gaussian_Mixed_Noise(likelihood):
|
||||
"""
|
||||
Gaussian Likelihood for multiple outputs
|
||||
|
||||
This is a wrapper around likelihood.Gaussian class
|
||||
|
||||
:param data_list: data observations
|
||||
:type data_list: list of numpy arrays (num_data_output_i x 1), one array per output
|
||||
:param noise_params: noise parameters of each output
|
||||
:type noise_params: list of floats, one per output
|
||||
:param normalize: whether to normalize the data before computing (predictions will be in original scales)
|
||||
:type normalize: False|True
|
||||
"""
|
||||
def __init__(self, data_list, noise_params=None, normalize=True):
|
||||
self.num_params = len(data_list)
|
||||
self.n_list = [data.size for data in data_list]
|
||||
self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.num_params),self.n_list)])
|
||||
|
||||
if noise_params is None:
|
||||
noise_params = [1.] * self.num_params
|
||||
else:
|
||||
assert self.num_params == len(noise_params), 'Number of noise parameters does not match the number of noise models.'
|
||||
|
||||
self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)]
|
||||
self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list]
|
||||
self.data = np.vstack(data_list)
|
||||
self.N, self.output_dim = self.data.shape
|
||||
self._offset = np.zeros((1, self.output_dim))
|
||||
self._scale = np.ones((1, self.output_dim))
|
||||
|
||||
self.is_heteroscedastic = True
|
||||
self.Z = 0. # a correction factor which accounts for the approximation made
|
||||
|
||||
self.set_data(data_list)
|
||||
self._set_params(np.asarray(noise_params))
|
||||
|
||||
super(Gaussian_Mixed_Noise, self).__init__()
|
||||
|
||||
def set_data(self, data_list):
|
||||
self.data = np.vstack(data_list)
|
||||
self.N, D = self.data.shape
|
||||
assert D == self.output_dim
|
||||
self.Y = (self.data - self._offset) / self._scale
|
||||
if D > self.N:
|
||||
raise NotImplementedError
|
||||
#self.YYT = np.dot(self.Y, self.Y.T)
|
||||
#self.trYYT = np.trace(self.YYT)
|
||||
#self.YYT_factor = jitchol(self.YYT)
|
||||
else:
|
||||
self.YYT = None
|
||||
self.trYYT = np.sum(np.square(self.Y))
|
||||
self.YYT_factor = self.Y
|
||||
|
||||
def predictive_values(self,mu,var,full_cov,noise_model):
|
||||
"""
|
||||
Predicts the output given the GP
|
||||
|
||||
:param mu: GP's mean
|
||||
:param var: GP's variance
|
||||
:param full_cov: whether to return the full covariance matrix, or just the diagonal
|
||||
:type full_cov: False|True
|
||||
:param noise_model: noise model to use
|
||||
:type noise_model: integer
|
||||
"""
|
||||
if full_cov:
|
||||
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
|
||||
return self.noise_model_list[noise_model].predictive_values(mu,var,full_cov)
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list])
|
||||
|
||||
def _get_param_names(self):
|
||||
if len(self.noise_model_list) == 1:
|
||||
names = self.noise_model_list[0]._get_param_names()
|
||||
else:
|
||||
names = []
|
||||
for noise_model,i in zip(self.noise_model_list,range(len(self.n_list))):
|
||||
names.append(''.join(noise_model._get_param_names() + ['_%s' %i]))
|
||||
return names
|
||||
|
||||
def _set_params(self,p):
|
||||
cs_params = np.cumsum([0]+self.n_params)
|
||||
|
||||
for i in range(len(self.n_params)):
|
||||
self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]])
|
||||
self.precision = np.hstack([np.repeat(noise_model.precision,n) for noise_model,n in zip(self.noise_model_list,self.n_list)])[:,None]
|
||||
|
||||
self.V = self.precision * self.Y
|
||||
self.VVT_factor = self.precision * self.YYT_factor
|
||||
self.covariance_matrix = np.eye(self.N) * 1./self.precision
|
||||
|
||||
def _gradients(self,partial):
|
||||
gradients = []
|
||||
aux = np.cumsum([0]+self.n_list)
|
||||
for ai,af,noise_model in zip(aux[:-1],aux[1:],self.noise_model_list):
|
||||
gradients += [noise_model._gradients(partial[ai:af])]
|
||||
return np.hstack(gradients)
|
||||
|
|
@ -1,60 +1,77 @@
|
|||
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
import copy
|
||||
from ..core.parameterized import Parameterized
|
||||
from scipy import stats,special
|
||||
import scipy as sp
|
||||
import link_functions
|
||||
from ..util.misc import chain_1, chain_2, chain_3
|
||||
from scipy.integrate import quad
|
||||
import warnings
|
||||
from ..core.parameterization import Parameterized
|
||||
|
||||
class likelihood(Parameterized):
|
||||
class Likelihood(Parameterized):
|
||||
"""
|
||||
The atom for a likelihood class
|
||||
Likelihood base class, used to defing p(y|f).
|
||||
|
||||
This object interfaces the GP and the data. The most basic likelihood
|
||||
(Gaussian) inherits directly from this, as does the EP algorithm
|
||||
All instances use _inverse_ link functions, which can be swapped out. It is
|
||||
expected that inheriting classes define a default inverse link function
|
||||
|
||||
Some things must be defined for this to work properly:
|
||||
To use this class, inherit and define missing functionality.
|
||||
|
||||
- self.Y : the effective Gaussian target of the GP
|
||||
- self.N, self.D : Y.shape
|
||||
- self.covariance_matrix : the effective (noise) covariance of the GP targets
|
||||
- self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
|
||||
- self.is_heteroscedastic : enables significant computational savings in GP
|
||||
- self.precision : a scalar or vector representation of the effective target precision
|
||||
- self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
|
||||
- self.V : self.precision * self.Y
|
||||
Inheriting classes *must* implement:
|
||||
pdf_link : a bound method which turns the output of the link function into the pdf
|
||||
logpdf_link : the logarithm of the above
|
||||
|
||||
To enable use with EP, inheriting classes *must* define:
|
||||
TODO: a suitable derivative function for any parameters of the class
|
||||
It is also desirable to define:
|
||||
moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature.
|
||||
|
||||
To enable use with Laplace approximation, inheriting classes *must* define:
|
||||
Some derivative functions *AS TODO*
|
||||
|
||||
For exact Gaussian inference, define *JH TODO*
|
||||
|
||||
"""
|
||||
def __init__(self):
|
||||
Parameterized.__init__(self)
|
||||
self.dZ_dK = 0
|
||||
def __init__(self, gp_link, name):
|
||||
super(Likelihood, self).__init__(name)
|
||||
assert isinstance(gp_link,link_functions.GPTransformation), "gp_link is not a valid GPTransformation."
|
||||
self.gp_link = gp_link
|
||||
self.log_concave = False
|
||||
|
||||
def _get_params(self):
|
||||
def _gradients(self,partial):
|
||||
return np.zeros(0)
|
||||
|
||||
def update_gradients(self, partial):
|
||||
if self.size > 0:
|
||||
raise NotImplementedError('Must be implemented for likelihoods with parameters to be optimized')
|
||||
|
||||
def _preprocess_values(self,Y):
|
||||
"""
|
||||
In case it is needed, this function assess the output values or makes any pertinent transformation on them.
|
||||
|
||||
:param Y: observed output
|
||||
:type Y: Nx1 numpy.darray
|
||||
|
||||
"""
|
||||
return Y
|
||||
|
||||
def conditional_mean(self, gp):
|
||||
"""
|
||||
The mean of the random variable conditioned on one value of the GP
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def _get_param_names(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def _set_params(self, x):
|
||||
raise NotImplementedError
|
||||
|
||||
def fit_full(self, K):
|
||||
def conditional_variance(self, gp):
|
||||
"""
|
||||
No approximations needed by default
|
||||
The variance of the random variable conditioned on one value of the GP
|
||||
"""
|
||||
pass
|
||||
|
||||
def restart(self):
|
||||
"""
|
||||
No need to restart if not an approximation
|
||||
"""
|
||||
pass
|
||||
|
||||
def _gradients(self, partial):
|
||||
raise NotImplementedError
|
||||
|
||||
def predictive_values(self, mu, var):
|
||||
raise NotImplementedError
|
||||
|
||||
def log_predictive_density(self, y_test, mu_star, var_star):
|
||||
"""
|
||||
Calculation of the predictive density
|
||||
Calculation of the log predictive density
|
||||
|
||||
.. math:
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
|
|
@ -66,4 +83,391 @@ class likelihood(Parameterized):
|
|||
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||
:type var_star: (Nx1) array
|
||||
"""
|
||||
assert y_test.shape==mu_star.shape
|
||||
assert y_test.shape==var_star.shape
|
||||
assert y_test.shape[1] == 1
|
||||
def integral_generator(y, m, v):
|
||||
"""Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
|
||||
def f(f_star):
|
||||
return self.pdf(f_star, y)*np.exp(-(1./(2*v))*np.square(m-f_star))
|
||||
return f
|
||||
|
||||
scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v), -np.inf, np.inf) for y, m, v in zip(y_test.flatten(), mu_star.flatten(), var_star.flatten())])
|
||||
scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1)
|
||||
p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star)
|
||||
return np.log(p_ystar)
|
||||
|
||||
def _moments_match_ep(self,obs,tau,v):
|
||||
"""
|
||||
Calculation of moments using quadrature
|
||||
|
||||
:param obs: observed output
|
||||
:param tau: cavity distribution 1st natural parameter (precision)
|
||||
:param v: cavity distribution 2nd natural paramenter (mu*precision)
|
||||
"""
|
||||
#Compute first integral for zeroth moment.
|
||||
#NOTE constant np.sqrt(2*pi/tau) added at the end of the function
|
||||
mu = v/tau
|
||||
def int_1(f):
|
||||
return self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
|
||||
z_scaled, accuracy = quad(int_1, -np.inf, np.inf)
|
||||
|
||||
#Compute second integral for first moment
|
||||
def int_2(f):
|
||||
return f*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
|
||||
mean, accuracy = quad(int_2, -np.inf, np.inf)
|
||||
mean /= z_scaled
|
||||
|
||||
#Compute integral for variance
|
||||
def int_3(f):
|
||||
return (f**2)*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
|
||||
Ef2, accuracy = quad(int_3, -np.inf, np.inf)
|
||||
Ef2 /= z_scaled
|
||||
variance = Ef2 - mean**2
|
||||
|
||||
#Add constant to the zeroth moment
|
||||
#NOTE: this constant is not needed in the other moments because it cancells out.
|
||||
z = z_scaled/np.sqrt(2*np.pi/tau)
|
||||
|
||||
return z, mean, variance
|
||||
|
||||
def variational_expectations(self, Y, m, v, gh_points=None):
|
||||
"""
|
||||
Use Gauss-Hermite Quadrature to compute
|
||||
|
||||
E_p(f) [ log p(y|f) ]
|
||||
d/dm E_p(f) [ log p(y|f) ]
|
||||
d/dv E_p(f) [ log p(y|f) ]
|
||||
|
||||
where p(f) is a Gaussian with mean m and variance v. The shapes of Y, m and v should match.
|
||||
|
||||
if no gh_points are passed, we construct them using defualt options
|
||||
"""
|
||||
|
||||
if gh_points is None:
|
||||
gh_x, gh_w = np.polynomial.hermite.hermgauss(12)
|
||||
else:
|
||||
gh_x, gh_w = gh_points
|
||||
|
||||
shape = m.shape
|
||||
m,v,Y = m.flatten(), v.flatten(), Y.flatten()
|
||||
|
||||
#make a grid of points
|
||||
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None]
|
||||
|
||||
#evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid.
|
||||
# broadcast needs to be handled carefully.
|
||||
logp = self.logpdf(X,Y[:,None])
|
||||
dlogp_dx = self.dlogpdf_df(X, Y[:,None])
|
||||
d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None])
|
||||
|
||||
#clipping for numerical stability
|
||||
logp = np.clip(logp,-1e6,1e6)
|
||||
dlogp_dx = np.clip(dlogp_dx,-1e6,1e6)
|
||||
d2logp_dx2 = np.clip(d2logp_dx2,-1e6,1e6)
|
||||
|
||||
#average over the gird to get derivatives of the Gaussian's parameters
|
||||
F = np.dot(logp, gh_w)
|
||||
dF_dm = np.dot(dlogp_dx, gh_w)
|
||||
dF_dv = np.dot(d2logp_dx2, gh_w)/2.
|
||||
|
||||
if np.any(np.isnan(dF_dv)) or np.any(np.isinf(dF_dv)):
|
||||
stop
|
||||
if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)):
|
||||
stop
|
||||
|
||||
return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape)
|
||||
|
||||
|
||||
|
||||
|
||||
def predictive_mean(self, mu, variance, Y_metadata=None):
|
||||
"""
|
||||
Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )
|
||||
|
||||
:param mu: mean of posterior
|
||||
:param sigma: standard deviation of posterior
|
||||
|
||||
"""
|
||||
#conditional_mean: the edpected value of y given some f, under this likelihood
|
||||
def int_mean(f,m,v):
|
||||
p = np.exp(-(0.5/v)*np.square(f - m))
|
||||
#If p is zero then conditional_mean will overflow
|
||||
if p < 1e-10:
|
||||
return 0.
|
||||
else:
|
||||
return self.conditional_mean(f)*p
|
||||
scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
|
||||
|
||||
return mean
|
||||
|
||||
def _conditional_mean(self, f):
|
||||
"""Quadrature calculation of the conditional mean: E(Y_star|f)"""
|
||||
raise NotImplementedError, "implement this function to make predictions"
|
||||
|
||||
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
||||
"""
|
||||
Approximation to the predictive variance: V(Y_star)
|
||||
|
||||
The following variance decomposition is used:
|
||||
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
|
||||
|
||||
:param mu: mean of posterior
|
||||
:param sigma: standard deviation of posterior
|
||||
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
|
||||
|
||||
"""
|
||||
#sigma2 = sigma**2
|
||||
normalizer = np.sqrt(2*np.pi*variance)
|
||||
|
||||
# E( V(Y_star|f_star) )
|
||||
def int_var(f,m,v):
|
||||
p = np.exp(-(0.5/v)*np.square(f - m))
|
||||
#If p is zero then conditional_variance will overflow
|
||||
if p < 1e-10:
|
||||
return 0.
|
||||
else:
|
||||
return self.conditional_variance(f)*p
|
||||
scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
exp_var = np.array(scaled_exp_variance)[:,None] / normalizer
|
||||
|
||||
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
|
||||
|
||||
#E( E(Y_star|f_star) )**2
|
||||
if predictive_mean is None:
|
||||
predictive_mean = self.predictive_mean(mu,variance)
|
||||
predictive_mean_sq = predictive_mean**2
|
||||
|
||||
#E( E(Y_star|f_star)**2 )
|
||||
def int_pred_mean_sq(f,m,v,predictive_mean_sq):
|
||||
p = np.exp(-(0.5/v)*np.square(f - m))
|
||||
#If p is zero then conditional_mean**2 will overflow
|
||||
if p < 1e-10:
|
||||
return 0.
|
||||
else:
|
||||
return self.conditional_mean(f)**2*p
|
||||
|
||||
scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
|
||||
exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer
|
||||
|
||||
var_exp = exp_exp2 - predictive_mean_sq
|
||||
|
||||
# V(Y_star) = E[ V(Y_star|f_star) ] + V[ E(Y_star|f_star) ]
|
||||
# V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2
|
||||
return exp_var + var_exp
|
||||
|
||||
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def pdf(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the likelihood (pdf) using it
|
||||
|
||||
.. math:
|
||||
p(y|\\lambda(f))
|
||||
|
||||
:param f: latent variables f
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
|
||||
def logpdf(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the log likelihood (log pdf) using it
|
||||
|
||||
.. math:
|
||||
\\log p(y|\\lambda(f))
|
||||
|
||||
:param f: latent variables f
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
|
||||
:returns: log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
|
||||
def dlogpdf_df(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the derivative of log likelihood using it
|
||||
Uses the Faa di Bruno's formula for the chain rule
|
||||
|
||||
.. math::
|
||||
\\frac{d\\log p(y|\\lambda(f))}{df} = \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d\\lambda(f)}{df}
|
||||
|
||||
:param f: latent variables f
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
|
||||
:returns: derivative of log likelihood evaluated for this point
|
||||
:rtype: 1xN array
|
||||
"""
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
return chain_1(dlogpdf_dlink, dlink_df)
|
||||
|
||||
def d2logpdf_df2(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the second derivative of log likelihood using it
|
||||
Uses the Faa di Bruno's formula for the chain rule
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2}\\log p(y|\\lambda(f))}{df^{2}} = \\frac{d^{2}\\log p(y|\\lambda(f))}{d^{2}\\lambda(f)}\\left(\\frac{d\\lambda(f)}{df}\\right)^{2} + \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d^{2}\\lambda(f)}{df^{2}}
|
||||
|
||||
:param f: latent variables f
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
|
||||
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
|
||||
:rtype: 1xN array
|
||||
"""
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
|
||||
|
||||
def d3logpdf_df3(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the third derivative of log likelihood using it
|
||||
Uses the Faa di Bruno's formula for the chain rule
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3}\\log p(y|\\lambda(f))}{df^{3}} = \\frac{d^{3}\\log p(y|\\lambda(f)}{d\\lambda(f)^{3}}\\left(\\frac{d\\lambda(f)}{df}\\right)^{3} + 3\\frac{d^{2}\\log p(y|\\lambda(f)}{d\\lambda(f)^{2}}\\frac{d\\lambda(f)}{df}\\frac{d^{2}\\lambda(f)}{df^{2}} + \\frac{d\\log p(y|\\lambda(f)}{d\\lambda(f)}\\frac{d^{3}\\lambda(f)}{df^{3}}
|
||||
|
||||
:param f: latent variables f
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
|
||||
:returns: third derivative of log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d3link_df3 = self.gp_link.d3transf_df3(f)
|
||||
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
|
||||
|
||||
def dlogpdf_dtheta(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([1, 0])
|
||||
|
||||
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
|
||||
else:
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([f.shape[0], 0])
|
||||
|
||||
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
|
||||
else:
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([f.shape[0], 0])
|
||||
|
||||
def _laplace_gradients(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
|
||||
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
|
||||
# ensure we have gradients for every parameter we want to optimize
|
||||
assert len(dlogpdf_dtheta) == self.size #1 x num_param array
|
||||
assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
|
||||
assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
|
||||
|
||||
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
|
||||
|
||||
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
|
||||
"""
|
||||
Compute mean, variance of the predictive distibution.
|
||||
|
||||
:param mu: mean of the latent variable, f, of posterior
|
||||
:param var: variance of the latent variable, f, of posterior
|
||||
:param full_cov: whether to use the full covariance or just the diagonal
|
||||
:type full_cov: Boolean
|
||||
"""
|
||||
|
||||
pred_mean = self.predictive_mean(mu, var, Y_metadata)
|
||||
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata)
|
||||
|
||||
return pred_mean, pred_var
|
||||
|
||||
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
|
||||
#compute the quantiles by sampling!!!
|
||||
N_samp = 1000
|
||||
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
|
||||
#ss_f = s.flatten()
|
||||
#ss_y = self.samples(ss_f, Y_metadata)
|
||||
ss_y = self.samples(s, Y_metadata)
|
||||
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
|
||||
|
||||
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
|
||||
|
||||
def samples(self, gp, Y_metadata=None):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
:param gp: latent variable
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
|
|
|||
|
|
@ -1,13 +1,14 @@
|
|||
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
import scipy as sp
|
||||
import pylab as pb
|
||||
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
|
||||
|
||||
_exp_lim_val = np.finfo(np.float64).max
|
||||
_lim_val = np.log(_exp_lim_val)
|
||||
|
||||
class GPTransformation(object):
|
||||
"""
|
||||
Link function class for doing non-Gaussian likelihoods approximation
|
||||
|
|
@ -69,7 +70,7 @@ class Probit(GPTransformation):
|
|||
.. math::
|
||||
|
||||
g(f) = \\Phi^{-1} (mu)
|
||||
|
||||
|
||||
"""
|
||||
def transf(self,f):
|
||||
return std_norm_cdf(f)
|
||||
|
|
@ -86,6 +87,34 @@ class Probit(GPTransformation):
|
|||
f2 = f**2
|
||||
return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
|
||||
|
||||
|
||||
class Cloglog(GPTransformation):
|
||||
"""
|
||||
Complementary log-log link
|
||||
.. math::
|
||||
|
||||
p(f) = 1 - e^{-e^f}
|
||||
|
||||
or
|
||||
|
||||
f = \log (-\log(1-p))
|
||||
|
||||
"""
|
||||
def transf(self,f):
|
||||
return 1-np.exp(-np.exp(f))
|
||||
|
||||
def dtransf_df(self,f):
|
||||
return np.exp(f-np.exp(f))
|
||||
|
||||
def d2transf_df2(self,f):
|
||||
ef = np.exp(f)
|
||||
return -np.exp(f-ef)*(ef-1.)
|
||||
|
||||
def d3transf_df3(self,f):
|
||||
ef = np.exp(f)
|
||||
return np.exp(f-ef)*(1.-3*ef + ef**2)
|
||||
|
||||
|
||||
class Log(GPTransformation):
|
||||
"""
|
||||
.. math::
|
||||
|
|
@ -94,16 +123,16 @@ class Log(GPTransformation):
|
|||
|
||||
"""
|
||||
def transf(self,f):
|
||||
return np.exp(f)
|
||||
return np.exp(np.clip(f, -_lim_val, _lim_val))
|
||||
|
||||
def dtransf_df(self,f):
|
||||
return np.exp(f)
|
||||
return np.exp(np.clip(f, -_lim_val, _lim_val))
|
||||
|
||||
def d2transf_df2(self,f):
|
||||
return np.exp(f)
|
||||
return np.exp(np.clip(f, -_lim_val, _lim_val))
|
||||
|
||||
def d3transf_df3(self,f):
|
||||
return np.exp(f)
|
||||
return np.exp(np.clip(f, -_lim_val, _lim_val))
|
||||
|
||||
class Log_ex_1(GPTransformation):
|
||||
"""
|
||||
82
GPy/likelihoods/mixed_noise.py
Normal file
82
GPy/likelihoods/mixed_noise.py
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats, special
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
from gaussian import Gaussian
|
||||
from ..core.parameterization import Param
|
||||
from ..core.parameterization.transformations import Logexp
|
||||
from ..core.parameterization import Parameterized
|
||||
import itertools
|
||||
|
||||
class MixedNoise(Likelihood):
|
||||
def __init__(self, likelihoods_list, name='mixed_noise'):
|
||||
#NOTE at the moment this likelihood only works for using a list of gaussians
|
||||
super(Likelihood, self).__init__(name=name)
|
||||
|
||||
self.link_parameters(*likelihoods_list)
|
||||
self.likelihoods_list = likelihoods_list
|
||||
self.log_concave = False
|
||||
|
||||
def gaussian_variance(self, Y_metadata):
|
||||
assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
|
||||
ind = Y_metadata['output_index'].flatten()
|
||||
variance = np.zeros(ind.size)
|
||||
for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))):
|
||||
variance[ind==j] = lik.variance
|
||||
return variance
|
||||
|
||||
def betaY(self,Y,Y_metadata):
|
||||
#TODO not here.
|
||||
return Y/self.gaussian_variance(Y_metadata=Y_metadata)[:,None]
|
||||
|
||||
def update_gradients(self, gradients):
|
||||
self.gradient = gradients
|
||||
|
||||
def exact_inference_gradients(self, dL_dKdiag, Y_metadata):
|
||||
assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
|
||||
ind = Y_metadata['output_index'].flatten()
|
||||
return np.array([dL_dKdiag[ind==i].sum() for i in range(len(self.likelihoods_list))])
|
||||
|
||||
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
|
||||
ind = Y_metadata['output_index'].flatten()
|
||||
_variance = np.array([self.likelihoods_list[j].variance for j in ind ])
|
||||
if full_cov:
|
||||
var += np.eye(var.shape[0])*_variance
|
||||
else:
|
||||
var += _variance
|
||||
return mu, var
|
||||
|
||||
def predictive_variance(self, mu, sigma, Y_metadata):
|
||||
_variance = self.gaussian_variance(Y_metadata)
|
||||
return _variance + sigma**2
|
||||
|
||||
def predictive_quantiles(self, mu, var, quantiles, Y_metadata):
|
||||
ind = Y_metadata['output_index'].flatten()
|
||||
outputs = np.unique(ind)
|
||||
Q = np.zeros( (mu.size,len(quantiles)) )
|
||||
for j in outputs:
|
||||
q = self.likelihoods_list[j].predictive_quantiles(mu[ind==j,:],
|
||||
var[ind==j,:],quantiles,Y_metadata=None)
|
||||
Q[ind==j,:] = np.hstack(q)
|
||||
return [q[:,None] for q in Q.T]
|
||||
|
||||
def samples(self, gp, Y_metadata):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
:param gp: latent variable
|
||||
"""
|
||||
N1, N2 = gp.shape
|
||||
Ysim = np.zeros((N1,N2))
|
||||
ind = Y_metadata['output_index'].flatten()
|
||||
for j in np.unique(ind):
|
||||
flt = ind==j
|
||||
gp_filtered = gp[flt,:]
|
||||
n1 = gp_filtered.shape[0]
|
||||
lik = self.likelihoods_list[j]
|
||||
_ysim = np.array([np.random.normal(lik.gp_link.transf(gpj), scale=np.sqrt(lik.variance), size=1) for gpj in gp_filtered.flatten()])
|
||||
Ysim[flt,:] = _ysim.reshape(n1,N2)
|
||||
return Ysim
|
||||
|
|
@ -1,121 +0,0 @@
|
|||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
import noise_models
|
||||
|
||||
def bernoulli(gp_link=None):
|
||||
"""
|
||||
Construct a bernoulli likelihood
|
||||
|
||||
:param gp_link: a GPy gp_link function
|
||||
"""
|
||||
if gp_link is None:
|
||||
gp_link = noise_models.gp_transformations.Probit()
|
||||
#else:
|
||||
# assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
|
||||
|
||||
if isinstance(gp_link,noise_models.gp_transformations.Probit):
|
||||
analytical_mean = True
|
||||
analytical_variance = False
|
||||
|
||||
elif isinstance(gp_link,noise_models.gp_transformations.Heaviside):
|
||||
analytical_mean = True
|
||||
analytical_variance = True
|
||||
|
||||
else:
|
||||
analytical_mean = False
|
||||
analytical_variance = False
|
||||
|
||||
return noise_models.bernoulli_noise.Bernoulli(gp_link,analytical_mean,analytical_variance)
|
||||
|
||||
def exponential(gp_link=None):
|
||||
|
||||
"""
|
||||
Construct a exponential likelihood
|
||||
|
||||
:param gp_link: a GPy gp_link function
|
||||
"""
|
||||
if gp_link is None:
|
||||
gp_link = noise_models.gp_transformations.Log_ex_1()
|
||||
|
||||
analytical_mean = False
|
||||
analytical_variance = False
|
||||
return noise_models.exponential_noise.Exponential(gp_link,analytical_mean,analytical_variance)
|
||||
|
||||
def gaussian_ep(gp_link=None,variance=1.):
|
||||
"""
|
||||
Construct a gaussian likelihood
|
||||
|
||||
:param gp_link: a GPy gp_link function
|
||||
:param variance: scalar
|
||||
"""
|
||||
if gp_link is None:
|
||||
gp_link = noise_models.gp_transformations.Identity()
|
||||
#else:
|
||||
# assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
|
||||
|
||||
analytical_mean = False
|
||||
analytical_variance = False
|
||||
return noise_models.gaussian_noise.Gaussian(gp_link,analytical_mean,analytical_variance,variance)
|
||||
|
||||
def poisson(gp_link=None):
|
||||
"""
|
||||
Construct a Poisson likelihood
|
||||
|
||||
:param gp_link: a GPy gp_link function
|
||||
"""
|
||||
if gp_link is None:
|
||||
gp_link = noise_models.gp_transformations.Log_ex_1()
|
||||
#else:
|
||||
# assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
|
||||
analytical_mean = False
|
||||
analytical_variance = False
|
||||
return noise_models.poisson_noise.Poisson(gp_link,analytical_mean,analytical_variance)
|
||||
|
||||
def gamma(gp_link=None,beta=1.):
|
||||
"""
|
||||
Construct a Gamma likelihood
|
||||
|
||||
:param gp_link: a GPy gp_link function
|
||||
:param beta: scalar
|
||||
"""
|
||||
if gp_link is None:
|
||||
gp_link = noise_models.gp_transformations.Log_ex_1()
|
||||
analytical_mean = False
|
||||
analytical_variance = False
|
||||
return noise_models.gamma_noise.Gamma(gp_link,analytical_mean,analytical_variance,beta)
|
||||
|
||||
def gaussian(gp_link=None, variance=2, D=None, N=None):
|
||||
"""
|
||||
Construct a Gaussian likelihood
|
||||
|
||||
:param gp_link: a GPy gp_link function
|
||||
:param variance: variance
|
||||
:type variance: scalar
|
||||
:returns: Gaussian noise model:
|
||||
"""
|
||||
if gp_link is None:
|
||||
gp_link = noise_models.gp_transformations.Identity()
|
||||
analytical_mean = True
|
||||
analytical_variance = True # ?
|
||||
return noise_models.gaussian_noise.Gaussian(gp_link, analytical_mean,
|
||||
analytical_variance, variance=variance, D=D, N=N)
|
||||
|
||||
def student_t(gp_link=None, deg_free=5, sigma2=2):
|
||||
"""
|
||||
Construct a Student t likelihood
|
||||
|
||||
:param gp_link: a GPy gp_link function
|
||||
:param deg_free: degrees of freedom of student-t
|
||||
:type deg_free: scalar
|
||||
:param sigma2: variance
|
||||
:type sigma2: scalar
|
||||
:returns: Student-T noise model
|
||||
"""
|
||||
if gp_link is None:
|
||||
gp_link = noise_models.gp_transformations.Identity()
|
||||
analytical_mean = True
|
||||
analytical_variance = True
|
||||
return noise_models.student_t_noise.StudentT(gp_link, analytical_mean,
|
||||
analytical_variance,deg_free, sigma2)
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
import noise_distributions
|
||||
import bernoulli_noise
|
||||
import exponential_noise
|
||||
import gaussian_noise
|
||||
import gamma_noise
|
||||
import poisson_noise
|
||||
import student_t_noise
|
||||
import gp_transformations
|
||||
|
|
@ -1,300 +0,0 @@
|
|||
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats,special
|
||||
import scipy as sp
|
||||
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||
import gp_transformations
|
||||
from noise_distributions import NoiseDistribution
|
||||
|
||||
class Gaussian(NoiseDistribution):
|
||||
"""
|
||||
Gaussian likelihood
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
|
||||
|
||||
:param variance: variance value of the Gaussian distribution
|
||||
:param N: Number of data points
|
||||
:type N: int
|
||||
"""
|
||||
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1., D=None, N=None):
|
||||
self.variance = variance
|
||||
self.N = N
|
||||
self._set_params(np.asarray(variance))
|
||||
super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||
if isinstance(gp_link , gp_transformations.Identity):
|
||||
self.log_concave = True
|
||||
|
||||
def _get_params(self):
|
||||
return np.array([self.variance])
|
||||
|
||||
def _get_param_names(self):
|
||||
return ['noise_model_variance']
|
||||
|
||||
def _set_params(self, p):
|
||||
self.variance = float(p)
|
||||
self.I = np.eye(self.N)
|
||||
self.covariance_matrix = self.I * self.variance
|
||||
self.Ki = self.I*(1.0 / self.variance)
|
||||
#self.ln_det_K = np.sum(np.log(np.diag(self.covariance_matrix)))
|
||||
self.ln_det_K = self.N*np.log(self.variance)
|
||||
|
||||
def _gradients(self,partial):
|
||||
return np.zeros(1)
|
||||
#return np.sum(partial)
|
||||
|
||||
def _preprocess_values(self,Y):
|
||||
"""
|
||||
Check if the values of the observations correspond to the values
|
||||
assumed by the likelihood function.
|
||||
"""
|
||||
return Y
|
||||
|
||||
def _moments_match_analytical(self,data_i,tau_i,v_i):
|
||||
"""
|
||||
Moments match of the marginal approximation in EP algorithm
|
||||
|
||||
:param i: number of observation (int)
|
||||
:param tau_i: precision of the cavity distribution (float)
|
||||
:param v_i: mean/variance of the cavity distribution (float)
|
||||
"""
|
||||
sigma2_hat = 1./(1./self.variance + tau_i)
|
||||
mu_hat = sigma2_hat*(data_i/self.variance + v_i)
|
||||
sum_var = self.variance + 1./tau_i
|
||||
Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
|
||||
return Z_hat, mu_hat, sigma2_hat
|
||||
|
||||
def _predictive_mean_analytical(self,mu,sigma):
|
||||
new_sigma2 = self.predictive_variance(mu,sigma)
|
||||
return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)
|
||||
|
||||
def _predictive_variance_analytical(self,mu,sigma,predictive_mean=None):
|
||||
return 1./(1./self.variance + 1./sigma**2)
|
||||
|
||||
def _mass(self, link_f, y, extra_data=None):
|
||||
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
|
||||
Please negate your function and use pdf in noise_model.py, if implementing a likelihood\
|
||||
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
|
||||
its derivatives")
|
||||
def _nlog_mass(self, link_f, y, extra_data=None):
|
||||
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
|
||||
Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
|
||||
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
|
||||
its derivatives")
|
||||
|
||||
def _dnlog_mass_dgp(self, link_f, y, extra_data=None):
|
||||
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
|
||||
Please negate your function and use dlogpdf_df in noise_model.py, if implementing a likelihood\
|
||||
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
|
||||
its derivatives")
|
||||
|
||||
def _d2nlog_mass_dgp2(self, link_f, y, extra_data=None):
|
||||
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
|
||||
Please negate your function and use d2logpdf_df2 in noise_model.py, if implementing a likelihood\
|
||||
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
|
||||
its derivatives")
|
||||
|
||||
def pdf_link(self, link_f, y, extra_data=None):
|
||||
"""
|
||||
Likelihood function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data not used in gaussian
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
#Assumes no covariance, exp, sum, log for numerical stability
|
||||
return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
|
||||
|
||||
def logpdf_link(self, link_f, y, extra_data=None):
|
||||
"""
|
||||
Log likelihood function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data not used in gaussian
|
||||
:returns: log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi))
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, extra_data=None):
|
||||
"""
|
||||
Gradient of the pdf at y, given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\sigma^{2}}(y_{i} - \\lambda(f_{i}))
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data not used in gaussian
|
||||
:returns: gradient of log likelihood evaluated at points link(f)
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
s2_i = (1.0/self.variance)
|
||||
grad = s2_i*y - s2_i*link_f
|
||||
return grad
|
||||
|
||||
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
|
||||
"""
|
||||
Hessian at y, given link_f, w.r.t link_f.
|
||||
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
|
||||
|
||||
The hessian will be 0 unless i == j
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}f} = -\\frac{1}{\\sigma^{2}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data not used in gaussian
|
||||
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
|
||||
:rtype: Nx1 array
|
||||
|
||||
.. Note::
|
||||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
hess = -(1.0/self.variance)*np.ones((self.N, 1))
|
||||
return hess
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
|
||||
"""
|
||||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = 0
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data not used in gaussian
|
||||
:returns: third derivative of log likelihood evaluated at points link(f)
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None]
|
||||
return d3logpdf_dlink3
|
||||
|
||||
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
|
||||
"""
|
||||
Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = -\\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - \\lambda(f_{i}))^{2}}{2\\sigma^{4}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data not used in gaussian
|
||||
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
e = y - link_f
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.sum(np.square(e))
|
||||
return np.sum(dlik_dsigma) # Sure about this sum?
|
||||
|
||||
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
|
||||
"""
|
||||
Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)}) = \\frac{1}{\\sigma^{4}}(-y_{i} + \\lambda(f_{i}))
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data not used in gaussian
|
||||
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
dlik_grad_dsigma = -s_4*y + s_4*link_f
|
||||
return dlik_grad_dsigma
|
||||
|
||||
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
|
||||
"""
|
||||
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)}) = \\frac{1}{\\sigma^{4}}
|
||||
|
||||
:param link_f: latent variables link(f)
|
||||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data not used in gaussian
|
||||
:returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None]
|
||||
return d2logpdf_dlink2_dvar
|
||||
|
||||
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
|
||||
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
|
||||
return np.asarray([[dlogpdf_dvar]])
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
|
||||
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
|
||||
return dlogpdf_dlink_dvar
|
||||
|
||||
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
|
||||
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
|
||||
return d2logpdf_dlink2_dvar
|
||||
|
||||
def _mean(self,gp):
|
||||
"""
|
||||
Expected value of y under the Mass (or density) function p(y|f)
|
||||
|
||||
.. math::
|
||||
E_{p(y|f)}[y]
|
||||
"""
|
||||
return self.gp_link.transf(gp)
|
||||
|
||||
def _variance(self,gp):
|
||||
"""
|
||||
Variance of y under the Mass (or density) function p(y|f)
|
||||
|
||||
.. math::
|
||||
Var_{p(y|f)}[y]
|
||||
"""
|
||||
return self.variance
|
||||
|
||||
def samples(self, gp):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
:param gp: latent variable
|
||||
"""
|
||||
orig_shape = gp.shape
|
||||
gp = gp.flatten()
|
||||
Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
|
||||
return Ysim.reshape(orig_shape)
|
||||
|
|
@ -1,434 +0,0 @@
|
|||
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats,special
|
||||
import scipy as sp
|
||||
import pylab as pb
|
||||
from GPy.util.plot import gpplot
|
||||
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||
import gp_transformations
|
||||
from GPy.util.misc import chain_1, chain_2, chain_3
|
||||
from scipy.integrate import quad
|
||||
import warnings
|
||||
|
||||
class NoiseDistribution(object):
|
||||
"""
|
||||
Likelihood class for doing approximations
|
||||
"""
|
||||
def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
|
||||
assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."
|
||||
self.gp_link = gp_link
|
||||
self.analytical_mean = analytical_mean
|
||||
self.analytical_variance = analytical_variance
|
||||
if self.analytical_mean:
|
||||
self.moments_match = self._moments_match_analytical
|
||||
self.predictive_mean = self._predictive_mean_analytical
|
||||
else:
|
||||
self.moments_match = self._moments_match_numerical
|
||||
self.predictive_mean = self._predictive_mean_numerical
|
||||
if self.analytical_variance:
|
||||
self.predictive_variance = self._predictive_variance_analytical
|
||||
else:
|
||||
self.predictive_variance = self._predictive_variance_numerical
|
||||
|
||||
self.log_concave = False
|
||||
|
||||
def _get_params(self):
|
||||
return np.zeros(0)
|
||||
|
||||
def _get_param_names(self):
|
||||
return []
|
||||
|
||||
def _set_params(self,p):
|
||||
pass
|
||||
|
||||
def _gradients(self,partial):
|
||||
return np.zeros(0)
|
||||
|
||||
def _preprocess_values(self,Y):
|
||||
"""
|
||||
In case it is needed, this function assess the output values or makes any pertinent transformation on them.
|
||||
|
||||
:param Y: observed output
|
||||
:type Y: Nx1 numpy.darray
|
||||
|
||||
"""
|
||||
return Y
|
||||
|
||||
def _moments_match_analytical(self,obs,tau,v):
|
||||
"""
|
||||
If available, this function computes the moments analytically.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def log_predictive_density(self, y_test, mu_star, var_star):
|
||||
"""
|
||||
Calculation of the log predictive density
|
||||
|
||||
.. math:
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param y_test: test observations (y_{*})
|
||||
:type y_test: (Nx1) array
|
||||
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||
:type mu_star: (Nx1) array
|
||||
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||
:type var_star: (Nx1) array
|
||||
"""
|
||||
assert y_test.shape==mu_star.shape
|
||||
assert y_test.shape==var_star.shape
|
||||
assert y_test.shape[1] == 1
|
||||
def integral_generator(y, m, v):
|
||||
"""Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
|
||||
def f(f_star):
|
||||
return self.pdf(f_star, y)*np.exp(-(1./(2*v))*np.square(m-f_star))
|
||||
return f
|
||||
|
||||
scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v), -np.inf, np.inf) for y, m, v in zip(y_test.flatten(), mu_star.flatten(), var_star.flatten())])
|
||||
scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1)
|
||||
p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star)
|
||||
return np.log(p_ystar)
|
||||
|
||||
def _moments_match_numerical(self,obs,tau,v):
|
||||
"""
|
||||
Calculation of moments using quadrature
|
||||
|
||||
:param obs: observed output
|
||||
:param tau: cavity distribution 1st natural parameter (precision)
|
||||
:param v: cavity distribution 2nd natural paramenter (mu*precision)
|
||||
"""
|
||||
#Compute first integral for zeroth moment.
|
||||
#NOTE constant np.sqrt(2*pi/tau) added at the end of the function
|
||||
mu = v/tau
|
||||
def int_1(f):
|
||||
return self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
|
||||
z_scaled, accuracy = quad(int_1, -np.inf, np.inf)
|
||||
|
||||
#Compute second integral for first moment
|
||||
def int_2(f):
|
||||
return f*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
|
||||
mean, accuracy = quad(int_2, -np.inf, np.inf)
|
||||
mean /= z_scaled
|
||||
|
||||
#Compute integral for variance
|
||||
def int_3(f):
|
||||
return (f**2)*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
|
||||
Ef2, accuracy = quad(int_3, -np.inf, np.inf)
|
||||
Ef2 /= z_scaled
|
||||
variance = Ef2 - mean**2
|
||||
|
||||
#Add constant to the zeroth moment
|
||||
#NOTE: this constant is not needed in the other moments because it cancells out.
|
||||
z = z_scaled/np.sqrt(2*np.pi/tau)
|
||||
|
||||
return z, mean, variance
|
||||
|
||||
def _predictive_mean_analytical(self,mu,sigma):
|
||||
"""
|
||||
Predictive mean
|
||||
.. math::
|
||||
E(Y^{*}|Y) = E( E(Y^{*}|f^{*}, Y) )
|
||||
|
||||
If available, this function computes the predictive mean analytically.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def _predictive_variance_analytical(self,mu,sigma):
|
||||
"""
|
||||
Predictive variance
|
||||
.. math::
|
||||
V(Y^{*}| Y) = E( V(Y^{*}|f^{*}, Y) ) + V( E(Y^{*}|f^{*}, Y) )
|
||||
|
||||
If available, this function computes the predictive variance analytically.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def _predictive_mean_numerical(self,mu,variance):
|
||||
"""
|
||||
Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )
|
||||
|
||||
:param mu: mean of posterior
|
||||
:param sigma: standard deviation of posterior
|
||||
|
||||
"""
|
||||
#import ipdb; ipdb.set_trace()
|
||||
def int_mean(f,m,v):
|
||||
return self._mean(f)*np.exp(-(0.5/v)*np.square(f - m))
|
||||
#scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
scaled_mean = [quad(int_mean, mj-6*np.sqrt(s2j), mj+6*np.sqrt(s2j), args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
|
||||
|
||||
return mean
|
||||
|
||||
def _predictive_variance_numerical(self,mu,variance,predictive_mean=None):
|
||||
"""
|
||||
Numerical approximation to the predictive variance: V(Y_star)
|
||||
|
||||
The following variance decomposition is used:
|
||||
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
|
||||
|
||||
:param mu: mean of posterior
|
||||
:param sigma: standard deviation of posterior
|
||||
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
|
||||
|
||||
"""
|
||||
normalizer = np.sqrt(2*np.pi*variance)
|
||||
|
||||
# E( V(Y_star|f_star) )
|
||||
def int_var(f,m,v):
|
||||
return self._variance(f)*np.exp(-(0.5/v)*np.square(f - m))
|
||||
#Most of the weight is within 6 stds and this avoids some negative infinity and infinity problems of taking f^2
|
||||
scaled_exp_variance = [quad(int_var, mj-6*np.sqrt(s2j), mj+6*np.sqrt(s2j), args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
exp_var = np.array(scaled_exp_variance)[:,None] / normalizer
|
||||
|
||||
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
|
||||
|
||||
#E( E(Y_star|f_star) )**2
|
||||
if predictive_mean is None:
|
||||
predictive_mean = self.predictive_mean(mu,variance)
|
||||
predictive_mean_sq = predictive_mean**2
|
||||
|
||||
#E( E(Y_star|f_star)**2 )
|
||||
def int_pred_mean_sq(f,m,v):
|
||||
return self._mean(f)**2*np.exp(-(0.5/v)*np.square(f - m))
|
||||
scaled_exp_exp2 = [quad(int_pred_mean_sq, mj-6*np.sqrt(s2j), mj+6*np.sqrt(s2j), args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer
|
||||
|
||||
var_exp = exp_exp2 - predictive_mean_sq
|
||||
|
||||
# V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
|
||||
return exp_var + var_exp
|
||||
|
||||
def pdf_link(self, link_f, y, extra_data=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def logpdf_link(self, link_f, y, extra_data=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, extra_data=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def pdf(self, f, y, extra_data=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the likelihood (pdf) using it
|
||||
|
||||
.. math:
|
||||
p(y|\\lambda(f))
|
||||
|
||||
:param f: latent variables f
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in student t distribution - not used
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
return self.pdf_link(link_f, y, extra_data=extra_data)
|
||||
|
||||
def logpdf(self, f, y, extra_data=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the log likelihood (log pdf) using it
|
||||
|
||||
.. math:
|
||||
\\log p(y|\\lambda(f))
|
||||
|
||||
:param f: latent variables f
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in student t distribution - not used
|
||||
:returns: log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
return self.logpdf_link(link_f, y, extra_data=extra_data)
|
||||
|
||||
def dlogpdf_df(self, f, y, extra_data=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the derivative of log likelihood using it
|
||||
Uses the Faa di Bruno's formula for the chain rule
|
||||
|
||||
.. math::
|
||||
\\frac{d\\log p(y|\\lambda(f))}{df} = \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d\\lambda(f)}{df}
|
||||
|
||||
:param f: latent variables f
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in student t distribution - not used
|
||||
:returns: derivative of log likelihood evaluated for this point
|
||||
:rtype: 1xN array
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
return chain_1(dlogpdf_dlink, dlink_df)
|
||||
|
||||
def d2logpdf_df2(self, f, y, extra_data=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the second derivative of log likelihood using it
|
||||
Uses the Faa di Bruno's formula for the chain rule
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2}\\log p(y|\\lambda(f))}{df^{2}} = \\frac{d^{2}\\log p(y|\\lambda(f))}{d^{2}\\lambda(f)}\\left(\\frac{d\\lambda(f)}{df}\\right)^{2} + \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d^{2}\\lambda(f)}{df^{2}}
|
||||
|
||||
:param f: latent variables f
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in student t distribution - not used
|
||||
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
|
||||
:rtype: 1xN array
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
|
||||
|
||||
def d3logpdf_df3(self, f, y, extra_data=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the third derivative of log likelihood using it
|
||||
Uses the Faa di Bruno's formula for the chain rule
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3}\\log p(y|\\lambda(f))}{df^{3}} = \\frac{d^{3}\\log p(y|\\lambda(f)}{d\\lambda(f)^{3}}\\left(\\frac{d\\lambda(f)}{df}\\right)^{3} + 3\\frac{d^{2}\\log p(y|\\lambda(f)}{d\\lambda(f)^{2}}\\frac{d\\lambda(f)}{df}\\frac{d^{2}\\lambda(f)}{df^{2}} + \\frac{d\\log p(y|\\lambda(f)}{d\\lambda(f)}\\frac{d^{3}\\lambda(f)}{df^{3}}
|
||||
|
||||
:param f: latent variables f
|
||||
:type f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in student t distribution - not used
|
||||
:returns: third derivative of log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
link_f = self.gp_link.transf(f)
|
||||
d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
|
||||
d3link_df3 = self.gp_link.d3transf_df3(f)
|
||||
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
|
||||
|
||||
def dlogpdf_dtheta(self, f, y, extra_data=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if len(self._get_param_names()) > 0:
|
||||
link_f = self.gp_link.transf(f)
|
||||
return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
|
||||
else:
|
||||
#Is no parameters so return an empty array for its derivatives
|
||||
return np.empty([1, 0])
|
||||
|
||||
def dlogpdf_df_dtheta(self, f, y, extra_data=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if len(self._get_param_names()) > 0:
|
||||
link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
|
||||
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
|
||||
else:
|
||||
#Is no parameters so return an empty array for its derivatives
|
||||
return np.empty([f.shape[0], 0])
|
||||
|
||||
def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if len(self._get_param_names()) > 0:
|
||||
link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
|
||||
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
|
||||
else:
|
||||
#Is no parameters so return an empty array for its derivatives
|
||||
return np.empty([f.shape[0], 0])
|
||||
|
||||
def _laplace_gradients(self, f, y, extra_data=None):
|
||||
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
|
||||
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data)
|
||||
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data)
|
||||
|
||||
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
|
||||
# ensure we have gradients for every parameter we want to optimize
|
||||
assert dlogpdf_dtheta.shape[1] == len(self._get_param_names())
|
||||
assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names())
|
||||
assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names())
|
||||
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
|
||||
|
||||
def predictive_values(self, mu, var, full_cov=False, sampling=False, num_samples=10000):
|
||||
"""
|
||||
Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction.
|
||||
|
||||
:param mu: mean of the latent variable, f, of posterior
|
||||
:param var: variance of the latent variable, f, of posterior
|
||||
:param full_cov: whether to use the full covariance or just the diagonal
|
||||
:type full_cov: Boolean
|
||||
:param num_samples: number of samples to use in computing quantiles and
|
||||
possibly mean variance
|
||||
:type num_samples: integer
|
||||
:param sampling: Whether to use samples for mean and variances anyway
|
||||
:type sampling: Boolean
|
||||
|
||||
"""
|
||||
|
||||
if sampling:
|
||||
#Get gp_samples f* using posterior mean and variance
|
||||
if not full_cov:
|
||||
gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()),
|
||||
size=num_samples).T
|
||||
else:
|
||||
gp_samples = np.random.multivariate_normal(mu.flatten(), var,
|
||||
size=num_samples).T
|
||||
#Push gp samples (f*) through likelihood to give p(y*|f*)
|
||||
samples = self.samples(gp_samples)
|
||||
axis=-1
|
||||
|
||||
#Calculate mean, variance and precentiles from samples
|
||||
warnings.warn("Using sampling to calculate mean, variance and predictive quantiles.")
|
||||
pred_mean = np.mean(samples, axis=axis)[:,None]
|
||||
pred_var = np.var(samples, axis=axis)[:,None]
|
||||
q1 = np.percentile(samples, 2.5, axis=axis)[:,None]
|
||||
q3 = np.percentile(samples, 97.5, axis=axis)[:,None]
|
||||
|
||||
else:
|
||||
pred_mean = self.predictive_mean(mu, var)
|
||||
pred_var = self.predictive_variance(mu, var, pred_mean)
|
||||
warnings.warn("Predictive quantiles are only computed when sampling.")
|
||||
q1 = np.repeat(np.nan,pred_mean.size)[:,None]
|
||||
q3 = q1.copy()
|
||||
|
||||
return pred_mean, pred_var, q1, q3
|
||||
|
||||
def samples(self, gp):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
:param gp: latent variable
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
|
@ -1,15 +1,14 @@
|
|||
from __future__ import division
|
||||
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||
# Copyright (c) 2012-2014 Ricardo Andrade, Alan Saul
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats,special
|
||||
import scipy as sp
|
||||
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||
import gp_transformations
|
||||
from noise_distributions import NoiseDistribution
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
|
||||
class Poisson(NoiseDistribution):
|
||||
class Poisson(Likelihood):
|
||||
"""
|
||||
Poisson likelihood
|
||||
|
||||
|
|
@ -19,13 +18,19 @@ class Poisson(NoiseDistribution):
|
|||
.. Note::
|
||||
Y is expected to take values in {0,1,2,...}
|
||||
"""
|
||||
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
|
||||
super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||
def __init__(self, gp_link=None):
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Log()
|
||||
|
||||
def _preprocess_values(self,Y): #TODO
|
||||
return Y
|
||||
super(Poisson, self).__init__(gp_link, name='Poisson')
|
||||
|
||||
def pdf_link(self, link_f, y, extra_data=None):
|
||||
def _conditional_mean(self, f):
|
||||
"""
|
||||
the expected value of y given a value of f
|
||||
"""
|
||||
return self.gp_link.transf(gp)
|
||||
|
||||
def pdf_link(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Likelihood function given link(f)
|
||||
|
||||
|
|
@ -36,14 +41,14 @@ class Poisson(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in poisson distribution
|
||||
:param Y_metadata: Y_metadata which is not used in poisson distribution
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
return np.prod(stats.poisson.pmf(y,link_f))
|
||||
|
||||
def logpdf_link(self, link_f, y, extra_data=None):
|
||||
def logpdf_link(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Log Likelihood Function given link(f)
|
||||
|
||||
|
|
@ -54,7 +59,7 @@ class Poisson(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in poisson distribution
|
||||
:param Y_metadata: Y_metadata which is not used in poisson distribution
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
|
||||
|
|
@ -62,7 +67,7 @@ class Poisson(NoiseDistribution):
|
|||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, extra_data=None):
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||
|
||||
|
|
@ -73,7 +78,7 @@ class Poisson(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in poisson distribution
|
||||
:param Y_metadata: Y_metadata which is not used in poisson distribution
|
||||
:returns: gradient of likelihood evaluated at points
|
||||
:rtype: Nx1 array
|
||||
|
||||
|
|
@ -81,7 +86,7 @@ class Poisson(NoiseDistribution):
|
|||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
return y/link_f - 1
|
||||
|
||||
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
|
||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian at y, given link(f), w.r.t link(f)
|
||||
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
|
||||
|
|
@ -94,7 +99,7 @@ class Poisson(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in poisson distribution
|
||||
:param Y_metadata: Y_metadata which is not used in poisson distribution
|
||||
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
|
||||
:rtype: Nx1 array
|
||||
|
||||
|
|
@ -109,7 +114,7 @@ class Poisson(NoiseDistribution):
|
|||
#transf = self.gp_link.transf(gp)
|
||||
#return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
|
||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
|
|
@ -120,7 +125,7 @@ class Poisson(NoiseDistribution):
|
|||
:type link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param extra_data: extra_data which is not used in poisson distribution
|
||||
:param Y_metadata: Y_metadata which is not used in poisson distribution
|
||||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
|
|
@ -128,19 +133,19 @@ class Poisson(NoiseDistribution):
|
|||
d3lik_dlink3 = 2*y/(link_f)**3
|
||||
return d3lik_dlink3
|
||||
|
||||
def _mean(self,gp):
|
||||
def conditional_mean(self,gp):
|
||||
"""
|
||||
Mass (or density) function
|
||||
The mean of the random variable conditioned on one value of the GP
|
||||
"""
|
||||
return self.gp_link.transf(gp)
|
||||
|
||||
def _variance(self,gp):
|
||||
def conditional_variance(self,gp):
|
||||
"""
|
||||
Mass (or density) function
|
||||
The variance of the random variable conditioned on one value of the GP
|
||||
"""
|
||||
return self.gp_link.transf(gp)
|
||||
|
||||
def samples(self, gp):
|
||||
def samples(self, gp, Y_metadata=None):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
279
GPy/likelihoods/student_t.py
Normal file
279
GPy/likelihoods/student_t.py
Normal file
|
|
@ -0,0 +1,279 @@
|
|||
# Copyright (c) 2012-2014 Ricardo Andrade, Alan Saul
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats, special
|
||||
import scipy as sp
|
||||
import link_functions
|
||||
from scipy import stats, integrate
|
||||
from scipy.special import gammaln, gamma
|
||||
from likelihood import Likelihood
|
||||
from ..core.parameterization import Param
|
||||
from ..core.parameterization.transformations import Logexp
|
||||
|
||||
class StudentT(Likelihood):
|
||||
"""
|
||||
Student T likelihood
|
||||
|
||||
For nomanclature see Bayesian Data Analysis 2003 p576
|
||||
|
||||
.. math::
|
||||
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
|
||||
|
||||
"""
|
||||
def __init__(self,gp_link=None, deg_free=5, sigma2=2):
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Identity()
|
||||
|
||||
super(StudentT, self).__init__(gp_link, name='Student_T')
|
||||
# sigma2 is not a noise parameter, it is a squared scale.
|
||||
self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
|
||||
self.v = Param('deg_free', float(deg_free))
|
||||
self.link_parameter(self.sigma2)
|
||||
self.link_parameter(self.v)
|
||||
self.v.constrain_fixed()
|
||||
|
||||
self.log_concave = False
|
||||
|
||||
def parameters_changed(self):
|
||||
self.variance = (self.v / float(self.v - 2)) * self.sigma2
|
||||
|
||||
def update_gradients(self, grads):
|
||||
"""
|
||||
Pull out the gradients, be careful as the order must match the order
|
||||
in which the parameters are added
|
||||
"""
|
||||
self.sigma2.gradient = grads[0]
|
||||
self.v.gradient = grads[1]
|
||||
|
||||
def pdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Likelihood function given link(f)
|
||||
|
||||
.. math::
|
||||
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
|
||||
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
#Careful gamma(big_number) is infinity!
|
||||
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
|
||||
/ (np.sqrt(self.v * np.pi * self.sigma2)))
|
||||
* ((1 + (1./float(self.v))*((e**2)/float(self.sigma2)))**(-0.5*(self.v + 1)))
|
||||
)
|
||||
return np.prod(objective)
|
||||
|
||||
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Log Likelihood Function given link(f)
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
|
||||
|
||||
:param inv_link_f: latent variables (link(f))
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
#FIXME:
|
||||
#Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?!
|
||||
#But np.log(1 + (1/float(self.v))*((y-inv_link_f)**2)/self.sigma2) throws it correctly
|
||||
#print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
|
||||
objective = (+ gammaln((self.v + 1) * 0.5)
|
||||
- gammaln(self.v * 0.5)
|
||||
- 0.5*np.log(self.sigma2 * self.v * np.pi)
|
||||
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
|
||||
)
|
||||
return np.sum(objective)
|
||||
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
|
||||
|
||||
:param inv_link_f: latent variables (f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: gradient of likelihood evaluated at points
|
||||
:rtype: Nx1 array
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
||||
return grad
|
||||
|
||||
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian at y, given link(f), w.r.t link(f)
|
||||
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
|
||||
The hessian will be 0 unless i == j
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
|
||||
|
||||
:param inv_link_f: latent variables inv_link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
|
||||
:rtype: Nx1 array
|
||||
|
||||
.. Note::
|
||||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
||||
return hess
|
||||
|
||||
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
|
||||
|
||||
.. math::
|
||||
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3}
|
||||
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
||||
((e**2 + self.sigma2*self.v)**3)
|
||||
)
|
||||
return d3lik_dlink3
|
||||
|
||||
def dlogpdf_link_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
|
||||
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
|
||||
return np.sum(dlogpdf_dvar)
|
||||
|
||||
def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
|
||||
|
||||
:param inv_link_f: latent variables inv_link_f
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
|
||||
return dlogpdf_dlink_dvar
|
||||
|
||||
def d2logpdf_dlink2_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
|
||||
|
||||
.. math::
|
||||
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
|
||||
|
||||
:param inv_link_f: latent variables link(f)
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata which is not used in student t distribution
|
||||
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
||||
/ ((self.sigma2*self.v + (e**2))**3)
|
||||
)
|
||||
return d2logpdf_dlink2_dvar
|
||||
|
||||
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
|
||||
return np.hstack((dlogpdf_dvar, dlogpdf_dv))
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
|
||||
return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
|
||||
|
||||
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
|
||||
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
|
||||
d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
|
||||
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
|
||||
|
||||
def predictive_mean(self, mu, sigma, Y_metadata=None):
|
||||
# The comment here confuses mean and median.
|
||||
return self.gp_link.transf(mu) # only true if link is monotonic, which it is.
|
||||
|
||||
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
||||
if self.deg_free<=2.:
|
||||
return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2.
|
||||
else:
|
||||
return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)
|
||||
|
||||
def conditional_mean(self, gp):
|
||||
return self.gp_link.transf(gp)
|
||||
|
||||
def conditional_variance(self, gp):
|
||||
return self.deg_free/(self.deg_free - 2.)
|
||||
|
||||
def samples(self, gp, Y_metadata=None):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
:param gp: latent variable
|
||||
"""
|
||||
orig_shape = gp.shape
|
||||
gp = gp.flatten()
|
||||
#FIXME: Very slow as we are computing a new random variable per input!
|
||||
#Can't get it to sample all at the same time
|
||||
#student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp])
|
||||
dfs = np.ones_like(gp)*self.v
|
||||
scales = np.ones_like(gp)*np.sqrt(self.sigma2)
|
||||
student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp),
|
||||
scale=scales)
|
||||
return student_t_samples.reshape(orig_shape)
|
||||
Loading…
Add table
Add a link
Reference in a new issue