[huge merge] trying to merge old master and master

This commit is contained in:
Max Zwiessele 2014-11-21 16:17:03 +00:00
commit 180650ec85
308 changed files with 27071 additions and 24550 deletions

View file

@ -1,7 +1,8 @@
from ep import EP
from laplace import Laplace
from ep_mixed_noise import EP_Mixed_Noise
from bernoulli import Bernoulli
from exponential import Exponential
from gaussian import Gaussian
from gaussian_mixed_noise import Gaussian_Mixed_Noise
import noise_models
from noise_model_constructors import *
from gamma import Gamma
from poisson import Poisson
from student_t import StudentT
from likelihood import Likelihood
from mixed_noise import MixedNoise

View file

@ -0,0 +1,224 @@
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions
from likelihood import Likelihood
from scipy import stats
class Bernoulli(Likelihood):
"""
Bernoulli likelihood
.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
.. Note::
Y takes values in either {-1, 1} or {0, 1}.
link function should have the domain [0, 1], e.g. probit (default) or Heaviside
.. See also::
likelihood.py, for the parent class
"""
def __init__(self, gp_link=None):
if gp_link is None:
gp_link = link_functions.Probit()
super(Bernoulli, self).__init__(gp_link, 'Bernoulli')
if isinstance(gp_link , (link_functions.Heaviside, link_functions.Probit)):
self.log_concave = True
def _preprocess_values(self, Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
..Note:: Binary classification algorithm works better with classes {-1, 1}
"""
Y_prep = Y.copy()
Y1 = Y[Y.flatten()==1].size
Y2 = Y[Y.flatten()==0].size
assert Y1 + Y2 == Y.size, 'Bernoulli likelihood is meant to be used only with outputs in {0, 1}.'
Y_prep[Y.flatten() == 0] = -1
return Y_prep
def moments_match_ep(self, Y_i, tau_i, v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
if Y_i == 1:
sign = 1.
elif Y_i == 0 or Y_i == -1:
sign = -1
else:
raise ValueError("bad value for Bernoulli observation (0, 1)")
if isinstance(self.gp_link, link_functions.Probit):
z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z)
phi = std_norm_pdf(z)
mu_hat = v_i/tau_i + sign*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
elif isinstance(self.gp_link, link_functions.Heaviside):
a = sign*v_i/np.sqrt(tau_i)
Z_hat = std_norm_cdf(a)
N = std_norm_pdf(a)
mu_hat = v_i/tau_i + sign*N/Z_hat/np.sqrt(tau_i)
sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
else:
#TODO: do we want to revert to numerical quadrature here?
raise ValueError("Exact moment matching not available for link {}".format(self.gp_link.__name__))
return Z_hat, mu_hat, sigma2_hat
def predictive_mean(self, mu, variance, Y_metadata=None):
if isinstance(self.gp_link, link_functions.Probit):
return stats.norm.cdf(mu/np.sqrt(1+variance))
elif isinstance(self.gp_link, link_functions.Heaviside):
return stats.norm.cdf(mu/np.sqrt(variance))
else:
raise NotImplementedError
def predictive_variance(self, mu, variance, pred_mean, Y_metadata=None):
if isinstance(self.gp_link, link_functions.Heaviside):
return 0.
else:
return np.nan
def pdf_link(self, inv_link_f, y, Y_metadata=None):
"""
Likelihood function given inverse link of f.
.. math::
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
:param inv_link_f: latent variables inverse link of f.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli
:returns: likelihood evaluated for this point
:rtype: float
.. Note:
Each y_i must be in {0, 1}
"""
#objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y))
return np.where(y, inv_link_f, 1.-inv_link_f)
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
"""
Log Likelihood function given inverse link of f.
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
:param inv_link_f: latent variables inverse link of f.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli
:returns: log likelihood evaluated at points inverse link of f.
:rtype: float
"""
#objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f)
p = np.where(y==1, inv_link_f, 1.-inv_link_f)
return np.log(np.clip(p, 1e-6 ,np.inf))
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
"""
Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f.
.. math::
\\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))}
:param inv_link_f: latent variables inverse link of f.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli
:returns: gradient of log likelihood evaluated at points inverse link of f.
:rtype: Nx1 array
"""
#grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f)
#grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f))
ff = np.clip(inv_link_f, 1e-6, 1-1e-6)
denom = np.where(y, ff, -(1-ff))
return 1./denom
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
"""
Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j
i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j.
.. math::
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
:param inv_link_f: latent variables inverse link of f.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f.
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
"""
#d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2)
#d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f))
arg = np.where(y, inv_link_f, 1.-inv_link_f)
ret = -1./np.square(np.clip(arg, 1e-3, np.inf))
if np.any(np.isinf(ret)):
stop
return ret
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given inverse link of f w.r.t inverse link of f
.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}}
:param inv_link_f: latent variables passed through inverse link of f.
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in bernoulli
:returns: third derivative of log likelihood evaluated at points inverse_link(f)
:rtype: Nx1 array
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
#d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3))
state = np.seterr(divide='ignore')
# TODO check y \in {0, 1} or {-1, 1}
d3logpdf_dlink3 = np.where(y, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3))
np.seterr(**state)
return d3logpdf_dlink3
def samples(self, gp, Y_metadata=None):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
ns = np.ones_like(gp, dtype=int)
Ysim = np.random.binomial(ns, self.gp_link.transf(gp))
return Ysim.reshape(orig_shape)
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
pass

View file

@ -1,392 +0,0 @@
import numpy as np
from scipy import stats
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood
class EP(likelihood):
def __init__(self,data,noise_model):
"""
Expectation Propagation
:param data: data to model
:type data: numpy array
:param noise_model: noise distribution
:type noise_model: A GPy noise model
"""
self.noise_model = noise_model
self.data = data
self.num_data, self.output_dim = self.data.shape
self.is_heteroscedastic = True
self.num_params = 0
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
self.tau_tilde = np.zeros(self.num_data)
self.v_tilde = np.zeros(self.num_data)
#initial values for the GP variables
self.Y = np.zeros((self.num_data,1))
self.covariance_matrix = np.eye(self.num_data)
self.precision = np.ones(self.num_data)[:,None]
self.Z = 0
self.YYT = None
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = 0.
super(EP, self).__init__()
def restart(self):
self.tau_tilde = np.zeros(self.num_data)
self.v_tilde = np.zeros(self.num_data)
self.Y = np.zeros((self.num_data,1))
self.covariance_matrix = np.eye(self.num_data)
self.precision = np.ones(self.num_data)[:,None]
self.Z = 0
self.YYT = None
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = 0.
def predictive_values(self,mu,var,full_cov,**noise_args):
if full_cov:
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
return self.noise_model.predictive_values(mu,var,**noise_args)
def log_predictive_density(self, y_test, mu_star, var_star):
"""
Calculation of the log predictive density
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
"""
return self.noise_model.log_predictive_density(y_test, mu_star, var_star)
def _get_params(self):
#return np.zeros(0)
return self.noise_model._get_params()
def _get_param_names(self):
#return []
return self.noise_model._get_param_names()
def _set_params(self,p):
#pass # TODO: the EP likelihood might want to take some parameters...
self.noise_model._set_params(p)
def _gradients(self,partial):
#return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
return self.noise_model._gradients(partial)
def _compute_GP_variables(self):
#Variables to be called from GP
mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
sigma_sum = 1./self.tau_ + 1./self.tau_tilde
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
self.Z += 0.5*self.num_data*np.log(2*np.pi)
self.Y = mu_tilde[:,None]
self.YYT = np.dot(self.Y,self.Y.T)
self.covariance_matrix = np.diag(1./self.tau_tilde)
self.precision = self.tau_tilde[:,None]
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = np.trace(self.YYT)
def fit_full(self, K, epsilon=1e-3,power_ep=[1.,1.]):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006.
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float
:param power_ep: Power EP parameters
:type power_ep: list of floats
"""
self.epsilon = epsilon
self.eta, self.delta = power_ep
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.num_data)
Sigma = K.copy()
"""
Initial values - Cavity distribution parameters:
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.num_data,dtype=float)
self.v_ = np.empty(self.num_data,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.num_data,dtype=float)
self.Z_hat = np.empty(self.num_data,dtype=float)
phi = np.empty(self.num_data,dtype=float)
mu_hat = np.empty(self.num_data,dtype=float)
sigma2_hat = np.empty(self.num_data,dtype=float)
#Approximation
epsilon_np1 = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1.
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.num_data)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
B = np.eye(self.num_data) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()
def fit_DTC(self, Kmm, Kmn, epsilon=1e-3,power_ep=[1.,1.]):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013.
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float
:param power_ep: Power EP parameters
:type power_ep: list of floats
"""
self.epsilon = epsilon
self.eta, self.delta = power_ep
num_inducing = Kmm.shape[0]
#TODO: this doesn't work with uncertain inputs!
"""
Prior approximation parameters:
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = Qnn = Knm*Kmmi*Kmn
"""
KmnKnm = np.dot(Kmn,Kmn.T)
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy()
#Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
#KmnKnm = np.dot(Kmn, Kmn.T)
#KmmiKmn = np.dot(Kmmi,Kmn)
#Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
#LLT0 = Kmm.copy()
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*Gamma
"""
mu = np.zeros(self.num_data)
LLT = Kmm.copy()
Sigma_diag = Qnn_diag.copy()
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.num_data,dtype=float)
self.v_ = np.empty(self.num_data,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.num_data,dtype=float)
self.Z_hat = np.empty(self.num_data,dtype=float)
phi = np.empty(self.num_data,dtype=float)
mu_hat = np.empty(self.num_data,dtype=float)
sigma2_hat = np.empty(self.num_data,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
np1 = [self.tau_tilde.copy()]
np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.num_data)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
L = jitchol(LLT)
#cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
V,info = dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu += (Delta_v-Delta_tau*mu[i])*si
self.iterations += 1
#Sigma recomputation with Cholesky decompositon
LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
V2,info = dtrtrs(L.T,V,lower=0)
Sigma_diag = np.sum(V*V,-2)
Knmv_tilde = np.dot(Kmn,self.v_tilde)
mu = np.dot(V2.T,Knmv_tilde)
epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.num_data
epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.num_data
np1.append(self.tau_tilde.copy())
np2.append(self.v_tilde.copy())
self._compute_GP_variables()
def fit_FITC(self, Kmm, Kmn, Knn_diag, epsilon=1e-3,power_ep=[1.,1.]):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008.
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float
:param power_ep: Power EP parameters
:type power_ep: list of floats
"""
self.epsilon = epsilon
self.eta, self.delta = power_ep
num_inducing = Kmm.shape[0]
"""
Prior approximation parameters:
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
P0 = Kmn.T
KmnKnm = np.dot(P0.T, P0)
KmmiKmn = np.dot(Kmmi,P0.T)
Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
Diag0 = Knn_diag - Qnn_diag
R0 = jitchol(Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*Gamma
"""
self.w = np.zeros(self.num_data)
self.Gamma = np.zeros(num_inducing)
mu = np.zeros(self.num_data)
P = P0.copy()
R = R0.copy()
Diag = Diag0.copy()
Sigma_diag = Knn_diag
RPT0 = np.dot(R0,P0.T)
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.num_data,dtype=float)
self.v_ = np.empty(self.num_data,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.num_data,dtype=float)
self.Z_hat = np.empty(self.num_data,dtype=float)
phi = np.empty(self.num_data,dtype=float)
mu_hat = np.empty(self.num_data,dtype=float)
sigma2_hat = np.empty(self.num_data,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.num_data)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,num_inducing)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T
self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
mu = self.w + np.dot(P,self.Gamma)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon
Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
Diag = Diag0 * Iplus_Dprod_i
P = Iplus_Dprod_i[:,None] * P0
safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
R,info = dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.w = Diag * self.v_tilde
self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
mu = self.w + np.dot(P,self.Gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()

View file

@ -1,385 +0,0 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood
class EP_Mixed_Noise(likelihood):
def __init__(self,data_list,noise_model_list,epsilon=1e-3,power_ep=[1.,1.]):
"""
Expectation Propagation
Arguments
---------
:param data_list: list of outputs
:param noise_model_list: a list of noise models
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations
:type epsilon: float
:param power_ep: list of power ep parameters
"""
assert len(data_list) == len(noise_model_list)
self.noise_model_list = noise_model_list
n_list = [data.size for data in data_list]
self.n_models = len(data_list)
self.n_params = [noise_model._get_params().size for noise_model in noise_model_list]
self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.n_models),n_list)])
self.epsilon = epsilon
self.eta, self.delta = power_ep
self.data = np.vstack(data_list)
self.N, self.output_dim = self.data.shape
self.is_heteroscedastic = True
self.num_params = 0#FIXME
self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)])
#TODO non-gaussian index
#Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde)
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#initial values for the GP variables
self.Y = np.zeros((self.N,1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)[:,None]
self.Z = 0
self.YYT = None
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = 0.
def restart(self):
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
self.Y = np.zeros((self.N,1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)[:,None]
self.Z = 0
self.YYT = None
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = 0.
def predictive_values(self,mu,var,full_cov,noise_model):
"""
Predicts the output given the GP
:param mu: GP's mean
:param var: GP's variance
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: False|True
:param noise_model: noise model to use
:type noise_model: integer
"""
if full_cov:
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
#_mu = []
#_var = []
#_q1 = []
#_q2 = []
#for m,v,o in zip(mu,var,output.flatten()):
# a,b,c,d = self.noise_model_list[int(o)].predictive_values(m,v)
# _mu.append(a)
# _var.append(b)
# _q1.append(c)
# _q2.append(d)
#return np.vstack(_mu),np.vstack(_var),np.vstack(_q1),np.vstack(_q2)
return self.noise_model_list[noise_model].predictive_values(mu,var)
def _get_params(self):
return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list])
def _get_param_names(self):
names = []
for noise_model in self.noise_model_list:
names += noise_model._get_param_names()
return names
def _set_params(self,p):
cs_params = np.cumsum([0]+self.n_params)
for i in range(len(self.n_params)):
self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]])
def _gradients(self,partial):
#NOTE this is not tested
return np.hstack([noise_model._gradients(partial) for noise_model in self.noise_model_list])
def _compute_GP_variables(self):
#Variables to be called from GP
mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
sigma_sum = 1./self.tau_ + 1./self.tau_tilde
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
self.Y = mu_tilde[:,None]
self.YYT = np.dot(self.Y,self.Y.T)
self.covariance_matrix = np.diag(1./self.tau_tilde)
self.precision = self.tau_tilde[:,None]
self.V = self.precision * self.Y
self.VVT_factor = self.V
self.trYYT = np.trace(self.YYT)
def fit_full(self,K):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006.
"""
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.N)
Sigma = K.copy()
"""
Initial values - Cavity distribution parameters:
q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1.
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
mu = np.dot(Sigma,self.v_tilde)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B)
V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()
def fit_DTC(self, Kmm, Kmn):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013.
"""
num_inducing = Kmm.shape[0]
#TODO: this doesn't work with uncertain inputs!
"""
Prior approximation parameters:
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = Qnn = Knm*Kmmi*Kmn
"""
KmnKnm = np.dot(Kmn,Kmn.T)
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy()
#Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
#KmnKnm = np.dot(Kmn, Kmn.T)
#KmmiKmn = np.dot(Kmmi,Kmn)
#Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
#LLT0 = Kmm.copy()
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*Gamma
"""
mu = np.zeros(self.N)
LLT = Kmm.copy()
Sigma_diag = Qnn_diag.copy()
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
np1 = [self.tau_tilde.copy()]
np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
L = jitchol(LLT)
#cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
V,info = dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu += (Delta_v-Delta_tau*mu[i])*si
self.iterations += 1
#Sigma recomputation with Cholesky decompositon
LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
V2,info = dtrtrs(L.T,V,lower=0)
Sigma_diag = np.sum(V*V,-2)
Knmv_tilde = np.dot(Kmn,self.v_tilde)
mu = np.dot(V2.T,Knmv_tilde)
epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
np1.append(self.tau_tilde.copy())
np2.append(self.v_tilde.copy())
self._compute_GP_variables()
def fit_FITC(self, Kmm, Kmn, Knn_diag):
"""
The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008.
"""
num_inducing = Kmm.shape[0]
"""
Prior approximation parameters:
q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
"""
Lm = jitchol(Kmm)
Lmi = chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
P0 = Kmn.T
KmnKnm = np.dot(P0.T, P0)
KmmiKmn = np.dot(Kmmi,P0.T)
Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
Diag0 = Knn_diag - Qnn_diag
R0 = jitchol(Kmmi).T
"""
Posterior approximation: q(f|y) = N(f| mu, Sigma)
Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*Gamma
"""
self.w = np.zeros(self.N)
self.Gamma = np.zeros(num_inducing)
mu = np.zeros(self.N)
P = P0.copy()
R = R0.copy()
Diag = Diag0.copy()
Sigma_diag = Knn_diag
RPT0 = np.dot(R0,P0.T)
"""
Initial values - Cavity distribution parameters:
q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
sigma_ = 1./tau_
mu_ = v_/tau_
"""
self.tau_ = np.empty(self.N,dtype=float)
self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments
z = np.empty(self.N,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float)
#Approximation
epsilon_np1 = 1
epsilon_np2 = 1
self.iterations = 0
self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N)
for i in update_order:
#Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
self.tau_tilde[i] += Delta_tau
self.v_tilde[i] += Delta_v
#Posterior distribution parameters update
dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,num_inducing)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T
self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
mu = self.w + np.dot(P,self.Gamma)
self.iterations += 1
#Sigma recomptutation with Cholesky decompositon
Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
Diag = Diag0 * Iplus_Dprod_i
P = Iplus_Dprod_i[:,None] * P0
safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
R,info = dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.w = Diag * self.v_tilde
self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
mu = self.w + np.dot(P,self.Gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables()

View file

@ -1,15 +1,14 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Copyright (c) 2012-2014 GPy Authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
import link_functions
from likelihood import Likelihood
class Exponential(NoiseDistribution):
class Exponential(Likelihood):
"""
Expoential likelihood
Y is expected to take values in {0,1,2,...}
@ -18,13 +17,12 @@ class Exponential(NoiseDistribution):
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
$$
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)
def __init__(self,gp_link=None):
if gp_link is None:
gp_link = link_functions.Log()
super(Exponential, self).__init__(gp_link, 'ExpLikelihood')
def _preprocess_values(self,Y):
return Y
def pdf_link(self, link_f, y, extra_data=None):
def pdf_link(self, link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
@ -35,16 +33,15 @@ class Exponential(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
log_objective = link_f*np.exp(-y*link_f)
return np.exp(np.sum(np.log(log_objective)))
#return np.exp(np.sum(-y/link_f - np.log(link_f) ))
def logpdf_link(self, link_f, y, extra_data=None):
def logpdf_link(self, link_f, y, Y_metadata=None):
"""
Log Likelihood Function given link(f)
@ -55,17 +52,16 @@ class Exponential(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
log_objective = np.log(link_f) - y*link_f
#logpdf_link = np.sum(-np.log(link_f) - y/link_f)
return np.sum(log_objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
@ -76,7 +72,7 @@ class Exponential(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
@ -86,7 +82,7 @@ class Exponential(NoiseDistribution):
#grad = y/(link_f**2) - 1./link_f
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -99,7 +95,7 @@ class Exponential(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
@ -112,7 +108,7 @@ class Exponential(NoiseDistribution):
#hess = -2*y/(link_f**3) + 1/(link_f**2)
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -123,7 +119,7 @@ class Exponential(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
@ -132,18 +128,6 @@ class Exponential(NoiseDistribution):
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
return d3lik_dlink3
def _mean(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)**2
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.

View file

@ -1,15 +1,15 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
from ..core.parameterization import Param
import link_functions
from likelihood import Likelihood
class Gamma(NoiseDistribution):
class Gamma(Likelihood):
"""
Gamma likelihood
@ -18,14 +18,16 @@ class Gamma(NoiseDistribution):
\\alpha_{i} = \\beta y_{i}
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
self.beta = beta
super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance)
def __init__(self,gp_link=None,beta=1.):
if gp_link is None:
gp_link = link_functions.Log()
super(Gamma, self).__init__(gp_link, 'Gamma')
def _preprocess_values(self,Y):
return Y
self.beta = Param('beta', beta)
self.link_parameter(self.beta)
self.beta.fix()#TODO: gradients!
def pdf_link(self, link_f, y, extra_data=None):
def pdf_link(self, link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
@ -37,7 +39,7 @@ class Gamma(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
@ -47,7 +49,7 @@ class Gamma(NoiseDistribution):
objective = (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha)
return np.exp(np.sum(np.log(objective)))
def logpdf_link(self, link_f, y, extra_data=None):
def logpdf_link(self, link_f, y, Y_metadata=None):
"""
Log Likelihood Function given link(f)
@ -59,7 +61,7 @@ class Gamma(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: likelihood evaluated for this point
:rtype: float
@ -71,7 +73,7 @@ class Gamma(NoiseDistribution):
log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
return np.sum(log_objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
@ -83,7 +85,7 @@ class Gamma(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:param Y_metadata: Y_metadata which is not used in gamma distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
@ -94,7 +96,7 @@ class Gamma(NoiseDistribution):
#return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -108,7 +110,7 @@ class Gamma(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:param Y_metadata: Y_metadata which is not used in gamma distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
@ -122,7 +124,7 @@ class Gamma(NoiseDistribution):
#return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -134,22 +136,10 @@ class Gamma(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:param Y_metadata: Y_metadata which is not used in gamma distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
return d3lik_dlink3
def _mean(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Mass (or density) function
"""
return self.gp_link.transf(gp)/self.beta

View file

@ -1,114 +1,318 @@
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#TODO
"""
A lot of this code assumes that the link function is the identity.
I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity.
Furthermore, exact Guassian inference can only be done for the identity link, so we should be asserting so for all calls which relate to that.
James 11/12/13
"""
import numpy as np
from likelihood import likelihood
from ..util.linalg import jitchol
from scipy import stats, special
import link_functions
from likelihood import Likelihood
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
from scipy import stats
class Gaussian(likelihood):
class Gaussian(Likelihood):
"""
Likelihood class for doing Expectation propagation
Gaussian likelihood
:param data: observed output
:type data: Nx1 numpy.darray
:param variance: noise parameter
:param normalize: whether to normalize the data before computing (predictions will be in original scales)
:type normalize: False|True
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
:param variance: variance value of the Gaussian distribution
:param N: Number of data points
:type N: int
"""
def __init__(self, data, variance=1., normalize=False):
self.is_heteroscedastic = False
self.num_params = 1
self.Z = 0. # a correction factor which accounts for the approximation made
N, self.output_dim = data.shape
def __init__(self, gp_link=None, variance=1., name='Gaussian_noise'):
if gp_link is None:
gp_link = link_functions.Identity()
# normalization
if normalize:
self._offset = data.mean(0)[None, :]
self._scale = data.std(0)[None, :]
# Don't scale outputs which have zero variance to zero.
self._scale[np.nonzero(self._scale == 0.)] = 1.0e-3
else:
self._offset = np.zeros((1, self.output_dim))
self._scale = np.ones((1, self.output_dim))
assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link"
self.set_data(data)
super(Gaussian, self).__init__(gp_link, name=name)
self._variance = np.asarray(variance) + 1.
self._set_params(np.asarray(variance))
self.variance = Param('variance', variance, Logexp())
self.link_parameter(self.variance)
super(Gaussian, self).__init__()
if isinstance(gp_link, link_functions.Identity):
self.log_concave = True
def set_data(self, data):
self.data = data
self.N, D = data.shape
assert D == self.output_dim
self.Y = (self.data - self._offset) / self._scale
if D > self.N:
self.YYT = np.dot(self.Y, self.Y.T)
self.trYYT = np.trace(self.YYT)
self.YYT_factor = jitchol(self.YYT)
else:
self.YYT = None
self.trYYT = np.sum(np.square(self.Y))
self.YYT_factor = self.Y
def betaY(self,Y,Y_metadata=None):
#TODO: ~Ricardo this does not live here
return Y/self.gaussian_variance(Y_metadata)
def _get_params(self):
return np.asarray(self._variance)
def gaussian_variance(self, Y_metadata=None):
return self.variance
def _get_param_names(self):
return ["noise_variance"]
def update_gradients(self, grad):
self.variance.gradient = grad
def _set_params(self, x):
x = np.float64(x)
if np.all(self._variance != x):
if x == 0.:#special case of zero noise
self.precision = np.inf
self.V = None
else:
self.precision = 1. / x
self.V = (self.precision) * self.Y
self.VVT_factor = self.precision * self.YYT_factor
self.covariance_matrix = np.eye(self.N) * x
self._variance = x
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
return dL_dKdiag.sum()
def predictive_values(self, mu, var, full_cov, **likelihood_args):
def _preprocess_values(self, Y):
"""
Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval
Check if the values of the observations correspond to the values
assumed by the likelihood function.
"""
mean = mu * self._scale + self._offset
return Y
def _moments_match_ep(self, data_i, tau_i, v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
sigma2_hat = 1./(1./self.variance + tau_i)
mu_hat = sigma2_hat*(data_i/self.variance + v_i)
sum_var = self.variance + 1./tau_i
Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
return Z_hat, mu_hat, sigma2_hat
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
if full_cov:
if self.output_dim > 1:
raise NotImplementedError, "TODO"
# Note. for output_dim>1, we need to re-normalise all the outputs independently.
# This will mess up computations of diag(true_var), below.
# note that the upper, lower quantiles should be the same shape as mean
# Augment the output variance with the likelihood variance and rescale.
true_var = (var + np.eye(var.shape[0]) * self._variance) * self._scale ** 2
_5pc = mean - 2.*np.sqrt(np.diag(true_var))
_95pc = mean + 2.*np.sqrt(np.diag(true_var))
if var.ndim == 2:
var += np.eye(var.shape[0])*self.variance
if var.ndim == 3:
var += np.atleast_3d(np.eye(var.shape[0])*self.variance)
else:
true_var = (var + self._variance) * self._scale ** 2
_5pc = mean - 2.*np.sqrt(true_var)
_95pc = mean + 2.*np.sqrt(true_var)
return mean, true_var, _5pc, _95pc
var += self.variance
return mu, var
def predictive_mean(self, mu, sigma):
return mu
def predictive_variance(self, mu, sigma, predictive_mean=None):
return self.variance + sigma**2
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
return [stats.norm.ppf(q/100.)*np.sqrt(var + self.variance) + mu for q in quantiles]
def pdf_link(self, link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in gaussian
:returns: likelihood evaluated for this point
:rtype: float
"""
#Assumes no covariance, exp, sum, log for numerical stability
return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
def logpdf_link(self, link_f, y, Y_metadata=None):
"""
Log likelihood function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in gaussian
:returns: log likelihood evaluated for this point
:rtype: float
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
N = y.shape[0]
ln_det_cov = N*np.log(self.variance)
return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
Gradient of the pdf at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\sigma^{2}}(y_{i} - \\lambda(f_{i}))
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in gaussian
:returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
s2_i = (1.0/self.variance)
grad = s2_i*y - s2_i*link_f
return grad
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
"""
Hessian at y, given link_f, w.r.t link_f.
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}f} = -\\frac{1}{\\sigma^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in gaussian
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
N = y.shape[0]
hess = -(1.0/self.variance)*np.ones((N, 1))
return hess
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = 0
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in gaussian
:returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
N = y.shape[0]
d3logpdf_dlink3 = np.zeros((N,1))
return d3logpdf_dlink3
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
"""
Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = -\\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - \\lambda(f_{i}))^{2}}{2\\sigma^{4}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: float
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
e = y - link_f
s_4 = 1.0/(self.variance**2)
N = y.shape[0]
dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
return np.sum(dlik_dsigma) # Sure about this sum?
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
"""
Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)}) = \\frac{1}{\\sigma^{4}}(-y_{i} + \\lambda(f_{i}))
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2)
dlik_grad_dsigma = -s_4*y + s_4*link_f
return dlik_grad_dsigma
def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
"""
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)}) = \\frac{1}{\\sigma^{4}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2)
N = y.shape[0]
d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
return np.asarray([[dlogpdf_dvar]])
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
return d2logpdf_dlink2_dvar
def _mean(self, gp):
"""
Expected value of y under the Mass (or density) function p(y|f)
.. math::
E_{p(y|f)}[y]
"""
return self.gp_link.transf(gp)
def _variance(self, gp):
"""
Variance of y under the Mass (or density) function p(y|f)
.. math::
Var_{p(y|f)}[y]
"""
return self.variance
def samples(self, gp, Y_metadata=None):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
#orig_shape = gp.shape
gp = gp.flatten()
Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
return Ysim.reshape(orig_shape)
def log_predictive_density(self, y_test, mu_star, var_star):
"""
Calculation of the log predictive density
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
.. Note:
Works as if each test point was provided individually, i.e. not full_cov
assumes independence
"""
y_rescaled = (y_test - self._offset)/self._scale
return -0.5*np.log(2*np.pi) -0.5*np.log(var_star + self._variance) -0.5*(np.square(y_rescaled - mu_star))/(var_star + self._variance)
v = var_star + self.variance
return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v
def _gradients(self, partial):
return np.sum(partial)

View file

@ -1,108 +0,0 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood
from . import Gaussian
class Gaussian_Mixed_Noise(likelihood):
"""
Gaussian Likelihood for multiple outputs
This is a wrapper around likelihood.Gaussian class
:param data_list: data observations
:type data_list: list of numpy arrays (num_data_output_i x 1), one array per output
:param noise_params: noise parameters of each output
:type noise_params: list of floats, one per output
:param normalize: whether to normalize the data before computing (predictions will be in original scales)
:type normalize: False|True
"""
def __init__(self, data_list, noise_params=None, normalize=True):
self.num_params = len(data_list)
self.n_list = [data.size for data in data_list]
self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.num_params),self.n_list)])
if noise_params is None:
noise_params = [1.] * self.num_params
else:
assert self.num_params == len(noise_params), 'Number of noise parameters does not match the number of noise models.'
self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)]
self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list]
self.data = np.vstack(data_list)
self.N, self.output_dim = self.data.shape
self._offset = np.zeros((1, self.output_dim))
self._scale = np.ones((1, self.output_dim))
self.is_heteroscedastic = True
self.Z = 0. # a correction factor which accounts for the approximation made
self.set_data(data_list)
self._set_params(np.asarray(noise_params))
super(Gaussian_Mixed_Noise, self).__init__()
def set_data(self, data_list):
self.data = np.vstack(data_list)
self.N, D = self.data.shape
assert D == self.output_dim
self.Y = (self.data - self._offset) / self._scale
if D > self.N:
raise NotImplementedError
#self.YYT = np.dot(self.Y, self.Y.T)
#self.trYYT = np.trace(self.YYT)
#self.YYT_factor = jitchol(self.YYT)
else:
self.YYT = None
self.trYYT = np.sum(np.square(self.Y))
self.YYT_factor = self.Y
def predictive_values(self,mu,var,full_cov,noise_model):
"""
Predicts the output given the GP
:param mu: GP's mean
:param var: GP's variance
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: False|True
:param noise_model: noise model to use
:type noise_model: integer
"""
if full_cov:
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
return self.noise_model_list[noise_model].predictive_values(mu,var,full_cov)
def _get_params(self):
return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list])
def _get_param_names(self):
if len(self.noise_model_list) == 1:
names = self.noise_model_list[0]._get_param_names()
else:
names = []
for noise_model,i in zip(self.noise_model_list,range(len(self.n_list))):
names.append(''.join(noise_model._get_param_names() + ['_%s' %i]))
return names
def _set_params(self,p):
cs_params = np.cumsum([0]+self.n_params)
for i in range(len(self.n_params)):
self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]])
self.precision = np.hstack([np.repeat(noise_model.precision,n) for noise_model,n in zip(self.noise_model_list,self.n_list)])[:,None]
self.V = self.precision * self.Y
self.VVT_factor = self.precision * self.YYT_factor
self.covariance_matrix = np.eye(self.N) * 1./self.precision
def _gradients(self,partial):
gradients = []
aux = np.cumsum([0]+self.n_list)
for ai,af,noise_model in zip(aux[:-1],aux[1:],self.noise_model_list):
gradients += [noise_model._gradients(partial[ai:af])]
return np.hstack(gradients)

View file

@ -1,60 +1,77 @@
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import copy
from ..core.parameterized import Parameterized
from scipy import stats,special
import scipy as sp
import link_functions
from ..util.misc import chain_1, chain_2, chain_3
from scipy.integrate import quad
import warnings
from ..core.parameterization import Parameterized
class likelihood(Parameterized):
class Likelihood(Parameterized):
"""
The atom for a likelihood class
Likelihood base class, used to defing p(y|f).
This object interfaces the GP and the data. The most basic likelihood
(Gaussian) inherits directly from this, as does the EP algorithm
All instances use _inverse_ link functions, which can be swapped out. It is
expected that inheriting classes define a default inverse link function
Some things must be defined for this to work properly:
To use this class, inherit and define missing functionality.
- self.Y : the effective Gaussian target of the GP
- self.N, self.D : Y.shape
- self.covariance_matrix : the effective (noise) covariance of the GP targets
- self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
- self.is_heteroscedastic : enables significant computational savings in GP
- self.precision : a scalar or vector representation of the effective target precision
- self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
- self.V : self.precision * self.Y
Inheriting classes *must* implement:
pdf_link : a bound method which turns the output of the link function into the pdf
logpdf_link : the logarithm of the above
To enable use with EP, inheriting classes *must* define:
TODO: a suitable derivative function for any parameters of the class
It is also desirable to define:
moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature.
To enable use with Laplace approximation, inheriting classes *must* define:
Some derivative functions *AS TODO*
For exact Gaussian inference, define *JH TODO*
"""
def __init__(self):
Parameterized.__init__(self)
self.dZ_dK = 0
def __init__(self, gp_link, name):
super(Likelihood, self).__init__(name)
assert isinstance(gp_link,link_functions.GPTransformation), "gp_link is not a valid GPTransformation."
self.gp_link = gp_link
self.log_concave = False
def _get_params(self):
def _gradients(self,partial):
return np.zeros(0)
def update_gradients(self, partial):
if self.size > 0:
raise NotImplementedError('Must be implemented for likelihoods with parameters to be optimized')
def _preprocess_values(self,Y):
"""
In case it is needed, this function assess the output values or makes any pertinent transformation on them.
:param Y: observed output
:type Y: Nx1 numpy.darray
"""
return Y
def conditional_mean(self, gp):
"""
The mean of the random variable conditioned on one value of the GP
"""
raise NotImplementedError
def _get_param_names(self):
raise NotImplementedError
def _set_params(self, x):
raise NotImplementedError
def fit_full(self, K):
def conditional_variance(self, gp):
"""
No approximations needed by default
The variance of the random variable conditioned on one value of the GP
"""
pass
def restart(self):
"""
No need to restart if not an approximation
"""
pass
def _gradients(self, partial):
raise NotImplementedError
def predictive_values(self, mu, var):
raise NotImplementedError
def log_predictive_density(self, y_test, mu_star, var_star):
"""
Calculation of the predictive density
Calculation of the log predictive density
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
@ -66,4 +83,391 @@ class likelihood(Parameterized):
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
"""
assert y_test.shape==mu_star.shape
assert y_test.shape==var_star.shape
assert y_test.shape[1] == 1
def integral_generator(y, m, v):
"""Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
def f(f_star):
return self.pdf(f_star, y)*np.exp(-(1./(2*v))*np.square(m-f_star))
return f
scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v), -np.inf, np.inf) for y, m, v in zip(y_test.flatten(), mu_star.flatten(), var_star.flatten())])
scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1)
p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star)
return np.log(p_ystar)
def _moments_match_ep(self,obs,tau,v):
"""
Calculation of moments using quadrature
:param obs: observed output
:param tau: cavity distribution 1st natural parameter (precision)
:param v: cavity distribution 2nd natural paramenter (mu*precision)
"""
#Compute first integral for zeroth moment.
#NOTE constant np.sqrt(2*pi/tau) added at the end of the function
mu = v/tau
def int_1(f):
return self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
z_scaled, accuracy = quad(int_1, -np.inf, np.inf)
#Compute second integral for first moment
def int_2(f):
return f*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
mean, accuracy = quad(int_2, -np.inf, np.inf)
mean /= z_scaled
#Compute integral for variance
def int_3(f):
return (f**2)*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
Ef2, accuracy = quad(int_3, -np.inf, np.inf)
Ef2 /= z_scaled
variance = Ef2 - mean**2
#Add constant to the zeroth moment
#NOTE: this constant is not needed in the other moments because it cancells out.
z = z_scaled/np.sqrt(2*np.pi/tau)
return z, mean, variance
def variational_expectations(self, Y, m, v, gh_points=None):
"""
Use Gauss-Hermite Quadrature to compute
E_p(f) [ log p(y|f) ]
d/dm E_p(f) [ log p(y|f) ]
d/dv E_p(f) [ log p(y|f) ]
where p(f) is a Gaussian with mean m and variance v. The shapes of Y, m and v should match.
if no gh_points are passed, we construct them using defualt options
"""
if gh_points is None:
gh_x, gh_w = np.polynomial.hermite.hermgauss(12)
else:
gh_x, gh_w = gh_points
shape = m.shape
m,v,Y = m.flatten(), v.flatten(), Y.flatten()
#make a grid of points
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None]
#evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid.
# broadcast needs to be handled carefully.
logp = self.logpdf(X,Y[:,None])
dlogp_dx = self.dlogpdf_df(X, Y[:,None])
d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None])
#clipping for numerical stability
logp = np.clip(logp,-1e6,1e6)
dlogp_dx = np.clip(dlogp_dx,-1e6,1e6)
d2logp_dx2 = np.clip(d2logp_dx2,-1e6,1e6)
#average over the gird to get derivatives of the Gaussian's parameters
F = np.dot(logp, gh_w)
dF_dm = np.dot(dlogp_dx, gh_w)
dF_dv = np.dot(d2logp_dx2, gh_w)/2.
if np.any(np.isnan(dF_dv)) or np.any(np.isinf(dF_dv)):
stop
if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)):
stop
return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape)
def predictive_mean(self, mu, variance, Y_metadata=None):
"""
Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )
:param mu: mean of posterior
:param sigma: standard deviation of posterior
"""
#conditional_mean: the edpected value of y given some f, under this likelihood
def int_mean(f,m,v):
p = np.exp(-(0.5/v)*np.square(f - m))
#If p is zero then conditional_mean will overflow
if p < 1e-10:
return 0.
else:
return self.conditional_mean(f)*p
scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
return mean
def _conditional_mean(self, f):
"""Quadrature calculation of the conditional mean: E(Y_star|f)"""
raise NotImplementedError, "implement this function to make predictions"
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
"""
Approximation to the predictive variance: V(Y_star)
The following variance decomposition is used:
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
:param mu: mean of posterior
:param sigma: standard deviation of posterior
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
"""
#sigma2 = sigma**2
normalizer = np.sqrt(2*np.pi*variance)
# E( V(Y_star|f_star) )
def int_var(f,m,v):
p = np.exp(-(0.5/v)*np.square(f - m))
#If p is zero then conditional_variance will overflow
if p < 1e-10:
return 0.
else:
return self.conditional_variance(f)*p
scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
exp_var = np.array(scaled_exp_variance)[:,None] / normalizer
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
#E( E(Y_star|f_star) )**2
if predictive_mean is None:
predictive_mean = self.predictive_mean(mu,variance)
predictive_mean_sq = predictive_mean**2
#E( E(Y_star|f_star)**2 )
def int_pred_mean_sq(f,m,v,predictive_mean_sq):
p = np.exp(-(0.5/v)*np.square(f - m))
#If p is zero then conditional_mean**2 will overflow
if p < 1e-10:
return 0.
else:
return self.conditional_mean(f)**2*p
scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer
var_exp = exp_exp2 - predictive_mean_sq
# V(Y_star) = E[ V(Y_star|f_star) ] + V[ E(Y_star|f_star) ]
# V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2
return exp_var + var_exp
def pdf_link(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError
def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError
def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError
def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None):
raise NotImplementedError
def pdf(self, f, y, Y_metadata=None):
"""
Evaluates the link function link(f) then computes the likelihood (pdf) using it
.. math:
p(y|\\lambda(f))
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
:returns: likelihood evaluated for this point
:rtype: float
"""
inv_link_f = self.gp_link.transf(f)
return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
def logpdf(self, f, y, Y_metadata=None):
"""
Evaluates the link function link(f) then computes the log likelihood (log pdf) using it
.. math:
\\log p(y|\\lambda(f))
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
:returns: log likelihood evaluated for this point
:rtype: float
"""
inv_link_f = self.gp_link.transf(f)
return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
def dlogpdf_df(self, f, y, Y_metadata=None):
"""
Evaluates the link function link(f) then computes the derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule
.. math::
\\frac{d\\log p(y|\\lambda(f))}{df} = \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d\\lambda(f)}{df}
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
:returns: derivative of log likelihood evaluated for this point
:rtype: 1xN array
"""
inv_link_f = self.gp_link.transf(f)
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f)
return chain_1(dlogpdf_dlink, dlink_df)
def d2logpdf_df2(self, f, y, Y_metadata=None):
"""
Evaluates the link function link(f) then computes the second derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule
.. math::
\\frac{d^{2}\\log p(y|\\lambda(f))}{df^{2}} = \\frac{d^{2}\\log p(y|\\lambda(f))}{d^{2}\\lambda(f)}\\left(\\frac{d\\lambda(f)}{df}\\right)^{2} + \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d^{2}\\lambda(f)}{df^{2}}
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
:rtype: 1xN array
"""
inv_link_f = self.gp_link.transf(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f)
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
def d3logpdf_df3(self, f, y, Y_metadata=None):
"""
Evaluates the link function link(f) then computes the third derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule
.. math::
\\frac{d^{3}\\log p(y|\\lambda(f))}{df^{3}} = \\frac{d^{3}\\log p(y|\\lambda(f)}{d\\lambda(f)^{3}}\\left(\\frac{d\\lambda(f)}{df}\\right)^{3} + 3\\frac{d^{2}\\log p(y|\\lambda(f)}{d\\lambda(f)^{2}}\\frac{d\\lambda(f)}{df}\\frac{d^{2}\\lambda(f)}{df^{2}} + \\frac{d\\log p(y|\\lambda(f)}{d\\lambda(f)}\\frac{d^{3}\\lambda(f)}{df^{3}}
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
:returns: third derivative of log likelihood evaluated for this point
:rtype: float
"""
inv_link_f = self.gp_link.transf(f)
d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f)
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d3link_df3 = self.gp_link.d3transf_df3(f)
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
def dlogpdf_dtheta(self, f, y, Y_metadata=None):
"""
TODO: Doc strings
"""
if self.size > 0:
inv_link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
else:
# There are no parameters so return an empty array for derivatives
return np.zeros([1, 0])
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
"""
TODO: Doc strings
"""
if self.size > 0:
inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
else:
# There are no parameters so return an empty array for derivatives
return np.zeros([f.shape[0], 0])
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
"""
TODO: Doc strings
"""
if self.size > 0:
inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f)
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
else:
# There are no parameters so return an empty array for derivatives
return np.zeros([f.shape[0], 0])
def _laplace_gradients(self, f, y, Y_metadata=None):
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
# ensure we have gradients for every parameter we want to optimize
assert len(dlogpdf_dtheta) == self.size #1 x num_param array
assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
"""
Compute mean, variance of the predictive distibution.
:param mu: mean of the latent variable, f, of posterior
:param var: variance of the latent variable, f, of posterior
:param full_cov: whether to use the full covariance or just the diagonal
:type full_cov: Boolean
"""
pred_mean = self.predictive_mean(mu, var, Y_metadata)
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata)
return pred_mean, pred_var
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
#compute the quantiles by sampling!!!
N_samp = 1000
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
#ss_f = s.flatten()
#ss_y = self.samples(ss_f, Y_metadata)
ss_y = self.samples(s, Y_metadata)
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
def samples(self, gp, Y_metadata=None):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
raise NotImplementedError

View file

@ -1,13 +1,14 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
_exp_lim_val = np.finfo(np.float64).max
_lim_val = np.log(_exp_lim_val)
class GPTransformation(object):
"""
Link function class for doing non-Gaussian likelihoods approximation
@ -69,7 +70,7 @@ class Probit(GPTransformation):
.. math::
g(f) = \\Phi^{-1} (mu)
"""
def transf(self,f):
return std_norm_cdf(f)
@ -86,6 +87,34 @@ class Probit(GPTransformation):
f2 = f**2
return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
class Cloglog(GPTransformation):
"""
Complementary log-log link
.. math::
p(f) = 1 - e^{-e^f}
or
f = \log (-\log(1-p))
"""
def transf(self,f):
return 1-np.exp(-np.exp(f))
def dtransf_df(self,f):
return np.exp(f-np.exp(f))
def d2transf_df2(self,f):
ef = np.exp(f)
return -np.exp(f-ef)*(ef-1.)
def d3transf_df3(self,f):
ef = np.exp(f)
return np.exp(f-ef)*(1.-3*ef + ef**2)
class Log(GPTransformation):
"""
.. math::
@ -94,16 +123,16 @@ class Log(GPTransformation):
"""
def transf(self,f):
return np.exp(f)
return np.exp(np.clip(f, -_lim_val, _lim_val))
def dtransf_df(self,f):
return np.exp(f)
return np.exp(np.clip(f, -_lim_val, _lim_val))
def d2transf_df2(self,f):
return np.exp(f)
return np.exp(np.clip(f, -_lim_val, _lim_val))
def d3transf_df3(self,f):
return np.exp(f)
return np.exp(np.clip(f, -_lim_val, _lim_val))
class Log_ex_1(GPTransformation):
"""

View file

@ -0,0 +1,82 @@
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats, special
import link_functions
from likelihood import Likelihood
from gaussian import Gaussian
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
from ..core.parameterization import Parameterized
import itertools
class MixedNoise(Likelihood):
def __init__(self, likelihoods_list, name='mixed_noise'):
#NOTE at the moment this likelihood only works for using a list of gaussians
super(Likelihood, self).__init__(name=name)
self.link_parameters(*likelihoods_list)
self.likelihoods_list = likelihoods_list
self.log_concave = False
def gaussian_variance(self, Y_metadata):
assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
ind = Y_metadata['output_index'].flatten()
variance = np.zeros(ind.size)
for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))):
variance[ind==j] = lik.variance
return variance
def betaY(self,Y,Y_metadata):
#TODO not here.
return Y/self.gaussian_variance(Y_metadata=Y_metadata)[:,None]
def update_gradients(self, gradients):
self.gradient = gradients
def exact_inference_gradients(self, dL_dKdiag, Y_metadata):
assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
ind = Y_metadata['output_index'].flatten()
return np.array([dL_dKdiag[ind==i].sum() for i in range(len(self.likelihoods_list))])
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
ind = Y_metadata['output_index'].flatten()
_variance = np.array([self.likelihoods_list[j].variance for j in ind ])
if full_cov:
var += np.eye(var.shape[0])*_variance
else:
var += _variance
return mu, var
def predictive_variance(self, mu, sigma, Y_metadata):
_variance = self.gaussian_variance(Y_metadata)
return _variance + sigma**2
def predictive_quantiles(self, mu, var, quantiles, Y_metadata):
ind = Y_metadata['output_index'].flatten()
outputs = np.unique(ind)
Q = np.zeros( (mu.size,len(quantiles)) )
for j in outputs:
q = self.likelihoods_list[j].predictive_quantiles(mu[ind==j,:],
var[ind==j,:],quantiles,Y_metadata=None)
Q[ind==j,:] = np.hstack(q)
return [q[:,None] for q in Q.T]
def samples(self, gp, Y_metadata):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
N1, N2 = gp.shape
Ysim = np.zeros((N1,N2))
ind = Y_metadata['output_index'].flatten()
for j in np.unique(ind):
flt = ind==j
gp_filtered = gp[flt,:]
n1 = gp_filtered.shape[0]
lik = self.likelihoods_list[j]
_ysim = np.array([np.random.normal(lik.gp_link.transf(gpj), scale=np.sqrt(lik.variance), size=1) for gpj in gp_filtered.flatten()])
Ysim[flt,:] = _ysim.reshape(n1,N2)
return Ysim

View file

@ -1,121 +0,0 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import noise_models
def bernoulli(gp_link=None):
"""
Construct a bernoulli likelihood
:param gp_link: a GPy gp_link function
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Probit()
#else:
# assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
if isinstance(gp_link,noise_models.gp_transformations.Probit):
analytical_mean = True
analytical_variance = False
elif isinstance(gp_link,noise_models.gp_transformations.Heaviside):
analytical_mean = True
analytical_variance = True
else:
analytical_mean = False
analytical_variance = False
return noise_models.bernoulli_noise.Bernoulli(gp_link,analytical_mean,analytical_variance)
def exponential(gp_link=None):
"""
Construct a exponential likelihood
:param gp_link: a GPy gp_link function
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Log_ex_1()
analytical_mean = False
analytical_variance = False
return noise_models.exponential_noise.Exponential(gp_link,analytical_mean,analytical_variance)
def gaussian_ep(gp_link=None,variance=1.):
"""
Construct a gaussian likelihood
:param gp_link: a GPy gp_link function
:param variance: scalar
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Identity()
#else:
# assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
analytical_mean = False
analytical_variance = False
return noise_models.gaussian_noise.Gaussian(gp_link,analytical_mean,analytical_variance,variance)
def poisson(gp_link=None):
"""
Construct a Poisson likelihood
:param gp_link: a GPy gp_link function
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Log_ex_1()
#else:
# assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
analytical_mean = False
analytical_variance = False
return noise_models.poisson_noise.Poisson(gp_link,analytical_mean,analytical_variance)
def gamma(gp_link=None,beta=1.):
"""
Construct a Gamma likelihood
:param gp_link: a GPy gp_link function
:param beta: scalar
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Log_ex_1()
analytical_mean = False
analytical_variance = False
return noise_models.gamma_noise.Gamma(gp_link,analytical_mean,analytical_variance,beta)
def gaussian(gp_link=None, variance=2, D=None, N=None):
"""
Construct a Gaussian likelihood
:param gp_link: a GPy gp_link function
:param variance: variance
:type variance: scalar
:returns: Gaussian noise model:
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Identity()
analytical_mean = True
analytical_variance = True # ?
return noise_models.gaussian_noise.Gaussian(gp_link, analytical_mean,
analytical_variance, variance=variance, D=D, N=N)
def student_t(gp_link=None, deg_free=5, sigma2=2):
"""
Construct a Student t likelihood
:param gp_link: a GPy gp_link function
:param deg_free: degrees of freedom of student-t
:type deg_free: scalar
:param sigma2: variance
:type sigma2: scalar
:returns: Student-T noise model
"""
if gp_link is None:
gp_link = noise_models.gp_transformations.Identity()
analytical_mean = True
analytical_variance = True
return noise_models.student_t_noise.StudentT(gp_link, analytical_mean,
analytical_variance,deg_free, sigma2)

View file

@ -1,8 +0,0 @@
import noise_distributions
import bernoulli_noise
import exponential_noise
import gaussian_noise
import gamma_noise
import poisson_noise
import student_t_noise
import gp_transformations

View file

@ -1,300 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Gaussian(NoiseDistribution):
"""
Gaussian likelihood
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
:param variance: variance value of the Gaussian distribution
:param N: Number of data points
:type N: int
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1., D=None, N=None):
self.variance = variance
self.N = N
self._set_params(np.asarray(variance))
super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)
if isinstance(gp_link , gp_transformations.Identity):
self.log_concave = True
def _get_params(self):
return np.array([self.variance])
def _get_param_names(self):
return ['noise_model_variance']
def _set_params(self, p):
self.variance = float(p)
self.I = np.eye(self.N)
self.covariance_matrix = self.I * self.variance
self.Ki = self.I*(1.0 / self.variance)
#self.ln_det_K = np.sum(np.log(np.diag(self.covariance_matrix)))
self.ln_det_K = self.N*np.log(self.variance)
def _gradients(self,partial):
return np.zeros(1)
#return np.sum(partial)
def _preprocess_values(self,Y):
"""
Check if the values of the observations correspond to the values
assumed by the likelihood function.
"""
return Y
def _moments_match_analytical(self,data_i,tau_i,v_i):
"""
Moments match of the marginal approximation in EP algorithm
:param i: number of observation (int)
:param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float)
"""
sigma2_hat = 1./(1./self.variance + tau_i)
mu_hat = sigma2_hat*(data_i/self.variance + v_i)
sum_var = self.variance + 1./tau_i
Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
return Z_hat, mu_hat, sigma2_hat
def _predictive_mean_analytical(self,mu,sigma):
new_sigma2 = self.predictive_variance(mu,sigma)
return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)
def _predictive_variance_analytical(self,mu,sigma,predictive_mean=None):
return 1./(1./self.variance + 1./sigma**2)
def _mass(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
Please negate your function and use pdf in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _nlog_mass(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
Please negate your function and use logpdf in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _dnlog_mass_dgp(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
Please negate your function and use dlogpdf_df in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def _d2nlog_mass_dgp2(self, link_f, y, extra_data=None):
NotImplementedError("Deprecated, now doing chain in noise_model.py for link function evaluation\
Please negate your function and use d2logpdf_df2 in noise_model.py, if implementing a likelihood\
rederivate the derivative without doing the chain and put in logpdf, dlogpdf_dlink or\
its derivatives")
def pdf_link(self, link_f, y, extra_data=None):
"""
Likelihood function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: likelihood evaluated for this point
:rtype: float
"""
#Assumes no covariance, exp, sum, log for numerical stability
return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
def logpdf_link(self, link_f, y, extra_data=None):
"""
Log likelihood function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i})) = -\\frac{N \\ln 2\\pi}{2} - \\frac{\\ln |K|}{2} - \\frac{(y_{i} - \\lambda(f_{i}))^{T}\\sigma^{-2}(y_{i} - \\lambda(f_{i}))}{2}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: log likelihood evaluated for this point
:rtype: float
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi))
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
Gradient of the pdf at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{1}{\\sigma^{2}}(y_{i} - \\lambda(f_{i}))
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
s2_i = (1.0/self.variance)
grad = s2_i*y - s2_i*link_f
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
"""
Hessian at y, given link_f, w.r.t link_f.
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}f} = -\\frac{1}{\\sigma^{2}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
hess = -(1.0/self.variance)*np.ones((self.N, 1))
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = 0
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None]
return d3logpdf_dlink3
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
"""
Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\sigma^{2}} = -\\frac{N}{2\\sigma^{2}} + \\frac{(y_{i} - \\lambda(f_{i}))^{2}}{2\\sigma^{4}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: float
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
e = y - link_f
s_4 = 1.0/(self.variance**2)
dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.sum(np.square(e))
return np.sum(dlik_dsigma) # Sure about this sum?
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
"""
Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)}) = \\frac{1}{\\sigma^{4}}(-y_{i} + \\lambda(f_{i}))
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2)
dlik_grad_dsigma = -s_4*y + s_4*link_f
return dlik_grad_dsigma
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
"""
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{2}\\lambda(f)}) = \\frac{1}{\\sigma^{4}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2)
d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None]
return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
return np.asarray([[dlogpdf_dvar]])
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
return d2logpdf_dlink2_dvar
def _mean(self,gp):
"""
Expected value of y under the Mass (or density) function p(y|f)
.. math::
E_{p(y|f)}[y]
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Variance of y under the Mass (or density) function p(y|f)
.. math::
Var_{p(y|f)}[y]
"""
return self.variance
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
return Ysim.reshape(orig_shape)

View file

@ -1,434 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
import pylab as pb
from GPy.util.plot import gpplot
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from GPy.util.misc import chain_1, chain_2, chain_3
from scipy.integrate import quad
import warnings
class NoiseDistribution(object):
"""
Likelihood class for doing approximations
"""
def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."
self.gp_link = gp_link
self.analytical_mean = analytical_mean
self.analytical_variance = analytical_variance
if self.analytical_mean:
self.moments_match = self._moments_match_analytical
self.predictive_mean = self._predictive_mean_analytical
else:
self.moments_match = self._moments_match_numerical
self.predictive_mean = self._predictive_mean_numerical
if self.analytical_variance:
self.predictive_variance = self._predictive_variance_analytical
else:
self.predictive_variance = self._predictive_variance_numerical
self.log_concave = False
def _get_params(self):
return np.zeros(0)
def _get_param_names(self):
return []
def _set_params(self,p):
pass
def _gradients(self,partial):
return np.zeros(0)
def _preprocess_values(self,Y):
"""
In case it is needed, this function assess the output values or makes any pertinent transformation on them.
:param Y: observed output
:type Y: Nx1 numpy.darray
"""
return Y
def _moments_match_analytical(self,obs,tau,v):
"""
If available, this function computes the moments analytically.
"""
raise NotImplementedError
def log_predictive_density(self, y_test, mu_star, var_star):
"""
Calculation of the log predictive density
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
"""
assert y_test.shape==mu_star.shape
assert y_test.shape==var_star.shape
assert y_test.shape[1] == 1
def integral_generator(y, m, v):
"""Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
def f(f_star):
return self.pdf(f_star, y)*np.exp(-(1./(2*v))*np.square(m-f_star))
return f
scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v), -np.inf, np.inf) for y, m, v in zip(y_test.flatten(), mu_star.flatten(), var_star.flatten())])
scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1)
p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star)
return np.log(p_ystar)
def _moments_match_numerical(self,obs,tau,v):
"""
Calculation of moments using quadrature
:param obs: observed output
:param tau: cavity distribution 1st natural parameter (precision)
:param v: cavity distribution 2nd natural paramenter (mu*precision)
"""
#Compute first integral for zeroth moment.
#NOTE constant np.sqrt(2*pi/tau) added at the end of the function
mu = v/tau
def int_1(f):
return self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
z_scaled, accuracy = quad(int_1, -np.inf, np.inf)
#Compute second integral for first moment
def int_2(f):
return f*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
mean, accuracy = quad(int_2, -np.inf, np.inf)
mean /= z_scaled
#Compute integral for variance
def int_3(f):
return (f**2)*self.pdf(f, obs)*np.exp(-0.5*tau*np.square(mu-f))
Ef2, accuracy = quad(int_3, -np.inf, np.inf)
Ef2 /= z_scaled
variance = Ef2 - mean**2
#Add constant to the zeroth moment
#NOTE: this constant is not needed in the other moments because it cancells out.
z = z_scaled/np.sqrt(2*np.pi/tau)
return z, mean, variance
def _predictive_mean_analytical(self,mu,sigma):
"""
Predictive mean
.. math::
E(Y^{*}|Y) = E( E(Y^{*}|f^{*}, Y) )
If available, this function computes the predictive mean analytically.
"""
raise NotImplementedError
def _predictive_variance_analytical(self,mu,sigma):
"""
Predictive variance
.. math::
V(Y^{*}| Y) = E( V(Y^{*}|f^{*}, Y) ) + V( E(Y^{*}|f^{*}, Y) )
If available, this function computes the predictive variance analytically.
"""
raise NotImplementedError
def _predictive_mean_numerical(self,mu,variance):
"""
Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )
:param mu: mean of posterior
:param sigma: standard deviation of posterior
"""
#import ipdb; ipdb.set_trace()
def int_mean(f,m,v):
return self._mean(f)*np.exp(-(0.5/v)*np.square(f - m))
#scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
scaled_mean = [quad(int_mean, mj-6*np.sqrt(s2j), mj+6*np.sqrt(s2j), args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
return mean
def _predictive_variance_numerical(self,mu,variance,predictive_mean=None):
"""
Numerical approximation to the predictive variance: V(Y_star)
The following variance decomposition is used:
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
:param mu: mean of posterior
:param sigma: standard deviation of posterior
:predictive_mean: output's predictive mean, if None _predictive_mean function will be called.
"""
normalizer = np.sqrt(2*np.pi*variance)
# E( V(Y_star|f_star) )
def int_var(f,m,v):
return self._variance(f)*np.exp(-(0.5/v)*np.square(f - m))
#Most of the weight is within 6 stds and this avoids some negative infinity and infinity problems of taking f^2
scaled_exp_variance = [quad(int_var, mj-6*np.sqrt(s2j), mj+6*np.sqrt(s2j), args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
exp_var = np.array(scaled_exp_variance)[:,None] / normalizer
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
#E( E(Y_star|f_star) )**2
if predictive_mean is None:
predictive_mean = self.predictive_mean(mu,variance)
predictive_mean_sq = predictive_mean**2
#E( E(Y_star|f_star)**2 )
def int_pred_mean_sq(f,m,v):
return self._mean(f)**2*np.exp(-(0.5/v)*np.square(f - m))
scaled_exp_exp2 = [quad(int_pred_mean_sq, mj-6*np.sqrt(s2j), mj+6*np.sqrt(s2j), args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer
var_exp = exp_exp2 - predictive_mean_sq
# V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
return exp_var + var_exp
def pdf_link(self, link_f, y, extra_data=None):
raise NotImplementedError
def logpdf_link(self, link_f, y, extra_data=None):
raise NotImplementedError
def dlogpdf_dlink(self, link_f, y, extra_data=None):
raise NotImplementedError
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
raise NotImplementedError
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
raise NotImplementedError
def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
raise NotImplementedError
def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
raise NotImplementedError
def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
raise NotImplementedError
def pdf(self, f, y, extra_data=None):
"""
Evaluates the link function link(f) then computes the likelihood (pdf) using it
.. math:
p(y|\\lambda(f))
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:returns: likelihood evaluated for this point
:rtype: float
"""
link_f = self.gp_link.transf(f)
return self.pdf_link(link_f, y, extra_data=extra_data)
def logpdf(self, f, y, extra_data=None):
"""
Evaluates the link function link(f) then computes the log likelihood (log pdf) using it
.. math:
\\log p(y|\\lambda(f))
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:returns: log likelihood evaluated for this point
:rtype: float
"""
link_f = self.gp_link.transf(f)
return self.logpdf_link(link_f, y, extra_data=extra_data)
def dlogpdf_df(self, f, y, extra_data=None):
"""
Evaluates the link function link(f) then computes the derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule
.. math::
\\frac{d\\log p(y|\\lambda(f))}{df} = \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d\\lambda(f)}{df}
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:returns: derivative of log likelihood evaluated for this point
:rtype: 1xN array
"""
link_f = self.gp_link.transf(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
dlink_df = self.gp_link.dtransf_df(f)
return chain_1(dlogpdf_dlink, dlink_df)
def d2logpdf_df2(self, f, y, extra_data=None):
"""
Evaluates the link function link(f) then computes the second derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule
.. math::
\\frac{d^{2}\\log p(y|\\lambda(f))}{df^{2}} = \\frac{d^{2}\\log p(y|\\lambda(f))}{d^{2}\\lambda(f)}\\left(\\frac{d\\lambda(f)}{df}\\right)^{2} + \\frac{d\\log p(y|\\lambda(f))}{d\\lambda(f)}\\frac{d^{2}\\lambda(f)}{df^{2}}
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
:rtype: 1xN array
"""
link_f = self.gp_link.transf(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
d2link_df2 = self.gp_link.d2transf_df2(f)
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
def d3logpdf_df3(self, f, y, extra_data=None):
"""
Evaluates the link function link(f) then computes the third derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule
.. math::
\\frac{d^{3}\\log p(y|\\lambda(f))}{df^{3}} = \\frac{d^{3}\\log p(y|\\lambda(f)}{d\\lambda(f)^{3}}\\left(\\frac{d\\lambda(f)}{df}\\right)^{3} + 3\\frac{d^{2}\\log p(y|\\lambda(f)}{d\\lambda(f)^{2}}\\frac{d\\lambda(f)}{df}\\frac{d^{2}\\lambda(f)}{df^{2}} + \\frac{d\\log p(y|\\lambda(f)}{d\\lambda(f)}\\frac{d^{3}\\lambda(f)}{df^{3}}
:param f: latent variables f
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:returns: third derivative of log likelihood evaluated for this point
:rtype: float
"""
link_f = self.gp_link.transf(f)
d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data)
dlink_df = self.gp_link.dtransf_df(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
d2link_df2 = self.gp_link.d2transf_df2(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
d3link_df3 = self.gp_link.d3transf_df3(f)
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
def dlogpdf_dtheta(self, f, y, extra_data=None):
"""
TODO: Doc strings
"""
if len(self._get_param_names()) > 0:
link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
else:
#Is no parameters so return an empty array for its derivatives
return np.empty([1, 0])
def dlogpdf_df_dtheta(self, f, y, extra_data=None):
"""
TODO: Doc strings
"""
if len(self._get_param_names()) > 0:
link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
else:
#Is no parameters so return an empty array for its derivatives
return np.empty([f.shape[0], 0])
def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
"""
TODO: Doc strings
"""
if len(self._get_param_names()) > 0:
link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f)
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
else:
#Is no parameters so return an empty array for its derivatives
return np.empty([f.shape[0], 0])
def _laplace_gradients(self, f, y, extra_data=None):
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data)
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data)
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
# ensure we have gradients for every parameter we want to optimize
assert dlogpdf_dtheta.shape[1] == len(self._get_param_names())
assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names())
assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names())
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
def predictive_values(self, mu, var, full_cov=False, sampling=False, num_samples=10000):
"""
Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction.
:param mu: mean of the latent variable, f, of posterior
:param var: variance of the latent variable, f, of posterior
:param full_cov: whether to use the full covariance or just the diagonal
:type full_cov: Boolean
:param num_samples: number of samples to use in computing quantiles and
possibly mean variance
:type num_samples: integer
:param sampling: Whether to use samples for mean and variances anyway
:type sampling: Boolean
"""
if sampling:
#Get gp_samples f* using posterior mean and variance
if not full_cov:
gp_samples = np.random.multivariate_normal(mu.flatten(), np.diag(var.flatten()),
size=num_samples).T
else:
gp_samples = np.random.multivariate_normal(mu.flatten(), var,
size=num_samples).T
#Push gp samples (f*) through likelihood to give p(y*|f*)
samples = self.samples(gp_samples)
axis=-1
#Calculate mean, variance and precentiles from samples
warnings.warn("Using sampling to calculate mean, variance and predictive quantiles.")
pred_mean = np.mean(samples, axis=axis)[:,None]
pred_var = np.var(samples, axis=axis)[:,None]
q1 = np.percentile(samples, 2.5, axis=axis)[:,None]
q3 = np.percentile(samples, 97.5, axis=axis)[:,None]
else:
pred_mean = self.predictive_mean(mu, var)
pred_var = self.predictive_variance(mu, var, pred_mean)
warnings.warn("Predictive quantiles are only computed when sampling.")
q1 = np.repeat(np.nan,pred_mean.size)[:,None]
q3 = q1.copy()
return pred_mean, pred_var, q1, q3
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
raise NotImplementedError

View file

@ -1,15 +1,14 @@
from __future__ import division
# Copyright (c) 2012, 2013 Ricardo Andrade
# Copyright (c) 2012-2014 Ricardo Andrade, Alan Saul
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
import link_functions
from likelihood import Likelihood
class Poisson(NoiseDistribution):
class Poisson(Likelihood):
"""
Poisson likelihood
@ -19,13 +18,19 @@ class Poisson(NoiseDistribution):
.. Note::
Y is expected to take values in {0,1,2,...}
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)
def __init__(self, gp_link=None):
if gp_link is None:
gp_link = link_functions.Log()
def _preprocess_values(self,Y): #TODO
return Y
super(Poisson, self).__init__(gp_link, name='Poisson')
def pdf_link(self, link_f, y, extra_data=None):
def _conditional_mean(self, f):
"""
the expected value of y given a value of f
"""
return self.gp_link.transf(gp)
def pdf_link(self, link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
@ -36,14 +41,14 @@ class Poisson(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return np.prod(stats.poisson.pmf(y,link_f))
def logpdf_link(self, link_f, y, extra_data=None):
def logpdf_link(self, link_f, y, Y_metadata=None):
"""
Log Likelihood Function given link(f)
@ -54,7 +59,7 @@ class Poisson(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: likelihood evaluated for this point
:rtype: float
@ -62,7 +67,7 @@ class Poisson(NoiseDistribution):
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))
def dlogpdf_dlink(self, link_f, y, extra_data=None):
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
@ -73,7 +78,7 @@ class Poisson(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
@ -81,7 +86,7 @@ class Poisson(NoiseDistribution):
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return y/link_f - 1
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -94,7 +99,7 @@ class Poisson(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
@ -109,7 +114,7 @@ class Poisson(NoiseDistribution):
#transf = self.gp_link.transf(gp)
#return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -120,7 +125,7 @@ class Poisson(NoiseDistribution):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
@ -128,19 +133,19 @@ class Poisson(NoiseDistribution):
d3lik_dlink3 = 2*y/(link_f)**3
return d3lik_dlink3
def _mean(self,gp):
def conditional_mean(self,gp):
"""
Mass (or density) function
The mean of the random variable conditioned on one value of the GP
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
def conditional_variance(self,gp):
"""
Mass (or density) function
The variance of the random variable conditioned on one value of the GP
"""
return self.gp_link.transf(gp)
def samples(self, gp):
def samples(self, gp, Y_metadata=None):
"""
Returns a set of samples of observations based on a given value of the latent variable.

View file

@ -0,0 +1,279 @@
# Copyright (c) 2012-2014 Ricardo Andrade, Alan Saul
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats, special
import scipy as sp
import link_functions
from scipy import stats, integrate
from scipy.special import gammaln, gamma
from likelihood import Likelihood
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
class StudentT(Likelihood):
"""
Student T likelihood
For nomanclature see Bayesian Data Analysis 2003 p576
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
"""
def __init__(self,gp_link=None, deg_free=5, sigma2=2):
if gp_link is None:
gp_link = link_functions.Identity()
super(StudentT, self).__init__(gp_link, name='Student_T')
# sigma2 is not a noise parameter, it is a squared scale.
self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
self.v = Param('deg_free', float(deg_free))
self.link_parameter(self.sigma2)
self.link_parameter(self.v)
self.v.constrain_fixed()
self.log_concave = False
def parameters_changed(self):
self.variance = (self.v / float(self.v - 2)) * self.sigma2
def update_gradients(self, grads):
"""
Pull out the gradients, be careful as the order must match the order
in which the parameters are added
"""
self.sigma2.gradient = grads[0]
self.v.gradient = grads[1]
def pdf_link(self, inv_link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
.. math::
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
:param inv_link_f: latent variables link(f)
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f
#Careful gamma(big_number) is infinity!
objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5))
/ (np.sqrt(self.v * np.pi * self.sigma2)))
* ((1 + (1./float(self.v))*((e**2)/float(self.sigma2)))**(-0.5*(self.v + 1)))
)
return np.prod(objective)
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
"""
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)
:param inv_link_f: latent variables (link(f))
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f
#FIXME:
#Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?!
#But np.log(1 + (1/float(self.v))*((y-inv_link_f)**2)/self.sigma2) throws it correctly
#print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5)
- 0.5*np.log(self.sigma2 * self.v * np.pi)
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
)
return np.sum(objective)
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v}
:param inv_link_f: latent variables (f)
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}}
:param inv_link_f: latent variables inv_link(f)
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{-2(v+1)((y_{i} - \lambda(f_{i}))^3 - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v))}{((y_{i} - \lambda(f_{i})) + \\sigma^{2} v)^3}
:param inv_link_f: latent variables link(f)
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
((e**2 + self.sigma2*self.v)**3)
)
return d3lik_dlink3
def dlogpdf_link_dvar(self, inv_link_f, y, Y_metadata=None):
"""
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
.. math::
\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})}
:param inv_link_f: latent variables link(f)
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return np.sum(dlogpdf_dvar)
def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
"""
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-2\\sigma v(v + 1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2}
:param inv_link_f: latent variables inv_link_f
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dvar(self, inv_link_f, y, Y_metadata=None):
"""
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
.. math::
\\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}}
:param inv_link_f: latent variables link(f)
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array
"""
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ ((self.sigma2*self.v + (e**2))**3)
)
return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
return np.hstack((dlogpdf_dvar, dlogpdf_dv))
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def predictive_mean(self, mu, sigma, Y_metadata=None):
# The comment here confuses mean and median.
return self.gp_link.transf(mu) # only true if link is monotonic, which it is.
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
if self.deg_free<=2.:
return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2.
else:
return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)
def conditional_mean(self, gp):
return self.gp_link.transf(gp)
def conditional_variance(self, gp):
return self.deg_free/(self.deg_free - 2.)
def samples(self, gp, Y_metadata=None):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
#FIXME: Very slow as we are computing a new random variable per input!
#Can't get it to sample all at the same time
#student_t_samples = np.array([stats.t.rvs(self.v, self.gp_link.transf(gpj),scale=np.sqrt(self.sigma2), size=1) for gpj in gp])
dfs = np.ones_like(gp)*self.v
scales = np.ones_like(gp)*np.sqrt(self.sigma2)
student_t_samples = stats.t.rvs(dfs, loc=self.gp_link.transf(gp),
scale=scales)
return student_t_samples.reshape(orig_shape)