mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-08 15:05:15 +02:00
Merge from upstream
This commit is contained in:
commit
7930eb646f
16 changed files with 214 additions and 103 deletions
|
|
@ -89,7 +89,7 @@ class GP(Model):
|
|||
self.link_parameter(self.kern)
|
||||
self.link_parameter(self.likelihood)
|
||||
|
||||
def set_XY(self, X=None, Y=None):
|
||||
def set_XY(self, X=None, Y=None, trigger_update=True):
|
||||
"""
|
||||
Set the input / output data of the model
|
||||
This is useful if we wish to change our existing data but maintain the same model
|
||||
|
|
@ -99,7 +99,7 @@ class GP(Model):
|
|||
:param Y: output observations
|
||||
:type Y: np.ndarray
|
||||
"""
|
||||
self.update_model(False)
|
||||
if trigger_update: self.update_model(False)
|
||||
if Y is not None:
|
||||
if self.normalizer is not None:
|
||||
self.normalizer.scale_by(Y)
|
||||
|
|
@ -123,26 +123,26 @@ class GP(Model):
|
|||
self.link_parameters(self.X)
|
||||
else:
|
||||
self.X = ObsAr(X)
|
||||
self.update_model(True)
|
||||
self._trigger_params_changed()
|
||||
if trigger_update: self.update_model(True)
|
||||
if trigger_update: self._trigger_params_changed()
|
||||
|
||||
def set_X(self,X):
|
||||
def set_X(self,X, trigger_update=True):
|
||||
"""
|
||||
Set the input data of the model
|
||||
|
||||
:param X: input observations
|
||||
:type X: np.ndarray
|
||||
"""
|
||||
self.set_XY(X=X)
|
||||
self.set_XY(X=X, trigger_update=trigger_update)
|
||||
|
||||
def set_Y(self,Y):
|
||||
def set_Y(self,Y, trigger_update=True):
|
||||
"""
|
||||
Set the output data of the model
|
||||
|
||||
:param X: output observations
|
||||
:type X: np.ndarray
|
||||
"""
|
||||
self.set_XY(Y=Y)
|
||||
self.set_XY(Y=Y, trigger_update=trigger_update)
|
||||
|
||||
def parameters_changed(self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -214,14 +214,14 @@ class Model(Parameterized):
|
|||
self.obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e10, 1e10)
|
||||
return obj_f, self.obj_grads
|
||||
|
||||
def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, **kwargs):
|
||||
def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs):
|
||||
"""
|
||||
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
|
||||
|
||||
kwargs are passed to the optimizer. They can be:
|
||||
|
||||
:param max_f_eval: maximum number of function evaluations
|
||||
:type max_f_eval: int
|
||||
:param max_iters: maximum number of function evaluations
|
||||
:type max_iters: int
|
||||
:messages: True: Display messages during optimisation, "ipython_notebook":
|
||||
:type messages: bool"string
|
||||
:param optimizer: which optimizer to use (defaults to self.preferred optimizer)
|
||||
|
|
|
|||
|
|
@ -62,6 +62,15 @@ class SparseGP(GP):
|
|||
|
||||
def has_uncertain_inputs(self):
|
||||
return isinstance(self.X, VariationalPosterior)
|
||||
|
||||
def set_Z(self, Z, trigger_update=True):
|
||||
if trigger_update: self.update_model(False)
|
||||
self.unlink_parameter(self.Z)
|
||||
from ..core import Param
|
||||
self.Z = Param('inducing inputs',Z)
|
||||
self.link_parameter(self.Z, index=0)
|
||||
if trigger_update: self.update_model(True)
|
||||
if trigger_update: self._trigger_params_changed()
|
||||
|
||||
def parameters_changed(self):
|
||||
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata)
|
||||
|
|
|
|||
|
|
@ -25,20 +25,15 @@ class SVGP(SparseGP):
|
|||
|
||||
Hensman, Matthews and Ghahramani, Scalable Variational GP Classification, ArXiv 1411.2005
|
||||
"""
|
||||
if batchsize is None:
|
||||
batchsize = X.shape[0]
|
||||
|
||||
self.X_all, self.Y_all = X, Y
|
||||
# how to rescale the batch likelihood in case of minibatches
|
||||
self.batchsize = batchsize
|
||||
batch_scale = float(self.X_all.shape[0])/float(self.batchsize)
|
||||
#KL_scale = 1./np.float64(self.mpi_comm.size)
|
||||
KL_scale = 1.0
|
||||
|
||||
import climin.util
|
||||
#Make a climin slicer to make drawing minibatches much quicker. Annoyingly, this doesn;t pickle.
|
||||
self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize)
|
||||
X_batch, Y_batch = self.new_batch()
|
||||
self.X_all, self.Y_all = X, Y
|
||||
if batchsize is None:
|
||||
X_batch, Y_batch = X, Y
|
||||
else:
|
||||
import climin.util
|
||||
#Make a climin slicer to make drawing minibatches much quicker
|
||||
self.slicer = climin.util.draw_mini_slices(self.X_all.shape[0], self.batchsize)
|
||||
X_batch, Y_batch = self.new_batch()
|
||||
|
||||
#create the SVI inference method
|
||||
inf_method = svgp_inf()
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ def exponents(fnow, current_grad):
|
|||
return np.sign(exps) * np.log10(exps).astype(int)
|
||||
|
||||
class VerboseOptimization(object):
|
||||
def __init__(self, model, opt, maxiters, verbose=False, current_iteration=0, ipython_notebook=True):
|
||||
def __init__(self, model, opt, maxiters, verbose=False, current_iteration=0, ipython_notebook=True, clear_after_finish=False):
|
||||
self.verbose = verbose
|
||||
if self.verbose:
|
||||
self.model = model
|
||||
|
|
@ -22,6 +22,7 @@ class VerboseOptimization(object):
|
|||
self.opt_name = opt.opt_name
|
||||
self.model.add_observer(self, self.print_status)
|
||||
self.status = 'running'
|
||||
self.clear = clear_after_finish
|
||||
|
||||
self.update()
|
||||
|
||||
|
|
@ -37,30 +38,31 @@ class VerboseOptimization(object):
|
|||
self.ipython_notebook = False
|
||||
|
||||
if self.ipython_notebook:
|
||||
self.text.set_css('width', '100%')
|
||||
#self.progress.set_css('width', '100%')
|
||||
|
||||
left_col = ContainerWidget(children = [self.progress, self.text])
|
||||
right_col = ContainerWidget(children = [self.model_show])
|
||||
hor_align = ContainerWidget(children = [left_col, right_col])
|
||||
self.hor_align = ContainerWidget(children = [left_col, right_col])
|
||||
|
||||
display(hor_align)
|
||||
display(self.hor_align)
|
||||
|
||||
try:
|
||||
self.text.set_css('width', '100%')
|
||||
left_col.set_css({
|
||||
'padding': '2px',
|
||||
'width': "100%",
|
||||
})
|
||||
|
||||
right_col.set_css({
|
||||
'padding': '2px',
|
||||
})
|
||||
|
||||
self.hor_align.set_css({
|
||||
'width': "100%",
|
||||
})
|
||||
except:
|
||||
pass
|
||||
|
||||
left_col.set_css({
|
||||
'padding': '2px',
|
||||
'width': "100%",
|
||||
})
|
||||
|
||||
right_col.set_css({
|
||||
'padding': '2px',
|
||||
})
|
||||
|
||||
hor_align.set_css({
|
||||
'width': "100%",
|
||||
})
|
||||
|
||||
hor_align.remove_class('vbox')
|
||||
hor_align.add_class('hbox')
|
||||
self.hor_align.remove_class('vbox')
|
||||
self.hor_align.add_class('hbox')
|
||||
|
||||
left_col.add_class("box-flex1")
|
||||
right_col.add_class('box-flex0')
|
||||
|
|
@ -148,3 +150,5 @@ class VerboseOptimization(object):
|
|||
print('Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start))
|
||||
print('Optimization status: {0:.5g}'.format(self.status))
|
||||
print()
|
||||
elif self.clear:
|
||||
self.hor_align.close()
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ class SVGP(LatentFunctionInference):
|
|||
|
||||
|
||||
#quadrature for the likelihood
|
||||
F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v)
|
||||
F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v, Y_metadata=Y_metadata)
|
||||
|
||||
#rescale the F term if working on a batch
|
||||
F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale
|
||||
|
|
|
|||
|
|
@ -181,9 +181,12 @@ class Add(CombinationKernel):
|
|||
|
||||
def input_sensitivity(self, summarize=True):
|
||||
if summarize:
|
||||
return reduce(np.add, [k.input_sensitivity(summarize) for k in self.parts])
|
||||
i_s = np.zeros((self.input_dim))
|
||||
for k in self.parts:
|
||||
i_s[k.active_dims] += k.input_sensitivity(summarize)
|
||||
return i_s
|
||||
else:
|
||||
i_s = np.zeros((len(self.parts), self.input_dim))
|
||||
from operator import setitem
|
||||
[setitem(i_s, (i, Ellipsis), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)]
|
||||
[setitem(i_s, (i, k.active_dims), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)]
|
||||
return i_s
|
||||
|
|
|
|||
|
|
@ -6,3 +6,5 @@ from .poisson import Poisson
|
|||
from .student_t import StudentT
|
||||
from .likelihood import Likelihood
|
||||
from .mixed_noise import MixedNoise
|
||||
from .binomial import Binomial
|
||||
|
||||
|
|
|
|||
125
GPy/likelihoods/binomial.py
Normal file
125
GPy/likelihoods/binomial.py
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
|
||||
from . import link_functions
|
||||
from .likelihood import Likelihood
|
||||
from scipy import special
|
||||
|
||||
class Binomial(Likelihood):
|
||||
"""
|
||||
Binomial likelihood
|
||||
|
||||
.. math::
|
||||
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
|
||||
|
||||
.. Note::
|
||||
Y takes values in either {-1, 1} or {0, 1}.
|
||||
link function should have the domain [0, 1], e.g. probit (default) or Heaviside
|
||||
|
||||
.. See also::
|
||||
likelihood.py, for the parent class
|
||||
"""
|
||||
def __init__(self, gp_link=None):
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Probit()
|
||||
|
||||
super(Binomial, self).__init__(gp_link, 'Binomial')
|
||||
|
||||
def conditional_mean(self, gp, Y_metadata):
|
||||
return self.gp_link(gp)*Y_metadata['trials']
|
||||
|
||||
def pdf_link(self, inv_link_f, y, Y_metadata):
|
||||
"""
|
||||
Likelihood function given inverse link of f.
|
||||
|
||||
.. math::
|
||||
p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}}
|
||||
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata must contain 'trials'
|
||||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
|
||||
.. Note:
|
||||
Each y_i must be in {0, 1}
|
||||
"""
|
||||
return np.exp(self.logpdf_link(inv_link_f, y, Y_metadata))
|
||||
|
||||
def logpdf_link(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Log Likelihood function given inverse link of f.
|
||||
|
||||
.. math::
|
||||
\\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i})
|
||||
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata must contain 'trials'
|
||||
:returns: log likelihood evaluated at points inverse link of f.
|
||||
:rtype: float
|
||||
"""
|
||||
N = Y_metadata['trials']
|
||||
nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1)
|
||||
|
||||
return nchoosey + y*np.log(inv_link_f) + (N-y)*np.log(1.-inv_link_f)
|
||||
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f.
|
||||
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata must contain 'trials'
|
||||
:returns: gradient of log likelihood evaluated at points inverse link of f.
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
N = Y_metadata['trials']
|
||||
return y/inv_link_f - (N-y)/(1-inv_link_f)
|
||||
|
||||
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j
|
||||
i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j.
|
||||
|
||||
|
||||
.. math::
|
||||
\\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}}
|
||||
|
||||
:param inv_link_f: latent variables inverse link of f.
|
||||
:type inv_link_f: Nx1 array
|
||||
:param y: data
|
||||
:type y: Nx1 array
|
||||
:param Y_metadata: Y_metadata not used in binomial
|
||||
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f.
|
||||
:rtype: Nx1 array
|
||||
|
||||
.. Note::
|
||||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i)
|
||||
"""
|
||||
N = Y_metadata['trials']
|
||||
return -y/np.square(inv_link_f) - (N-y)/np.square(1-inv_link_f)
|
||||
|
||||
def samples(self, gp, Y_metadata=None):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
:param gp: latent variable
|
||||
"""
|
||||
orig_shape = gp.shape
|
||||
gp = gp.flatten()
|
||||
N = Y_metadata['trials']
|
||||
Ysim = np.random.binomial(N, self.gp_link.transf(gp))
|
||||
return Ysim.reshape(orig_shape)
|
||||
|
||||
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
|
||||
pass
|
||||
|
|
@ -57,9 +57,8 @@ class Exponential(Likelihood):
|
|||
:rtype: float
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
log_objective = np.log(link_f) - y*link_f
|
||||
return np.sum(log_objective)
|
||||
return log_objective
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -77,7 +76,6 @@ class Exponential(Likelihood):
|
|||
:rtype: Nx1 array
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
grad = 1./link_f - y
|
||||
#grad = y/(link_f**2) - 1./link_f
|
||||
return grad
|
||||
|
|
@ -103,7 +101,6 @@ class Exponential(Likelihood):
|
|||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
hess = -1./(link_f**2)
|
||||
#hess = -2*y/(link_f**3) + 1/(link_f**2)
|
||||
return hess
|
||||
|
|
@ -123,7 +120,6 @@ class Exponential(Likelihood):
|
|||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
d3lik_dlink3 = 2./(link_f**3)
|
||||
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
|
||||
return d3lik_dlink3
|
||||
|
|
|
|||
|
|
@ -66,12 +66,11 @@ class Gamma(Likelihood):
|
|||
:rtype: float
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
#alpha = self.gp_link.transf(gp)*self.beta
|
||||
#return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
|
||||
alpha = link_f*self.beta
|
||||
log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
|
||||
return np.sum(log_objective)
|
||||
return log_objective
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -90,7 +89,6 @@ class Gamma(Likelihood):
|
|||
:rtype: Nx1 array
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta
|
||||
#old
|
||||
#return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
|
||||
|
|
@ -118,7 +116,6 @@ class Gamma(Likelihood):
|
|||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2)
|
||||
#old
|
||||
#return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
|
||||
|
|
@ -140,6 +137,5 @@ class Gamma(Likelihood):
|
|||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
|
||||
return d3lik_dlink3
|
||||
|
|
|
|||
|
|
@ -130,11 +130,10 @@ class Gaussian(Likelihood):
|
|||
:returns: log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
N = y.shape[0]
|
||||
ln_det_cov = N*np.log(self.variance)
|
||||
ln_det_cov = np.log(self.variance)
|
||||
|
||||
return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))
|
||||
return -0.5*((y-link_f)**2/self.variance + ln_det_cov + np.log(2.*np.pi))
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -151,8 +150,7 @@ class Gaussian(Likelihood):
|
|||
:returns: gradient of log likelihood evaluated at points link(f)
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
s2_i = (1.0/self.variance)
|
||||
s2_i = 1.0/self.variance
|
||||
grad = s2_i*y - s2_i*link_f
|
||||
return grad
|
||||
|
||||
|
|
@ -178,9 +176,9 @@ class Gaussian(Likelihood):
|
|||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
N = y.shape[0]
|
||||
hess = -(1.0/self.variance)*np.ones((N, 1))
|
||||
D = link_f.shape[1]
|
||||
hess = -(1.0/self.variance)*np.ones((N, D))
|
||||
return hess
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||
|
|
@ -198,9 +196,9 @@ class Gaussian(Likelihood):
|
|||
:returns: third derivative of log likelihood evaluated at points link(f)
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
N = y.shape[0]
|
||||
d3logpdf_dlink3 = np.zeros((N,1))
|
||||
D = link_f.shape[1]
|
||||
d3logpdf_dlink3 = np.zeros((N,D))
|
||||
return d3logpdf_dlink3
|
||||
|
||||
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
|
||||
|
|
@ -218,12 +216,11 @@ class Gaussian(Likelihood):
|
|||
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
e = y - link_f
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
N = y.shape[0]
|
||||
dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
|
||||
return np.sum(dlik_dsigma) # Sure about this sum?
|
||||
dlik_dsigma = -0.5/self.variance + 0.5*s_4*np.square(e)
|
||||
return dlik_dsigma
|
||||
|
||||
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -240,7 +237,6 @@ class Gaussian(Likelihood):
|
|||
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
dlik_grad_dsigma = -s_4*y + s_4*link_f
|
||||
return dlik_grad_dsigma
|
||||
|
|
@ -260,15 +256,15 @@ class Gaussian(Likelihood):
|
|||
:returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
N = y.shape[0]
|
||||
d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
|
||||
D = link_f.shape[1]
|
||||
d2logpdf_dlink2_dvar = np.ones((N, D))*s_4
|
||||
return d2logpdf_dlink2_dvar
|
||||
|
||||
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
|
||||
return np.asarray([[dlogpdf_dvar]])
|
||||
return dlogpdf_dvar
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
|
||||
|
|
|
|||
|
|
@ -131,7 +131,7 @@ class Likelihood(Parameterized):
|
|||
|
||||
return z, mean, variance
|
||||
|
||||
def variational_expectations(self, Y, m, v, gh_points=None):
|
||||
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
|
||||
"""
|
||||
Use Gauss-Hermite Quadrature to compute
|
||||
|
||||
|
|
@ -158,9 +158,9 @@ class Likelihood(Parameterized):
|
|||
|
||||
#evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid.
|
||||
# broadcast needs to be handled carefully.
|
||||
logp = self.logpdf(X,Y[:,None])
|
||||
dlogp_dx = self.dlogpdf_df(X, Y[:,None])
|
||||
d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None])
|
||||
logp = self.logpdf(X,Y[:,None], Y_metadata=Y_metadata)
|
||||
dlogp_dx = self.dlogpdf_df(X, Y[:,None], Y_metadata=Y_metadata)
|
||||
d2logp_dx2 = self.d2logpdf_df2(X, Y[:,None], Y_metadata=Y_metadata)
|
||||
|
||||
#clipping for numerical stability
|
||||
#logp = np.clip(logp,-1e9,1e9)
|
||||
|
|
@ -425,7 +425,7 @@ class Likelihood(Parameterized):
|
|||
return np.zeros([f.shape[0], 0])
|
||||
|
||||
def _laplace_gradients(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata).sum(axis=0)
|
||||
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
|
||||
|
|
|
|||
|
|
@ -64,8 +64,7 @@ class Poisson(Likelihood):
|
|||
:rtype: float
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))
|
||||
return -link_f + y*np.log(link_f) - special.gammaln(y+1)
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -83,7 +82,6 @@ class Poisson(Likelihood):
|
|||
:rtype: Nx1 array
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
return y/link_f - 1
|
||||
|
||||
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
|
||||
|
|
@ -107,12 +105,7 @@ class Poisson(Likelihood):
|
|||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
hess = -y/(link_f**2)
|
||||
return hess
|
||||
#d2_df = self.gp_link.d2transf_df2(gp)
|
||||
#transf = self.gp_link.transf(gp)
|
||||
#return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
|
||||
return -y/(link_f**2)
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -129,7 +122,6 @@ class Poisson(Likelihood):
|
|||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
d3lik_dlink3 = 2*y/(link_f)**3
|
||||
return d3lik_dlink3
|
||||
|
||||
|
|
|
|||
|
|
@ -86,7 +86,6 @@ class StudentT(Likelihood):
|
|||
:rtype: float
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
#FIXME:
|
||||
#Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?!
|
||||
|
|
@ -97,7 +96,7 @@ class StudentT(Likelihood):
|
|||
- 0.5*np.log(self.sigma2 * self.v * np.pi)
|
||||
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
|
||||
)
|
||||
return np.sum(objective)
|
||||
return objective
|
||||
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -115,7 +114,6 @@ class StudentT(Likelihood):
|
|||
:rtype: Nx1 array
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
||||
return grad
|
||||
|
|
@ -141,7 +139,6 @@ class StudentT(Likelihood):
|
|||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
||||
return hess
|
||||
|
|
@ -161,7 +158,6 @@ class StudentT(Likelihood):
|
|||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
||||
((e**2 + self.sigma2*self.v)**3)
|
||||
|
|
@ -183,10 +179,9 @@ class StudentT(Likelihood):
|
|||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
|
||||
return np.sum(dlogpdf_dvar)
|
||||
return dlogpdf_dvar
|
||||
|
||||
def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -203,7 +198,6 @@ class StudentT(Likelihood):
|
|||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
|
||||
return dlogpdf_dlink_dvar
|
||||
|
|
@ -223,7 +217,6 @@ class StudentT(Likelihood):
|
|||
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
||||
/ ((self.sigma2*self.v + (e**2))**3)
|
||||
|
|
@ -246,7 +239,7 @@ class StudentT(Likelihood):
|
|||
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
|
||||
|
||||
def predictive_mean(self, mu, sigma, Y_metadata=None):
|
||||
# The comment here confuses mean and median.
|
||||
# The comment here confuses mean and median.
|
||||
return self.gp_link.transf(mu) # only true if link is monotonic, which it is.
|
||||
|
||||
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
||||
|
|
|
|||
|
|
@ -362,7 +362,7 @@ class TestNoiseModels(object):
|
|||
def t_dlogpdf_df(self, model, Y, f):
|
||||
print("\n{}".format(inspect.stack()[0][3]))
|
||||
self.description = "\n{}".format(inspect.stack()[0][3])
|
||||
logpdf = functools.partial(model.logpdf, y=Y)
|
||||
logpdf = functools.partial(np.sum(model.logpdf), y=Y)
|
||||
dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y)
|
||||
grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g')
|
||||
grad.randomize()
|
||||
|
|
@ -652,9 +652,9 @@ class LaplaceTests(unittest.TestCase):
|
|||
print(m2)
|
||||
optimizer = 'scg'
|
||||
print("Gaussian")
|
||||
m1.optimize(optimizer, messages=debug)
|
||||
print("Laplace Gaussian")
|
||||
m2.optimize(optimizer, messages=debug)
|
||||
m1.optimize(optimizer, messages=debug, ipython_notebook=False)
|
||||
print ("Laplace Gaussian")
|
||||
m2.optimize(optimizer, messages=debug, ipython_notebook=False)
|
||||
if debug:
|
||||
print(m1)
|
||||
print(m2)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue