mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-21 14:05:14 +02:00
Dragged likelihood_function changes in
This commit is contained in:
parent
f641ab54a8
commit
b9a7a40795
1 changed files with 383 additions and 1 deletions
|
|
@ -3,12 +3,13 @@
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import stats
|
from scipy import stats, integrate
|
||||||
import scipy as sp
|
import scipy as sp
|
||||||
import pylab as pb
|
import pylab as pb
|
||||||
from ..util.plot import gpplot
|
from ..util.plot import gpplot
|
||||||
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||||
import link_functions
|
import link_functions
|
||||||
|
from scipy.special import gammaln, gamma
|
||||||
|
|
||||||
class LikelihoodFunction(object):
|
class LikelihoodFunction(object):
|
||||||
"""
|
"""
|
||||||
|
|
@ -24,6 +25,7 @@ class LikelihoodFunction(object):
|
||||||
assert isinstance(link,link_functions.LinkFunction)
|
assert isinstance(link,link_functions.LinkFunction)
|
||||||
self.link = link
|
self.link = link
|
||||||
self.moments_match = self._moments_match_numerical
|
self.moments_match = self._moments_match_numerical
|
||||||
|
self.log_concave = True
|
||||||
|
|
||||||
def _preprocess_values(self,Y):
|
def _preprocess_values(self,Y):
|
||||||
return Y
|
return Y
|
||||||
|
|
@ -164,3 +166,383 @@ class Poisson(LikelihoodFunction):
|
||||||
p_025 = tmp[:,0]
|
p_025 = tmp[:,0]
|
||||||
p_975 = tmp[:,1]
|
p_975 = tmp[:,1]
|
||||||
return mean,np.nan*mean,p_025,p_975 # better variance here TODO
|
return mean,np.nan*mean,p_025,p_975 # better variance here TODO
|
||||||
|
|
||||||
|
class Student_t(LikelihoodFunction):
|
||||||
|
"""Student t likelihood distribution
|
||||||
|
For nomanclature see Bayesian Data Analysis 2003 p576
|
||||||
|
|
||||||
|
$$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})\sqrt{v \pi}\sigma - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2)$$
|
||||||
|
|
||||||
|
Laplace:
|
||||||
|
Needs functions to calculate
|
||||||
|
ln p(yi|fi)
|
||||||
|
dln p(yi|fi)_dfi
|
||||||
|
d2ln p(yi|fi)_d2fifj
|
||||||
|
"""
|
||||||
|
def __init__(self, deg_free=5, sigma2=2, link=None):
|
||||||
|
super(Student_t, self).__init__(link)
|
||||||
|
self.v = deg_free
|
||||||
|
self.sigma2 = sigma2
|
||||||
|
|
||||||
|
self._set_params(np.asarray(sigma2))
|
||||||
|
self.log_concave = False
|
||||||
|
|
||||||
|
def _get_params(self):
|
||||||
|
return np.asarray(self.sigma2)
|
||||||
|
|
||||||
|
def _get_param_names(self):
|
||||||
|
return ["t_noise_std2"]
|
||||||
|
|
||||||
|
def _set_params(self, x):
|
||||||
|
self.sigma2 = float(x)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def variance(self, extra_data=None):
|
||||||
|
return (self.v / float(self.v - 2)) * self.sigma2
|
||||||
|
|
||||||
|
def link_function(self, y, f, extra_data=None):
|
||||||
|
"""link_function $\ln p(y|f)$
|
||||||
|
$$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})\sqrt{v \pi}\sigma - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2$$
|
||||||
|
|
||||||
|
For wolfram alpha import parts for derivative of sigma are -log(sqrt(v*pi)*s) -(1/2)*(v + 1)*log(1 + (1/v)*((y-f)/(s))^2))
|
||||||
|
|
||||||
|
:y: data
|
||||||
|
:f: latent variables f
|
||||||
|
:extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: float(likelihood evaluated for this point)
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
e = y - f
|
||||||
|
#A = gammaln((self.v + 1) * 0.5)
|
||||||
|
#B = - gammaln(self.v * 0.5)
|
||||||
|
#C = - 0.5*np.log(self.sigma2 * self.v * np.pi)
|
||||||
|
#D = + (-(self.v + 1)*0.5)*np.log(1 + ((e**2)/self.sigma2)/np.float(self.v))
|
||||||
|
objective = (+ gammaln((self.v + 1) * 0.5)
|
||||||
|
- gammaln(self.v * 0.5)
|
||||||
|
- 0.5*np.log(self.sigma2 * self.v * np.pi)
|
||||||
|
+ (-(self.v + 1)*0.5)*np.log(1 + ((e**2)/self.sigma2)/np.float(self.v))
|
||||||
|
)
|
||||||
|
#print "C: {} D: {} obj: {}".format(C, np.sum(D), objective.sum())
|
||||||
|
return np.sum(objective)
|
||||||
|
|
||||||
|
def dlik_df(self, y, f, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the link function at y, given f w.r.t f
|
||||||
|
|
||||||
|
$$\frac{dp(y_{i}|f_{i})}{df} = \frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \sigma^{2}v}$$
|
||||||
|
|
||||||
|
:y: data
|
||||||
|
:f: latent variables f
|
||||||
|
:extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: gradient of likelihood evaluated at points
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
e = y - f
|
||||||
|
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
||||||
|
return grad
|
||||||
|
|
||||||
|
def d2lik_d2f(self, y, f, extra_data=None):
|
||||||
|
"""
|
||||||
|
Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j
|
||||||
|
i.e. second derivative link_function at y given f f_j w.r.t f and f_j
|
||||||
|
|
||||||
|
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||||
|
(the distribution for y_{i} depends only on f_{i} not on f_{j!=i}
|
||||||
|
|
||||||
|
$$\frac{d^{2}p(y_{i}|f_{i})}{d^{3}f} = \frac{(v+1)((y_{i}-f_{i})^{2} - \sigma^{2}v)}{((y_{i}-f_{i})^{2} + \sigma^{2}v)^{2}}$$
|
||||||
|
|
||||||
|
:y: data
|
||||||
|
:f: latent variables f
|
||||||
|
:extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points)
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
e = y - f
|
||||||
|
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
||||||
|
return hess
|
||||||
|
|
||||||
|
def d3lik_d3f(self, y, f, extra_data=None):
|
||||||
|
"""
|
||||||
|
Third order derivative link_function (log-likelihood ) at y given f f_j w.r.t f and f_j
|
||||||
|
|
||||||
|
$$\frac{d^{3}p(y_{i}|f_{i})}{d^{3}f} = \frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \sigma^{2} v))}{((y_{i} - f_{i}) + \sigma^{2} v)^3}$$
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
e = y - f
|
||||||
|
d3lik_d3f = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
||||||
|
((e**2 + self.sigma2*self.v)**3)
|
||||||
|
)
|
||||||
|
return d3lik_d3f
|
||||||
|
|
||||||
|
def lik_dstd(self, y, f, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation)
|
||||||
|
|
||||||
|
Terms relavent to derivatives wrt sigma are:
|
||||||
|
-log(sqrt(v*pi)*s) -(1/2)*(v + 1)*log(1 + (1/v)*((y-f)/(s))^2))
|
||||||
|
|
||||||
|
$$\frac{dp(y_{i}|f_{i})}{d\sigma} = -\frac{1}{\sigma} + \frac{(1+v)(y_{i}-f_{i})^2}{\sigma^3 v(1 + \frac{1}{v}(\frac{(y_{i} - f_{i})}{\sigma^2})^2)}$$
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
e = y - f
|
||||||
|
dlik_dsigma = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
|
||||||
|
return dlik_dsigma
|
||||||
|
|
||||||
|
def dlik_df_dstd(self, y, f, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the dlik_df w.r.t sigma parameter (standard deviation)
|
||||||
|
|
||||||
|
$$\frac{d}{d\sigma}(\frac{dp(y_{i}|f_{i})}{df}) = \frac{-2\sigma v(v + 1)(y_{i}-f_{i})}{(y_{i}-f_{i})^2 + \sigma^2 v)^2}$$
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
e = y - f
|
||||||
|
dlik_grad_dsigma = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
|
||||||
|
return dlik_grad_dsigma
|
||||||
|
|
||||||
|
def d2lik_d2f_dstd(self, y, f, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the hessian (d2lik_d2f) w.r.t sigma parameter (standard deviation)
|
||||||
|
|
||||||
|
$$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
e = y - f
|
||||||
|
dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
||||||
|
/ ((self.sigma2*self.v + (e**2))**3)
|
||||||
|
)
|
||||||
|
return dlik_hess_dsigma
|
||||||
|
|
||||||
|
def _gradients(self, y, f, extra_data=None):
|
||||||
|
#must be listed in same order as 'get_param_names'
|
||||||
|
derivs = ([self.lik_dstd(y, f, extra_data=extra_data)],
|
||||||
|
[self.dlik_df_dstd(y, f, extra_data=extra_data)],
|
||||||
|
[self.d2lik_d2f_dstd(y, f, extra_data=extra_data)]
|
||||||
|
) # lists as we might learn many parameters
|
||||||
|
# ensure we have gradients for every parameter we want to optimize
|
||||||
|
assert len(derivs[0]) == len(self._get_param_names())
|
||||||
|
assert len(derivs[1]) == len(self._get_param_names())
|
||||||
|
assert len(derivs[2]) == len(self._get_param_names())
|
||||||
|
return derivs
|
||||||
|
|
||||||
|
def predictive_values(self, mu, var):
|
||||||
|
"""
|
||||||
|
Compute mean, and conficence interval (percentiles 5 and 95) of the prediction
|
||||||
|
|
||||||
|
Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*)
|
||||||
|
(((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
|
||||||
|
*((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))
|
||||||
|
"""
|
||||||
|
|
||||||
|
#We want the variance around test points y which comes from int p(y*|f*)p(f*) df*
|
||||||
|
#Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)]
|
||||||
|
#Since we are given f* (mu) which is our mean (expected) value of y*|f* then the variance is the variance around this
|
||||||
|
#Which was also given to us as (var)
|
||||||
|
#We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution
|
||||||
|
#However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom
|
||||||
|
true_var = var + self.variance
|
||||||
|
|
||||||
|
#Now we have an analytical solution for the variances of the distribution p(y*|f*)p(f*) around our test points but we now
|
||||||
|
#need the 95 and 5 percentiles.
|
||||||
|
#FIXME: Hack, just pretend p(y*|f*)p(f*) is a gaussian and use the gaussian's percentiles
|
||||||
|
p_025 = mu - 2.*np.sqrt(true_var)
|
||||||
|
p_975 = mu + 2.*np.sqrt(true_var)
|
||||||
|
|
||||||
|
return mu, np.nan*mu, p_025, p_975
|
||||||
|
|
||||||
|
def sample_predicted_values(self, mu, var):
|
||||||
|
""" Experimental sample approches and numerical integration """
|
||||||
|
#p_025 = stats.t.ppf(.025, mu)
|
||||||
|
#p_975 = stats.t.ppf(.975, mu)
|
||||||
|
|
||||||
|
num_test_points = mu.shape[0]
|
||||||
|
#Each mu is the latent point f* at the test point x*,
|
||||||
|
#and the var is the gaussian variance at this point
|
||||||
|
#Take lots of samples from this, so we have lots of possible values
|
||||||
|
#for latent point f* for each test point x* weighted by how likely we were to pick it
|
||||||
|
print "Taking %d samples of f*".format(num_test_points)
|
||||||
|
num_f_samples = 10
|
||||||
|
num_y_samples = 10
|
||||||
|
student_t_means = np.random.normal(loc=mu, scale=np.sqrt(var), size=(num_test_points, num_f_samples))
|
||||||
|
print "Student t means shape: ", student_t_means.shape
|
||||||
|
|
||||||
|
#Now we have lots of f*, lets work out the likelihood of getting this by sampling
|
||||||
|
#from a student t centred on this point, sample many points from this distribution
|
||||||
|
#centred on f*
|
||||||
|
#for test_point, f in enumerate(student_t_means):
|
||||||
|
#print test_point
|
||||||
|
#print f.shape
|
||||||
|
#student_t_samples = stats.t.rvs(self.v, loc=f[:,None],
|
||||||
|
#scale=self.sigma,
|
||||||
|
#size=(num_f_samples, num_y_samples))
|
||||||
|
#print student_t_samples.shape
|
||||||
|
|
||||||
|
student_t_samples = stats.t.rvs(self.v, loc=student_t_means[:, None],
|
||||||
|
scale=self.sigma,
|
||||||
|
size=(num_test_points, num_y_samples, num_f_samples))
|
||||||
|
student_t_samples = np.reshape(student_t_samples,
|
||||||
|
(num_test_points, num_y_samples*num_f_samples))
|
||||||
|
|
||||||
|
#Now take the 97.5 and 0.25 percentile of these points
|
||||||
|
p_025 = stats.scoreatpercentile(student_t_samples, .025, axis=1)[:, None]
|
||||||
|
p_975 = stats.scoreatpercentile(student_t_samples, .975, axis=1)[:, None]
|
||||||
|
|
||||||
|
##Alernenately we could sample from int p(y|f*)p(f*|x*) df*
|
||||||
|
def t_gaussian(f, mu, var):
|
||||||
|
return (((gamma((self.v+1)*0.5)) / (gamma(self.v*0.5)*self.sigma*np.sqrt(self.v*np.pi))) * ((1+(1/self.v)*(((mu-f)/self.sigma)**2))**(-(self.v+1)*0.5))
|
||||||
|
* ((1/(np.sqrt(2*np.pi*var)))*np.exp(-(1/(2*var)) *((mu-f)**2)))
|
||||||
|
)
|
||||||
|
|
||||||
|
def t_gauss_int(mu, var):
|
||||||
|
print "Mu: ", mu
|
||||||
|
print "var: ", var
|
||||||
|
result = integrate.quad(t_gaussian, 0.025, 0.975, args=(mu, var))
|
||||||
|
print "Result: ", result
|
||||||
|
return result[0]
|
||||||
|
|
||||||
|
vec_t_gauss_int = np.vectorize(t_gauss_int)
|
||||||
|
|
||||||
|
p = vec_t_gauss_int(mu, var)
|
||||||
|
p_025 = mu - p
|
||||||
|
p_975 = mu + p
|
||||||
|
return mu, np.nan*mu, p_025, p_975
|
||||||
|
|
||||||
|
class Gaussian(LikelihoodFunction):
|
||||||
|
"""
|
||||||
|
Gaussian likelihood - this is a test class for approximation schemes
|
||||||
|
"""
|
||||||
|
def __init__(self, variance, D, N, link=None):
|
||||||
|
super(Gaussian, self).__init__(link)
|
||||||
|
self.D = D
|
||||||
|
self.N = N
|
||||||
|
self._variance = float(variance)
|
||||||
|
self._set_params(np.asarray(variance))
|
||||||
|
|
||||||
|
#Don't support normalizing yet
|
||||||
|
self._bias = np.zeros((1, self.D))
|
||||||
|
self._scale = np.ones((1, self.D))
|
||||||
|
|
||||||
|
def _get_params(self):
|
||||||
|
return np.asarray(self._variance)
|
||||||
|
|
||||||
|
def _get_param_names(self):
|
||||||
|
return ["noise_variance"]
|
||||||
|
|
||||||
|
def _set_params(self, x):
|
||||||
|
self._variance = float(x)
|
||||||
|
self.I = np.eye(self.N)
|
||||||
|
self.covariance_matrix = self.I * self._variance
|
||||||
|
self.Ki = self.I*(1.0 / self._variance)
|
||||||
|
self.ln_K = np.trace(self.covariance_matrix)
|
||||||
|
|
||||||
|
def link_function(self, y, f, extra_data=None):
|
||||||
|
"""link_function $\ln p(y|f)$
|
||||||
|
$$\ln p(y_{i}|f_{i}) = \ln $$
|
||||||
|
|
||||||
|
:y: data
|
||||||
|
:f: latent variables f
|
||||||
|
:extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: float(likelihood evaluated for this point)
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
e = y - f
|
||||||
|
eeT = np.dot(e, e.T)
|
||||||
|
objective = (- 0.5*self.D*np.log(2*np.pi)
|
||||||
|
- 0.5*self.ln_K
|
||||||
|
#- 0.5*np.sum(np.multiply(self.Ki, eeT))
|
||||||
|
- 0.5*np.dot(np.dot(e.T, self.Ki), e)
|
||||||
|
)
|
||||||
|
return np.sum(objective)
|
||||||
|
|
||||||
|
def dlik_df(self, y, f, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the link function at y, given f w.r.t f
|
||||||
|
|
||||||
|
:y: data
|
||||||
|
:f: latent variables f
|
||||||
|
:extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: gradient of likelihood evaluated at points
|
||||||
|
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
s2_i = (1.0/self._variance)*self.I
|
||||||
|
grad = np.dot(s2_i, y) - 0.5*np.dot(s2_i, f)
|
||||||
|
return grad
|
||||||
|
|
||||||
|
def d2lik_d2f(self, y, f, extra_data=None):
|
||||||
|
"""
|
||||||
|
Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j
|
||||||
|
i.e. second derivative link_function at y given f f_j w.r.t f and f_j
|
||||||
|
|
||||||
|
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||||
|
(the distribution for y_{i} depends only on f_{i} not on f_{j!=i}
|
||||||
|
|
||||||
|
:y: data
|
||||||
|
:f: latent variables f
|
||||||
|
:extra_data: extra_data which is not used in student t distribution
|
||||||
|
:returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points)
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
s2_i = (1.0/self._variance)*self.I
|
||||||
|
hess = 0.5*np.diag(-s2_i)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS?
|
||||||
|
return hess
|
||||||
|
|
||||||
|
def d3lik_d3f(self, y, f, extra_data=None):
|
||||||
|
"""
|
||||||
|
Third order derivative link_function (log-likelihood ) at y given f f_j w.r.t f and f_j
|
||||||
|
|
||||||
|
$$\frac{d^{3}p(y_{i}|f_{i})}{d^{3}f} = \frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \sigma^{2} v))}{((y_{i} - f_{i}) + \sigma^{2} v)^3}$$
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
d3lik_d3f = np.diagonal(0*self.I)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS?
|
||||||
|
return d3lik_d3f
|
||||||
|
|
||||||
|
def lik_dstd(self, y, f, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation)
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
e = y - f
|
||||||
|
s_4 = 1.0/(self._variance**2)
|
||||||
|
dlik_dsigma = -0.5*self.N*1/self._variance + 0.5*s_4*np.trace(np.dot(e.T, np.dot(self.I, e)))
|
||||||
|
return dlik_dsigma
|
||||||
|
|
||||||
|
def dlik_df_dstd(self, y, f, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the dlik_df w.r.t sigma parameter (standard deviation)
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
s_4 = 1.0/(self._variance**2)
|
||||||
|
dlik_grad_dsigma = -np.dot(s_4, np.dot(self.I, y)) + 0.5*np.dot(s_4, np.dot(self.I, f))
|
||||||
|
return dlik_grad_dsigma
|
||||||
|
|
||||||
|
def d2lik_d2f_dstd(self, y, f, extra_data=None):
|
||||||
|
"""
|
||||||
|
Gradient of the hessian (d2lik_d2f) w.r.t sigma parameter (standard deviation)
|
||||||
|
|
||||||
|
$$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$
|
||||||
|
"""
|
||||||
|
assert y.shape == f.shape
|
||||||
|
dlik_hess_dsigma = 0.5*np.diag((1.0/(self._variance**2))*self.I)[:, None]
|
||||||
|
return dlik_hess_dsigma
|
||||||
|
|
||||||
|
def _gradients(self, y, f, extra_data=None):
|
||||||
|
#must be listed in same order as 'get_param_names'
|
||||||
|
derivs = ([self.lik_dstd(y, f, extra_data=extra_data)],
|
||||||
|
[self.dlik_df_dstd(y, f, extra_data=extra_data)],
|
||||||
|
[self.d2lik_d2f_dstd(y, f, extra_data=extra_data)]
|
||||||
|
) # lists as we might learn many parameters
|
||||||
|
# ensure we have gradients for every parameter we want to optimize
|
||||||
|
assert len(derivs[0]) == len(self._get_param_names())
|
||||||
|
assert len(derivs[1]) == len(self._get_param_names())
|
||||||
|
assert len(derivs[2]) == len(self._get_param_names())
|
||||||
|
return derivs
|
||||||
|
|
||||||
|
def predictive_values(self, mu, var):
|
||||||
|
mean = mu * self._scale + self._bias
|
||||||
|
true_var = (var + self._variance) * self._scale ** 2
|
||||||
|
_5pc = mean - 2.*np.sqrt(true_var)
|
||||||
|
_95pc = mean + 2.*np.sqrt(true_var)
|
||||||
|
return mean, true_var, _5pc, _95pc
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue