mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-18 13:55:14 +02:00
Gradients almost there for dytil_dfhat, diagonal terms are right
This commit is contained in:
parent
cad2c271eb
commit
569311b510
3 changed files with 140 additions and 8 deletions
|
|
@ -96,7 +96,10 @@ class Laplace(likelihood):
|
||||||
#dytil_dfhat1 = np.dot(self.Sigma_tilde, Ki) + np.eye(self.N) # or self.Wi__Ki_W? Theyre the same basically
|
#dytil_dfhat1 = np.dot(self.Sigma_tilde, Ki) + np.eye(self.N) # or self.Wi__Ki_W? Theyre the same basically
|
||||||
|
|
||||||
a = mdot(dWi_dfhat, Ki, self.f_hat)
|
a = mdot(dWi_dfhat, Ki, self.f_hat)
|
||||||
dytil_dfhat = mdot(dWi_dfhat, Ki, self.f_hat) + np.dot(self.Sigma_tilde, Ki) + np.eye(self.N)
|
b = np.dot(self.Sigma_tilde, Ki)
|
||||||
|
dytil_dfhat = - np.dot(dWi_dfhat, np.dot(Ki, self.f_hat)) + np.dot(self.Sigma_tilde, Ki) + np.eye(self.N)
|
||||||
|
#dytil_dfhat = - (np.dot(dWi_dfhat, Ki)*self.f_hat[:, None] + np.dot(self.Sigma_tilde, Ki)).sum(-1) + np.eye(self.N)
|
||||||
|
self.dytil_dfhat = dytil_dfhat
|
||||||
return dL_dytil, dytil_dfhat
|
return dL_dytil, dytil_dfhat
|
||||||
|
|
||||||
def _Kgradients(self, dL_d_K_Sigma, dK_dthetaK):
|
def _Kgradients(self, dL_d_K_Sigma, dK_dthetaK):
|
||||||
|
|
@ -330,19 +333,25 @@ class Laplace(likelihood):
|
||||||
|
|
||||||
def fit_full(self, K):
|
def fit_full(self, K):
|
||||||
"""
|
"""
|
||||||
The laplace approximation algorithm
|
The laplace approximation algorithm, find K and expand hessian
|
||||||
For nomenclature see Rasmussen & Williams 2006 - modified for numerical stability
|
For nomenclature see Rasmussen & Williams 2006 - modified for numerical stability
|
||||||
:K: Covariance matrix
|
:K: Covariance matrix
|
||||||
"""
|
"""
|
||||||
self.K = K.copy()
|
self.K = K.copy()
|
||||||
#assert np.all(self.K.T == self.K)
|
|
||||||
#self.K_safe = K.copy()
|
#Find mode
|
||||||
if self.rasm:
|
if self.rasm:
|
||||||
self.f_hat = self.rasm_mode(K)
|
self.f_hat = self.rasm_mode(K)
|
||||||
else:
|
else:
|
||||||
self.f_hat = self.ncg_mode(K)
|
self.f_hat = self.ncg_mode(K)
|
||||||
|
|
||||||
|
#Compute hessian and other variables at mode
|
||||||
|
self._compute_likelihood_variables()
|
||||||
|
|
||||||
|
def _compute_likelihood_variables(self):
|
||||||
#At this point get the hessian matrix
|
#At this point get the hessian matrix
|
||||||
|
#print "Data: ", self.data
|
||||||
|
#print "fhat: ", self.f_hat
|
||||||
self.W = -np.diag(self.likelihood_function.link_hess(self.data, self.f_hat, extra_data=self.extra_data))
|
self.W = -np.diag(self.likelihood_function.link_hess(self.data, self.f_hat, extra_data=self.extra_data))
|
||||||
|
|
||||||
if not self.likelihood_function.log_concave:
|
if not self.likelihood_function.log_concave:
|
||||||
|
|
@ -352,14 +361,14 @@ class Laplace(likelihood):
|
||||||
#This is a property only held by non-log-concave likelihoods
|
#This is a property only held by non-log-concave likelihoods
|
||||||
|
|
||||||
#TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though
|
#TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though
|
||||||
self.B, self.B_chol, self.W_12 = self._compute_B_statistics(K, self.W)
|
self.B, self.B_chol, self.W_12 = self._compute_B_statistics(self.K, self.W)
|
||||||
self.Bi, _, _, B_det = pdinv(self.B)
|
self.Bi, _, _, B_det = pdinv(self.B)
|
||||||
|
|
||||||
Ki_W_i = self.K - mdot(self.K, self.W_12, self.Bi, self.W_12, self.K)
|
Ki_W_i = self.K - mdot(self.K, self.W_12, self.Bi, self.W_12, self.K)
|
||||||
self.ln_Ki_W_i_det = np.linalg.det(Ki_W_i)
|
self.ln_Ki_W_i_det = np.linalg.det(Ki_W_i)
|
||||||
|
|
||||||
b = np.dot(self.W, self.f_hat) + self.likelihood_function.link_grad(self.data, self.f_hat, extra_data=self.extra_data)[:, None]
|
b = np.dot(self.W, self.f_hat) + self.likelihood_function.link_grad(self.data, self.f_hat, extra_data=self.extra_data)[:, None]
|
||||||
solve_chol = cho_solve((self.B_chol, True), mdot(self.W_12, (K, b)))
|
solve_chol = cho_solve((self.B_chol, True), mdot(self.W_12, (self.K, b)))
|
||||||
a = b - mdot(self.W_12, solve_chol)
|
a = b - mdot(self.W_12, solve_chol)
|
||||||
self.f_Ki_f = np.dot(self.f_hat.T, a)
|
self.f_Ki_f = np.dot(self.f_hat.T, a)
|
||||||
self.ln_K_det = pddet(self.K)
|
self.ln_K_det = pddet(self.K)
|
||||||
|
|
|
||||||
|
|
@ -10,8 +10,7 @@ from scipy.special import gammaln, gamma
|
||||||
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||||
|
|
||||||
class likelihood_function:
|
class likelihood_function:
|
||||||
"""
|
""" Likelihood class for doing Expectation propagation
|
||||||
Likelihood class for doing Expectation propagation
|
|
||||||
|
|
||||||
:param Y: observed output (Nx1 numpy.darray)
|
:param Y: observed output (Nx1 numpy.darray)
|
||||||
..Note:: Y values allowed depend on the likelihood_function used
|
..Note:: Y values allowed depend on the likelihood_function used
|
||||||
|
|
@ -241,6 +240,7 @@ class student_t(likelihood_function):
|
||||||
y = np.squeeze(y)
|
y = np.squeeze(y)
|
||||||
f = np.squeeze(f)
|
f = np.squeeze(f)
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
|
|
||||||
e = y - f
|
e = y - f
|
||||||
hess = ((self.v + 1)*(e**2 - self.v*(self.sigma**2))) / ((((self.sigma**2)*self.v) + e**2)**2)
|
hess = ((self.v + 1)*(e**2 - self.v*(self.sigma**2))) / ((((self.sigma**2)*self.v) + e**2)**2)
|
||||||
return np.squeeze(hess)
|
return np.squeeze(hess)
|
||||||
|
|
|
||||||
123
GPy/testing/laplace_approx.tests.py
Normal file
123
GPy/testing/laplace_approx.tests.py
Normal file
|
|
@ -0,0 +1,123 @@
|
||||||
|
import unittest
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import GPy
|
||||||
|
from GPy.models import GP
|
||||||
|
from GPy.util.linalg import pdinv, tdot
|
||||||
|
from scipy import linalg
|
||||||
|
|
||||||
|
class LikelihoodGradParam(GP):
|
||||||
|
def __init__(self, X, likelihood_function, kernel, param_name=None, function=None, **kwargs):
|
||||||
|
super(LikelihoodGradParam, self).__init__(X, likelihood_function, kernel)
|
||||||
|
self.param_name = param_name
|
||||||
|
self.func = function
|
||||||
|
#self.func_params = kwargs
|
||||||
|
#self.parameter = self.likelihood.__getattribute__(self.param_name)
|
||||||
|
|
||||||
|
def _get_param_names(self):
|
||||||
|
f_hats = ["f_{}".format(i) for i in range(len(self.likelihood.f_hat))]
|
||||||
|
return f_hats
|
||||||
|
|
||||||
|
def _get_params(self):
|
||||||
|
return np.hstack([np.squeeze(self.likelihood.f_hat)])
|
||||||
|
#return np.hstack([self.likelihood.__getattribute__(self.param_name)])
|
||||||
|
|
||||||
|
def hack_dL_dK(self):
|
||||||
|
self.K = self.kern.K(self.X)
|
||||||
|
self.K += self.likelihood.covariance_matrix
|
||||||
|
|
||||||
|
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
|
||||||
|
|
||||||
|
# the gradient of the likelihood wrt the covariance matrix
|
||||||
|
if self.likelihood.YYT is None:
|
||||||
|
alpha, _ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y, lower=1)
|
||||||
|
self.dL_dK = 0.5 * (tdot(alpha) - self.D * self.Ki)
|
||||||
|
else:
|
||||||
|
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
|
||||||
|
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
|
||||||
|
self.dL_dK = 0.5 * (tmp - self.D * self.Ki)
|
||||||
|
|
||||||
|
def _set_params(self, x):
|
||||||
|
self.likelihood.f_hat = x.reshape(self.N, 1)
|
||||||
|
self.likelihood._compute_likelihood_variables()
|
||||||
|
self.hack_dL_dK()
|
||||||
|
|
||||||
|
def log_likelihood(self):
|
||||||
|
return self.func(self.likelihood)[0, 0]
|
||||||
|
|
||||||
|
def _log_likelihood_gradients(self):
|
||||||
|
#gradient = self.likelihood.__getattribute__(self.param_name)
|
||||||
|
self.likelihood._compute_likelihood_variables()
|
||||||
|
self.likelihood._gradients(partial=np.diag(self.dL_dK))
|
||||||
|
gradient = getattr(self.likelihood, self.param_name)
|
||||||
|
#Need to sum over fhats? For dytil_dfhat...
|
||||||
|
#gradient = np.flatten(gradient, axis=0)
|
||||||
|
#return gradient[:, 0]
|
||||||
|
return gradient[0, :]
|
||||||
|
|
||||||
|
|
||||||
|
class LaplaceTests(unittest.TestCase):
|
||||||
|
def setUp(self):
|
||||||
|
real_var = 0.1
|
||||||
|
#Start a function, any function
|
||||||
|
#self.X = np.linspace(0.0, 10.0, 30)[:, None]
|
||||||
|
self.X = np.random.randn(2,1)
|
||||||
|
#self.X = np.ones((10,1))
|
||||||
|
Y = np.sin(self.X) + np.random.randn(*self.X.shape)*real_var
|
||||||
|
self.Y = Y/Y.max()
|
||||||
|
self.kernel = GPy.kern.rbf(self.X.shape[1])
|
||||||
|
|
||||||
|
deg_free = 10000
|
||||||
|
real_sd = np.sqrt(real_var)
|
||||||
|
initial_sd_guess = 1
|
||||||
|
|
||||||
|
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=initial_sd_guess)
|
||||||
|
self.stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, rasm=True)
|
||||||
|
self.stu_t_likelihood.fit_full(self.kernel.K(self.X))
|
||||||
|
self.m = LikelihoodGradParam(self.X, self.stu_t_likelihood, self.kernel, None, None)
|
||||||
|
self.m.constrain_fixed('rbf_v', 1.0898)
|
||||||
|
self.m.constrain_fixed('rbf_l', 1.8651)
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
self.m = None
|
||||||
|
|
||||||
|
def test_dy_dfhat(self):
|
||||||
|
def ytil(likelihood):
|
||||||
|
Sigma_tilde = likelihood.Sigma_tilde
|
||||||
|
K = likelihood.K
|
||||||
|
Ki, _, _, _ = pdinv(K)
|
||||||
|
f_hat = likelihood.f_hat
|
||||||
|
Sigma, _, _, _ = pdinv(Sigma_tilde)
|
||||||
|
return np.dot(np.dot(Sigma_tilde, (Ki + Sigma)), f_hat)
|
||||||
|
|
||||||
|
self.m.func = ytil
|
||||||
|
self.m.param_name = 'dytil_dfhat'
|
||||||
|
self.m.randomize()
|
||||||
|
#try:
|
||||||
|
self.m.checkgrad(verbose=1)
|
||||||
|
assert self.m.checkgrad()
|
||||||
|
#except:
|
||||||
|
#import ipdb;ipdb.set_trace()
|
||||||
|
|
||||||
|
|
||||||
|
#def test_dL_dytil(self):
|
||||||
|
#def L(likelihood):
|
||||||
|
##-0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z
|
||||||
|
#Sigma_tilde = likelihood.Sigma_tilde
|
||||||
|
#Ki = likelihood.K
|
||||||
|
#f_hat = likelihood.f_hat
|
||||||
|
#Sigma, _, _, _ = pdinv(Sigma_tilde)
|
||||||
|
#return np.dot(np.dot(Sigma_tilde, (Ki + Sigma)), f_hat)
|
||||||
|
|
||||||
|
#self.m.func = L
|
||||||
|
#self.m.param_name = 'dL_dytil'
|
||||||
|
#m.randomize()
|
||||||
|
##try:
|
||||||
|
#m.checkgrad(verbose=1)
|
||||||
|
#assert m.checkgrad()
|
||||||
|
#except:
|
||||||
|
#import ipdb;ipdb.set_trace()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue