mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-04-27 13:56:23 +02:00
Reparameratised in terms of sigma2
This commit is contained in:
parent
ab6a3a571e
commit
4e5cefb4b5
4 changed files with 207 additions and 49 deletions
|
|
@ -244,9 +244,6 @@ class model(parameterised):
|
||||||
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
|
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
|
||||||
prior_gradients = self._transform_gradients(self._log_prior_gradients())
|
prior_gradients = self._transform_gradients(self._log_prior_gradients())
|
||||||
obj_grads = -LL_gradients - prior_gradients
|
obj_grads = -LL_gradients - prior_gradients
|
||||||
print self
|
|
||||||
#self.checkgrad(verbose=1)
|
|
||||||
#import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
|
||||||
return obj_f, obj_grads
|
return obj_f, obj_grads
|
||||||
|
|
||||||
def optimize(self, optimizer=None, start=None, **kwargs):
|
def optimize(self, optimizer=None, start=None, **kwargs):
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ def timing():
|
||||||
edited_real_sd = real_sd
|
edited_real_sd = real_sd
|
||||||
kernel1 = GPy.kern.rbf(X.shape[1])
|
kernel1 = GPy.kern.rbf(X.shape[1])
|
||||||
|
|
||||||
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
|
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
|
||||||
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm')
|
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm')
|
||||||
m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel1)
|
m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel1)
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints()
|
||||||
|
|
@ -53,7 +53,7 @@ def v_fail_test():
|
||||||
edited_real_sd = real_sd
|
edited_real_sd = real_sd
|
||||||
|
|
||||||
print "Clean student t, rasm"
|
print "Clean student t, rasm"
|
||||||
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
|
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
|
||||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
|
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
|
||||||
m = GPy.models.GP(X, stu_t_likelihood, kernel1)
|
m = GPy.models.GP(X, stu_t_likelihood, kernel1)
|
||||||
m.constrain_positive('')
|
m.constrain_positive('')
|
||||||
|
|
@ -92,18 +92,18 @@ def debug_student_t_noise_approx():
|
||||||
X = np.random.rand(100)[:, None]
|
X = np.random.rand(100)[:, None]
|
||||||
#X = np.random.rand(100)[:, None]
|
#X = np.random.rand(100)[:, None]
|
||||||
#X = np.array([0.5, 1])[:, None]
|
#X = np.array([0.5, 1])[:, None]
|
||||||
Y = np.sin(X*2*np.pi) + np.random.randn(*X.shape)*real_var
|
Y = np.sin(X*2*np.pi) + np.random.randn(*X.shape)*real_var + 1
|
||||||
#Y = X + np.random.randn(*X.shape)*real_var
|
#Y = X + np.random.randn(*X.shape)*real_var
|
||||||
#ty = np.array([1., 9.97733584, 4.17841363])[:, None]
|
#ty = np.array([1., 9.97733584, 4.17841363])[:, None]
|
||||||
#Y = ty
|
#Y = ty
|
||||||
|
|
||||||
X_full = X
|
X_full = X
|
||||||
Y_full = np.sin(X_full)
|
Y_full = np.sin(X_full) + 1
|
||||||
|
|
||||||
Y = Y/Y.max()
|
Y = Y/Y.max()
|
||||||
|
|
||||||
#Add student t random noise to datapoints
|
#Add student t random noise to datapoints
|
||||||
deg_free = 1000
|
deg_free = 100
|
||||||
|
|
||||||
real_sd = np.sqrt(real_var)
|
real_sd = np.sqrt(real_var)
|
||||||
print "Real noise std: ", real_sd
|
print "Real noise std: ", real_sd
|
||||||
|
|
@ -115,7 +115,7 @@ def debug_student_t_noise_approx():
|
||||||
|
|
||||||
plt.close('all')
|
plt.close('all')
|
||||||
# Kernel object
|
# Kernel object
|
||||||
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
kernel1 = GPy.kern.rbf(X.shape[1]) #+ GPy.kern.white(X.shape[1])
|
||||||
#kernel1 = GPy.kern.linear(X.shape[1]) + GPy.kern.white(X.shape[1])
|
#kernel1 = GPy.kern.linear(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||||
kernel2 = kernel1.copy()
|
kernel2 = kernel1.copy()
|
||||||
kernel3 = kernel1.copy()
|
kernel3 = kernel1.copy()
|
||||||
|
|
@ -140,24 +140,24 @@ def debug_student_t_noise_approx():
|
||||||
#print m
|
#print m
|
||||||
|
|
||||||
real_stu_t_std = np.sqrt(real_var*((deg_free - 2)/float(deg_free)))
|
real_stu_t_std = np.sqrt(real_var*((deg_free - 2)/float(deg_free)))
|
||||||
edited_real_sd = real_stu_t_std + 1#initial_var_guess #real_sd
|
edited_real_sd = real_stu_t_std**2 #initial_var_guess #real_sd
|
||||||
#edited_real_sd = real_sd
|
#edited_real_sd = real_sd
|
||||||
|
|
||||||
print "Clean student t, rasm"
|
print "Clean student t, rasm"
|
||||||
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
|
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
|
||||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
|
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
|
||||||
|
|
||||||
m = GPy.models.GP(X, stu_t_likelihood, kernel6)
|
m = GPy.models.GP(X, stu_t_likelihood, kernel6)
|
||||||
#m['rbf_len'] = 1.5
|
#m['rbf_len'] = 1.5
|
||||||
#m.constrain_fixed('rbf_v', 1.0898)
|
#m.constrain_fixed('rbf_v', 1.0898)
|
||||||
#m.constrain_fixed('rbf_l', 1.8651)
|
#m.constrain_fixed('rbf_l', 0.2651)
|
||||||
#m.constrain_fixed('t_noise_std', edited_real_sd)
|
#m.constrain_fixed('t_noise_std', edited_real_sd)
|
||||||
#m.constrain_positive('rbf')
|
#m.constrain_positive('rbf')
|
||||||
#m.constrain_positive('t_noise_std')
|
m.constrain_positive('t_noise_std')
|
||||||
#m.constrain_positive('')
|
#m.constrain_positive('')
|
||||||
#m.constrain_bounded('t_noi', 0.001, 10)
|
#m.constrain_bounded('t_noi', 0.001, 10)
|
||||||
#m.constrain_fixed('t_noi', real_stu_t_std)
|
#m.constrain_fixed('t_noi', real_stu_t_std)
|
||||||
m.constrain_fixed('white', 0.01)
|
#m.constrain_fixed('white', 0.01)
|
||||||
#m.constrain_fixed('t_no', 0.01)
|
#m.constrain_fixed('t_no', 0.01)
|
||||||
#m['rbf_var'] = 0.20446332
|
#m['rbf_var'] = 0.20446332
|
||||||
#m['rbf_leng'] = 0.85776241
|
#m['rbf_leng'] = 0.85776241
|
||||||
|
|
@ -179,7 +179,7 @@ def debug_student_t_noise_approx():
|
||||||
return m
|
return m
|
||||||
|
|
||||||
#print "Clean student t, ncg"
|
#print "Clean student t, ncg"
|
||||||
#t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
|
#t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
|
||||||
#stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg')
|
#stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg')
|
||||||
#m = GPy.models.GP(X, stu_t_likelihood, kernel3)
|
#m = GPy.models.GP(X, stu_t_likelihood, kernel3)
|
||||||
#m.ensure_default_constraints()
|
#m.ensure_default_constraints()
|
||||||
|
|
@ -276,7 +276,7 @@ def student_t_approx():
|
||||||
edited_real_sd = real_sd #initial_var_guess
|
edited_real_sd = real_sd #initial_var_guess
|
||||||
|
|
||||||
print "Clean student t, rasm"
|
print "Clean student t, rasm"
|
||||||
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
|
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
|
||||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
|
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
|
||||||
m = GPy.models.GP(X, stu_t_likelihood, kernel6)
|
m = GPy.models.GP(X, stu_t_likelihood, kernel6)
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints()
|
||||||
|
|
@ -291,7 +291,7 @@ def student_t_approx():
|
||||||
plt.title('Student-t rasm clean')
|
plt.title('Student-t rasm clean')
|
||||||
|
|
||||||
print "Corrupt student t, rasm"
|
print "Corrupt student t, rasm"
|
||||||
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
|
t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
|
||||||
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm')
|
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm')
|
||||||
m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel4)
|
m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel4)
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints()
|
||||||
|
|
@ -308,7 +308,7 @@ def student_t_approx():
|
||||||
return m
|
return m
|
||||||
|
|
||||||
#print "Clean student t, ncg"
|
#print "Clean student t, ncg"
|
||||||
#t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
|
#t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
|
||||||
#stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg')
|
#stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg')
|
||||||
#m = GPy.models.GP(X, stu_t_likelihood, kernel3)
|
#m = GPy.models.GP(X, stu_t_likelihood, kernel3)
|
||||||
#m.ensure_default_constraints()
|
#m.ensure_default_constraints()
|
||||||
|
|
@ -322,7 +322,7 @@ def student_t_approx():
|
||||||
#plt.title('Student-t ncg clean')
|
#plt.title('Student-t ncg clean')
|
||||||
|
|
||||||
#print "Corrupt student t, ncg"
|
#print "Corrupt student t, ncg"
|
||||||
#t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma=edited_real_sd)
|
#t_distribution = GPy.likelihoods.likelihood_functions.student_t(deg_free, sigma2=edited_real_sd)
|
||||||
#corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='ncg')
|
#corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='ncg')
|
||||||
#m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel5)
|
#m = GPy.models.GP(X, corrupt_stu_t_likelihood, kernel5)
|
||||||
#m.ensure_default_constraints()
|
#m.ensure_default_constraints()
|
||||||
|
|
@ -337,7 +337,7 @@ def student_t_approx():
|
||||||
|
|
||||||
|
|
||||||
###with a student t distribution, since it has heavy tails it should work well
|
###with a student t distribution, since it has heavy tails it should work well
|
||||||
###likelihood_function = student_t(deg_free, sigma=real_var)
|
###likelihood_function = student_t(deg_free, sigma2=real_var)
|
||||||
###lap = Laplace(Y, likelihood_function)
|
###lap = Laplace(Y, likelihood_function)
|
||||||
###cov = kernel.K(X)
|
###cov = kernel.K(X)
|
||||||
###lap.fit_full(cov)
|
###lap.fit_full(cov)
|
||||||
|
|
|
||||||
|
|
@ -220,10 +220,10 @@ class Laplace(likelihood):
|
||||||
self.ln_I_KW_det = pddet(np.eye(self.N) + self.W_12*self.K*self.W_12.T)
|
self.ln_I_KW_det = pddet(np.eye(self.N) + self.W_12*self.K*self.W_12.T)
|
||||||
|
|
||||||
#self.ln_I_KW_det = pddet(np.eye(self.N) + np.dot(self.K, self.W))
|
#self.ln_I_KW_det = pddet(np.eye(self.N) + np.dot(self.K, self.W))
|
||||||
self.ln_z_hat = (- 0.5*self.f_Ki_f
|
#self.ln_z_hat = (- 0.5*self.f_Ki_f
|
||||||
- self.ln_I_KW_det
|
#- self.ln_I_KW_det
|
||||||
+ self.likelihood_function.link_function(self.data, self.f_hat, extra_data=self.extra_data)
|
#+ self.likelihood_function.link_function(self.data, self.f_hat, extra_data=self.extra_data)
|
||||||
)
|
#)
|
||||||
|
|
||||||
return self._compute_GP_variables()
|
return self._compute_GP_variables()
|
||||||
|
|
||||||
|
|
@ -308,6 +308,8 @@ class Laplace(likelihood):
|
||||||
step_size = 1
|
step_size = 1
|
||||||
rs = 0
|
rs = 0
|
||||||
i = 0
|
i = 0
|
||||||
|
#if self.likelihood_function.sigma < 0.001:
|
||||||
|
#import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
||||||
while difference > epsilon and i < MAX_ITER and rs < MAX_RESTART:
|
while difference > epsilon and i < MAX_ITER and rs < MAX_RESTART:
|
||||||
W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data)
|
W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data)
|
||||||
if not self.likelihood_function.log_concave:
|
if not self.likelihood_function.log_concave:
|
||||||
|
|
@ -316,8 +318,6 @@ class Laplace(likelihood):
|
||||||
# To cause the posterior to become less certain than the prior and likelihood,
|
# To cause the posterior to become less certain than the prior and likelihood,
|
||||||
# This is a property only held by non-log-concave likelihoods
|
# This is a property only held by non-log-concave likelihoods
|
||||||
B, L, W_12 = self._compute_B_statistics(K, W)
|
B, L, W_12 = self._compute_B_statistics(K, W)
|
||||||
#if i > 30:
|
|
||||||
#import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
|
|
||||||
|
|
||||||
W_f = W*f
|
W_f = W*f
|
||||||
grad = self.likelihood_function.dlik_df(self.data, f, extra_data=self.extra_data)
|
grad = self.likelihood_function.dlik_df(self.data, f, extra_data=self.extra_data)
|
||||||
|
|
|
||||||
|
|
@ -158,26 +158,26 @@ class student_t(likelihood_function):
|
||||||
dln p(yi|fi)_dfi
|
dln p(yi|fi)_dfi
|
||||||
d2ln p(yi|fi)_d2fifj
|
d2ln p(yi|fi)_d2fifj
|
||||||
"""
|
"""
|
||||||
def __init__(self, deg_free, sigma=2):
|
def __init__(self, deg_free, sigma2=2):
|
||||||
#super(student_t, self).__init__()
|
#super(student_t, self).__init__()
|
||||||
self.v = deg_free
|
self.v = deg_free
|
||||||
self.sigma = sigma
|
self.sigma2 = sigma2
|
||||||
self.log_concave = False
|
self.log_concave = False
|
||||||
|
|
||||||
self._set_params(np.asarray(sigma))
|
self._set_params(np.asarray(sigma2))
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return np.asarray(self.sigma)
|
return np.asarray(self.sigma2)
|
||||||
|
|
||||||
def _get_param_names(self):
|
def _get_param_names(self):
|
||||||
return ["t_noise_std"]
|
return ["t_noise_std2"]
|
||||||
|
|
||||||
def _set_params(self, x):
|
def _set_params(self, x):
|
||||||
self.sigma = float(x)
|
self.sigma2 = float(x)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def variance(self, extra_data=None):
|
def variance(self, extra_data=None):
|
||||||
return (self.v / float(self.v - 2)) * (self.sigma**2)
|
return (self.v / float(self.v - 2)) * self.sigma2
|
||||||
|
|
||||||
def link_function(self, y, f, extra_data=None):
|
def link_function(self, y, f, extra_data=None):
|
||||||
"""link_function $\ln p(y|f)$
|
"""link_function $\ln p(y|f)$
|
||||||
|
|
@ -193,12 +193,16 @@ class student_t(likelihood_function):
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
e = y - f
|
||||||
|
A = gammaln((self.v + 1) * 0.5)
|
||||||
|
B = -gammaln(self.v * 0.5)
|
||||||
|
C = - 0.5*np.log(self.sigma2 * self.v * np.pi)
|
||||||
|
D = (-(self.v + 1)*0.5)*np.log(1 + (e**2)/(self.v*self.sigma2))
|
||||||
objective = (+ gammaln((self.v + 1) * 0.5)
|
objective = (+ gammaln((self.v + 1) * 0.5)
|
||||||
- gammaln(self.v * 0.5)
|
- gammaln(self.v * 0.5)
|
||||||
- 0.5*np.log((self.sigma**2) * self.v * np.pi)
|
- 0.5*np.log(self.sigma2 * self.v * np.pi)
|
||||||
#- (self.v + 1) * 0.5 * np.log(1 + (((e / self.sigma)**2) / self.v))
|
+ (-(self.v + 1)*0.5)*np.log(1 + ((e**2)/self.sigma2)/np.float(self.v))
|
||||||
- (self.v + 1) * 0.5 * np.log(1 + (e**2)/(self.v*(self.sigma**2)))
|
|
||||||
)
|
)
|
||||||
|
#print "A: {} B: {} C: {} D: {} obj: {}".format(A,B,C,D.sum(),objective.sum())
|
||||||
return np.sum(objective)
|
return np.sum(objective)
|
||||||
|
|
||||||
def dlik_df(self, y, f, extra_data=None):
|
def dlik_df(self, y, f, extra_data=None):
|
||||||
|
|
@ -215,7 +219,7 @@ class student_t(likelihood_function):
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
e = y - f
|
||||||
grad = ((self.v + 1) * e) / (self.v * (self.sigma**2) + (e**2))
|
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
||||||
return grad
|
return grad
|
||||||
|
|
||||||
def d2lik_d2f(self, y, f, extra_data=None):
|
def d2lik_d2f(self, y, f, extra_data=None):
|
||||||
|
|
@ -235,7 +239,7 @@ class student_t(likelihood_function):
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
e = y - f
|
||||||
hess = ((self.v + 1)*(e**2 - self.v*(self.sigma**2))) / ((((self.sigma**2)*self.v) + e**2)**2)
|
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / (((self.sigma2*self.v) + e**2)**2)
|
||||||
return hess
|
return hess
|
||||||
|
|
||||||
def d3lik_d3f(self, y, f, extra_data=None):
|
def d3lik_d3f(self, y, f, extra_data=None):
|
||||||
|
|
@ -246,8 +250,8 @@ class student_t(likelihood_function):
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
e = y - f
|
||||||
d3lik_d3f = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*(self.sigma**2))) /
|
d3lik_d3f = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
||||||
((e**2 + (self.sigma**2)*self.v)**3)
|
((e**2 + self.sigma2*self.v)**3)
|
||||||
)
|
)
|
||||||
return d3lik_d3f
|
return d3lik_d3f
|
||||||
|
|
||||||
|
|
@ -262,10 +266,16 @@ class student_t(likelihood_function):
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
e = y - f
|
||||||
dlik_dsigma = ( - (1/self.sigma) +
|
#sigma = np.sqrt(self.sigma2)
|
||||||
((1+self.v)*(e**2))/((self.sigma**3)*self.v*(1 + ((e**2) / ((self.sigma**2)*self.v)) ) )
|
#dlik_dsigma = ( - (1/sigma) +
|
||||||
)
|
#((1+self.v)*(e**2))/((sigma*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
|
||||||
|
#)
|
||||||
|
#dlik_dsigma = ( - 1 +
|
||||||
|
#((1+self.v)*(e**2))/((self.sigma2*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
|
||||||
|
#)
|
||||||
#dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1
|
#dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1
|
||||||
|
#dlik_dsigma = (self.v*((e**2)-self.sigma2))/(sigma*((e**2)+self.sigma2*self.v))
|
||||||
|
dlik_dsigma = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
|
||||||
return dlik_dsigma
|
return dlik_dsigma
|
||||||
|
|
||||||
def dlik_df_dstd(self, y, f, extra_data=None):
|
def dlik_df_dstd(self, y, f, extra_data=None):
|
||||||
|
|
@ -276,9 +286,11 @@ class student_t(likelihood_function):
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
e = y - f
|
||||||
dlik_grad_dsigma = ((-2*self.sigma*self.v*(self.v + 1)*e) #2 might not want to be here?
|
#sigma = np.sqrt(self.sigma2)
|
||||||
/ ((self.v*(self.sigma**2) + e**2)**2)
|
#dlik_grad_dsigma = ((-2*sigma*self.v*(self.v + 1)*e) #2 might not want to be here?
|
||||||
)
|
#/ ((self.v*self.sigma2 + e**2)**2)
|
||||||
|
#)
|
||||||
|
dlik_grad_dsigma = (-self.v*(self.v+1)*e)/((self.sigma2*self.v + e**2)**2)
|
||||||
return dlik_grad_dsigma
|
return dlik_grad_dsigma
|
||||||
|
|
||||||
def d2lik_d2f_dstd(self, y, f, extra_data=None):
|
def d2lik_d2f_dstd(self, y, f, extra_data=None):
|
||||||
|
|
@ -289,11 +301,15 @@ class student_t(likelihood_function):
|
||||||
"""
|
"""
|
||||||
assert y.shape == f.shape
|
assert y.shape == f.shape
|
||||||
e = y - f
|
e = y - f
|
||||||
dlik_hess_dsigma = ( (2*self.sigma*self.v*(self.v + 1)*((self.sigma**2)*self.v - 3*(e**2))) /
|
#sigma = np.sqrt(self.sigma2)
|
||||||
((e**2 + (self.sigma**2)*self.v)**3)
|
#dlik_hess_dsigma = ( (2*sigma*self.v*(self.v + 1)*(self.sigma2*self.v - 3*(e**2))) /
|
||||||
)
|
#((e**2 + self.sigma2*self.v)**3)
|
||||||
|
#)
|
||||||
#dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2))
|
#dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2))
|
||||||
#/ ((e**2 + (self.sigma**2)*self.v)**3) )
|
#/ ((e**2 + (self.sigma**2)*self.v)**3) )
|
||||||
|
dlik_hess_dsigma = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
||||||
|
/ (self.sigma2*self.v + (e**2))**3
|
||||||
|
)
|
||||||
return dlik_hess_dsigma
|
return dlik_hess_dsigma
|
||||||
|
|
||||||
def _gradients(self, y, f, extra_data=None):
|
def _gradients(self, y, f, extra_data=None):
|
||||||
|
|
@ -466,3 +482,148 @@ class weibull_survival(likelihood_function):
|
||||||
|
|
||||||
hess = (y**self.shape)*np.exp(f)
|
hess = (y**self.shape)*np.exp(f)
|
||||||
return np.squeeze(hess)
|
return np.squeeze(hess)
|
||||||
|
|
||||||
|
#class gaussian(likelihood_function):
|
||||||
|
#"""
|
||||||
|
#Gaussian likelihood - this is a test class for approximation schemes
|
||||||
|
#"""
|
||||||
|
#def __init__(self, variance):
|
||||||
|
#self._set_params(np.asarray(variance))
|
||||||
|
|
||||||
|
#def _get_params(self):
|
||||||
|
#return np.asarray(self.sigma2)
|
||||||
|
|
||||||
|
#def _get_param_names(self):
|
||||||
|
#return ["noise_variance"]
|
||||||
|
|
||||||
|
#def _set_params(self, x):
|
||||||
|
#self.variance = float(x)
|
||||||
|
|
||||||
|
#def link_function(self, y, f, extra_data=None):
|
||||||
|
#"""link_function $\ln p(y|f)$
|
||||||
|
#$$\ln p(y_{i}|f_{i}) = \ln $$
|
||||||
|
|
||||||
|
#:y: data
|
||||||
|
#:f: latent variables f
|
||||||
|
#:extra_data: extra_data which is not used in student t distribution
|
||||||
|
#:returns: float(likelihood evaluated for this point)
|
||||||
|
|
||||||
|
#"""
|
||||||
|
#assert y.shape == f.shape
|
||||||
|
#e = y - f
|
||||||
|
#objective = -0.5*self.D*
|
||||||
|
#return np.sum(objective)
|
||||||
|
|
||||||
|
#def dlik_df(self, y, f, extra_data=None):
|
||||||
|
#"""
|
||||||
|
#Gradient of the link function at y, given f w.r.t f
|
||||||
|
|
||||||
|
#$$\frac{dp(y_{i}|f_{i})}{df} = \frac{(v+1)(y_{i}-f_{i})}{(y_{i}-f_{i})^{2} + \sigma^{2}v}$$
|
||||||
|
|
||||||
|
#:y: data
|
||||||
|
#:f: latent variables f
|
||||||
|
#:extra_data: extra_data which is not used in student t distribution
|
||||||
|
#:returns: gradient of likelihood evaluated at points
|
||||||
|
|
||||||
|
#"""
|
||||||
|
#assert y.shape == f.shape
|
||||||
|
#e = y - f
|
||||||
|
#grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
||||||
|
#return grad
|
||||||
|
|
||||||
|
#def d2lik_d2f(self, y, f, extra_data=None):
|
||||||
|
#"""
|
||||||
|
#Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j
|
||||||
|
#i.e. second derivative link_function at y given f f_j w.r.t f and f_j
|
||||||
|
|
||||||
|
#Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||||
|
#(the distribution for y_{i} depends only on f_{i} not on f_{j!=i}
|
||||||
|
|
||||||
|
#$$\frac{d^{2}p(y_{i}|f_{i})}{d^{3}f} = \frac{(v+1)((y_{i}-f_{i})^{2} - \sigma^{2}v)}{((y_{i}-f_{i})^{2} + \sigma^{2}v)^{2}}$$
|
||||||
|
|
||||||
|
#:y: data
|
||||||
|
#:f: latent variables f
|
||||||
|
#:extra_data: extra_data which is not used in student t distribution
|
||||||
|
#:returns: array which is diagonal of covariance matrix (second derivative of likelihood evaluated at points)
|
||||||
|
#"""
|
||||||
|
#assert y.shape == f.shape
|
||||||
|
#e = y - f
|
||||||
|
#hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / (((self.sigma2*self.v) + e**2)**2)
|
||||||
|
#return hess
|
||||||
|
|
||||||
|
#def d3lik_d3f(self, y, f, extra_data=None):
|
||||||
|
#"""
|
||||||
|
#Third order derivative link_function (log-likelihood ) at y given f f_j w.r.t f and f_j
|
||||||
|
|
||||||
|
#$$\frac{d^{3}p(y_{i}|f_{i})}{d^{3}f} = \frac{-2(v+1)((y_{i} - f_{i})^3 - 3(y_{i} - f_{i}) \sigma^{2} v))}{((y_{i} - f_{i}) + \sigma^{2} v)^3}$$
|
||||||
|
#"""
|
||||||
|
#assert y.shape == f.shape
|
||||||
|
#e = y - f
|
||||||
|
#d3lik_d3f = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
||||||
|
#((e**2 + self.sigma2*self.v)**3)
|
||||||
|
#)
|
||||||
|
#return d3lik_d3f
|
||||||
|
|
||||||
|
#def lik_dstd(self, y, f, extra_data=None):
|
||||||
|
#"""
|
||||||
|
#Gradient of the likelihood (lik) w.r.t sigma parameter (standard deviation)
|
||||||
|
|
||||||
|
#Terms relavent to derivatives wrt sigma are:
|
||||||
|
#-log(sqrt(v*pi)*s) -(1/2)*(v + 1)*log(1 + (1/v)*((y-f)/(s))^2))
|
||||||
|
|
||||||
|
#$$\frac{dp(y_{i}|f_{i})}{d\sigma} = -\frac{1}{\sigma} + \frac{(1+v)(y_{i}-f_{i})^2}{\sigma^3 v(1 + \frac{1}{v}(\frac{(y_{i} - f_{i})}{\sigma^2})^2)}$$
|
||||||
|
#"""
|
||||||
|
#assert y.shape == f.shape
|
||||||
|
#e = y - f
|
||||||
|
#sigma = np.sqrt(self.sigma2)
|
||||||
|
##dlik_dsigma = ( - (1/sigma) +
|
||||||
|
##((1+self.v)*(e**2))/((sigma*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
|
||||||
|
##)
|
||||||
|
##dlik_dsigma = ( - 1 +
|
||||||
|
##((1+self.v)*(e**2))/((self.sigma2*self.sigma2)*self.v*(1 + ((e**2) / (self.sigma2*self.v)) ) )
|
||||||
|
##)
|
||||||
|
##dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1
|
||||||
|
#dlik_dsigma = (self.v*((e**2)-self.sigma2))/(sigma*((e**2)+self.sigma2*self.v))
|
||||||
|
#return dlik_dsigma
|
||||||
|
|
||||||
|
#def dlik_df_dstd(self, y, f, extra_data=None):
|
||||||
|
#"""
|
||||||
|
#Gradient of the dlik_df w.r.t sigma parameter (standard deviation)
|
||||||
|
|
||||||
|
#$$\frac{d}{d\sigma}(\frac{dp(y_{i}|f_{i})}{df}) = \frac{-2\sigma v(v + 1)(y_{i}-f_{i})}{(y_{i}-f_{i})^2 + \sigma^2 v)^2}$$
|
||||||
|
#"""
|
||||||
|
#assert y.shape == f.shape
|
||||||
|
#e = y - f
|
||||||
|
#sigma = np.sqrt(self.sigma2)
|
||||||
|
#dlik_grad_dsigma = ((-2*sigma*self.v*(self.v + 1)*e) #2 might not want to be here?
|
||||||
|
#/ ((self.v*self.sigma2 + e**2)**2)
|
||||||
|
#)
|
||||||
|
#return dlik_grad_dsigma
|
||||||
|
|
||||||
|
#def d2lik_d2f_dstd(self, y, f, extra_data=None):
|
||||||
|
#"""
|
||||||
|
#Gradient of the hessian (d2lik_d2f) w.r.t sigma parameter (standard deviation)
|
||||||
|
|
||||||
|
#$$\frac{d}{d\sigma}(\frac{d^{2}p(y_{i}|f_{i})}{d^{2}f}) = \frac{2\sigma v(v + 1)(\sigma^2 v - 3(y-f)^2)}{((y-f)^2 + \sigma^2 v)^3}$$
|
||||||
|
#"""
|
||||||
|
#assert y.shape == f.shape
|
||||||
|
#e = y - f
|
||||||
|
#sigma = np.sqrt(self.sigma2)
|
||||||
|
#dlik_hess_dsigma = ( (2*sigma*self.v*(self.v + 1)*(self.sigma2*self.v - 3*(e**2))) /
|
||||||
|
#((e**2 + self.sigma2*self.v)**3)
|
||||||
|
#)
|
||||||
|
##dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2))
|
||||||
|
##/ ((e**2 + (self.sigma**2)*self.v)**3) )
|
||||||
|
#return dlik_hess_dsigma
|
||||||
|
|
||||||
|
#def _gradients(self, y, f, extra_data=None):
|
||||||
|
##must be listed in same order as 'get_param_names'
|
||||||
|
#derivs = ([self.lik_dstd(y, f, extra_data=extra_data)],
|
||||||
|
#[self.dlik_df_dstd(y, f, extra_data=extra_data)],
|
||||||
|
#[self.d2lik_d2f_dstd(y, f, extra_data=extra_data)]
|
||||||
|
#) # lists as we might learn many parameters
|
||||||
|
## ensure we have gradients for every parameter we want to optimize
|
||||||
|
#assert len(derivs[0]) == len(self._get_param_names())
|
||||||
|
#assert len(derivs[1]) == len(self._get_param_names())
|
||||||
|
#assert len(derivs[2]) == len(self._get_param_names())
|
||||||
|
#return derivs
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue