Fixed bug where B wasn't refering to current f location

This commit is contained in:
Alan Saul 2013-07-01 15:21:47 +01:00
parent 26b3855af5
commit a7169ab1ab
4 changed files with 21 additions and 14 deletions

View file

@ -244,6 +244,9 @@ class model(parameterised):
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
prior_gradients = self._transform_gradients(self._log_prior_gradients())
obj_grads = -LL_gradients - prior_gradients
print self
#self.checkgrad(verbose=1)
import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
return obj_f, obj_grads
def optimize(self, optimizer=None, start=None, **kwargs):

View file

@ -100,7 +100,7 @@ def debug_student_t_noise_approx():
Y = Y/Y.max()
#Add student t random noise to datapoints
deg_free = 10000
deg_free = 1000
real_sd = np.sqrt(real_var)
print "Real noise std: ", real_sd
@ -152,7 +152,7 @@ def debug_student_t_noise_approx():
m.constrain_positive('t_noise_std')
#m.constrain_positive('')
m.ensure_default_constraints()
#m.constrain_fixed('t_noi', real_sd)
m.constrain_bounded('t_noi', 0.001, 10)
#m['rbf_var'] = 0.20446332
#m['rbf_leng'] = 0.85776241
#m['t_noise'] = 0.667083294421005
@ -168,6 +168,7 @@ def debug_student_t_noise_approx():
plt.plot(X_full, Y_full)
plt.ylim(-2.5, 2.5)
print "Real noise std: ", real_sd
print "or Real noise std: ", real_stu_t_std
return m
#print "Clean student t, ncg"

View file

@ -158,7 +158,6 @@ class Laplace(likelihood):
self.Wi_K_i = self.W_12*self.Bi*self.W_12.T #same as rasms R
ln_det_K_Wi__Bi = self.ln_I_KW_det + pddet(self.Sigma_tilde + self.K)
l = self.likelihood_function.link_function(self.data, self.f_hat, extra_data=self.extra_data)
#print "fDf:{} l:{} detKWiBi:{} W:{} Wi:{} Bi:{} Ki:{}".format(fDf, l, ln_det_K_Wi__Bi, W.sum(), Wi.sum(), self.Bi.sum(), Ki.sum())
y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
Z_tilde = (+ self.NORMAL_CONST
@ -199,14 +198,14 @@ class Laplace(likelihood):
self.W = -self.likelihood_function.d2lik_d2f(self.data, self.f_hat, extra_data=self.extra_data)
if not self.likelihood_function.log_concave:
self.W[self.W < 0] = 1e-8 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
self.W[self.W < 0] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
#If the likelihood is non-log-concave. We wan't to say that there is a negative variance
#To cause the posterior to become less certain than the prior and likelihood,
#This is a property only held by non-log-concave likelihoods
#TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though
#self.B, self.B_chol, self.W_12 = self._compute_B_statistics(self.K, self.W)
#self.Bi, _, _, B_det = pdinv(self.B)
self.B, self.B_chol, self.W_12 = self._compute_B_statistics(self.K, self.W)
self.Bi, _, _, B_det = pdinv(self.B)
#Do the computation again at f to get Ki_f which is useful
#b = self.W*self.f_hat + self.likelihood_function.dlik_df(self.data, self.f_hat, extra_data=self.extra_data)
@ -305,14 +304,14 @@ class Laplace(likelihood):
return -0.5*np.dot(a.T, f) + self.likelihood_function.link_function(self.data, f, extra_data=self.extra_data)
difference = np.inf
epsilon = 1e-6
epsilon = 1e-10
step_size = 1
rs = 0
i = 0
while difference > epsilon and i < MAX_ITER and rs < MAX_RESTART:
W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data)
if not self.likelihood_function.log_concave:
W[W < 0] = 0#1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
W[W < 0] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
# To cause the posterior to become less certain than the prior and likelihood,
# This is a property only held by non-log-concave likelihoods
@ -335,13 +334,13 @@ class Laplace(likelihood):
def inner_obj(step_size, old_a, da, K):
a = old_a + step_size*da
f = np.dot(K, a)
self.a = a
self.a = a # This is nasty, need to set something within an optimization though
self.f = f
return -obj(a, f)
from functools import partial
i_o = partial(inner_obj, old_a=old_a, da=da, K=self.K)
new_obj = sp.optimize.brent(i_o, tol=1e-4, maxiter=10)
new_obj = sp.optimize.brent(i_o, tol=1e-6, maxiter=10)
#update_passed = False
#while not update_passed:
@ -373,8 +372,8 @@ class Laplace(likelihood):
i += 1
#print "Positive difference obj: ", np.float(difference)
#print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size)
print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size)
#self.a = a
self.B, self.B_chol, self.W_12 = B, L, W_12
self.Bi, _, _, B_det = pdinv(self.B)
#self.B, self.B_chol, self.W_12 = B, L, W_12
#self.Bi, _, _, B_det = pdinv(self.B)
return f

View file

@ -195,8 +195,9 @@ class student_t(likelihood_function):
e = y - f
objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5)
- np.log(self.sigma * np.sqrt(self.v * np.pi))
- 0.5*np.log((self.sigma**2) * self.v * np.pi)
- (self.v + 1) * 0.5 * np.log(1 + (((e / self.sigma)**2) / self.v))
#- (self.v + 1) * 0.5 * np.log(1 + (e**2)/(self.v*(self.sigma**2)))
)
return np.sum(objective)
@ -264,6 +265,7 @@ class student_t(likelihood_function):
dlik_dsigma = ( - (1/self.sigma) +
((1+self.v)*(e**2))/((self.sigma**3)*self.v*(1 + ((e**2) / ((self.sigma**2)*self.v)) ) )
)
#dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1
return dlik_dsigma
def dlik_df_dstd(self, y, f, extra_data=None):
@ -290,6 +292,8 @@ class student_t(likelihood_function):
dlik_hess_dsigma = ( (2*self.sigma*self.v*(self.v + 1)*((self.sigma**2)*self.v - 3*(e**2))) /
((e**2 + (self.sigma**2)*self.v)**3)
)
#dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2))
#/ ((e**2 + (self.sigma**2)*self.v)**3) )
return dlik_hess_dsigma
def _gradients(self, y, f, extra_data=None):