From a7169ab1ab771e567e45d6a11ae9e13b13f3c754 Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 1 Jul 2013 15:21:47 +0100 Subject: [PATCH] Fixed bug where B wasn't refering to current f location --- GPy/core/model.py | 3 +++ GPy/examples/laplace_approximations.py | 5 +++-- GPy/likelihoods/Laplace.py | 21 ++++++++++----------- GPy/likelihoods/likelihood_functions.py | 6 +++++- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index 94202396..83a4a428 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -244,6 +244,9 @@ class model(parameterised): LL_gradients = self._transform_gradients(self._log_likelihood_gradients()) prior_gradients = self._transform_gradients(self._log_prior_gradients()) obj_grads = -LL_gradients - prior_gradients + print self + #self.checkgrad(verbose=1) + import ipdb; ipdb.set_trace() ### XXX BREAKPOINT return obj_f, obj_grads def optimize(self, optimizer=None, start=None, **kwargs): diff --git a/GPy/examples/laplace_approximations.py b/GPy/examples/laplace_approximations.py index 24f2d88c..bb621424 100644 --- a/GPy/examples/laplace_approximations.py +++ b/GPy/examples/laplace_approximations.py @@ -100,7 +100,7 @@ def debug_student_t_noise_approx(): Y = Y/Y.max() #Add student t random noise to datapoints - deg_free = 10000 + deg_free = 1000 real_sd = np.sqrt(real_var) print "Real noise std: ", real_sd @@ -152,7 +152,7 @@ def debug_student_t_noise_approx(): m.constrain_positive('t_noise_std') #m.constrain_positive('') m.ensure_default_constraints() - #m.constrain_fixed('t_noi', real_sd) + m.constrain_bounded('t_noi', 0.001, 10) #m['rbf_var'] = 0.20446332 #m['rbf_leng'] = 0.85776241 #m['t_noise'] = 0.667083294421005 @@ -168,6 +168,7 @@ def debug_student_t_noise_approx(): plt.plot(X_full, Y_full) plt.ylim(-2.5, 2.5) print "Real noise std: ", real_sd + print "or Real noise std: ", real_stu_t_std return m #print "Clean student t, ncg" diff --git a/GPy/likelihoods/Laplace.py b/GPy/likelihoods/Laplace.py index e4652f27..4c9c67df 100644 --- a/GPy/likelihoods/Laplace.py +++ b/GPy/likelihoods/Laplace.py @@ -158,7 +158,6 @@ class Laplace(likelihood): self.Wi_K_i = self.W_12*self.Bi*self.W_12.T #same as rasms R ln_det_K_Wi__Bi = self.ln_I_KW_det + pddet(self.Sigma_tilde + self.K) l = self.likelihood_function.link_function(self.data, self.f_hat, extra_data=self.extra_data) - #print "fDf:{} l:{} detKWiBi:{} W:{} Wi:{} Bi:{} Ki:{}".format(fDf, l, ln_det_K_Wi__Bi, W.sum(), Wi.sum(), self.Bi.sum(), Ki.sum()) y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde) Z_tilde = (+ self.NORMAL_CONST @@ -199,14 +198,14 @@ class Laplace(likelihood): self.W = -self.likelihood_function.d2lik_d2f(self.data, self.f_hat, extra_data=self.extra_data) if not self.likelihood_function.log_concave: - self.W[self.W < 0] = 1e-8 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur + self.W[self.W < 0] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur #If the likelihood is non-log-concave. We wan't to say that there is a negative variance #To cause the posterior to become less certain than the prior and likelihood, #This is a property only held by non-log-concave likelihoods #TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though - #self.B, self.B_chol, self.W_12 = self._compute_B_statistics(self.K, self.W) - #self.Bi, _, _, B_det = pdinv(self.B) + self.B, self.B_chol, self.W_12 = self._compute_B_statistics(self.K, self.W) + self.Bi, _, _, B_det = pdinv(self.B) #Do the computation again at f to get Ki_f which is useful #b = self.W*self.f_hat + self.likelihood_function.dlik_df(self.data, self.f_hat, extra_data=self.extra_data) @@ -305,14 +304,14 @@ class Laplace(likelihood): return -0.5*np.dot(a.T, f) + self.likelihood_function.link_function(self.data, f, extra_data=self.extra_data) difference = np.inf - epsilon = 1e-6 + epsilon = 1e-10 step_size = 1 rs = 0 i = 0 while difference > epsilon and i < MAX_ITER and rs < MAX_RESTART: W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data) if not self.likelihood_function.log_concave: - W[W < 0] = 0#1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur + W[W < 0] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur # If the likelihood is non-log-concave. We wan't to say that there is a negative variance # To cause the posterior to become less certain than the prior and likelihood, # This is a property only held by non-log-concave likelihoods @@ -335,13 +334,13 @@ class Laplace(likelihood): def inner_obj(step_size, old_a, da, K): a = old_a + step_size*da f = np.dot(K, a) - self.a = a + self.a = a # This is nasty, need to set something within an optimization though self.f = f return -obj(a, f) from functools import partial i_o = partial(inner_obj, old_a=old_a, da=da, K=self.K) - new_obj = sp.optimize.brent(i_o, tol=1e-4, maxiter=10) + new_obj = sp.optimize.brent(i_o, tol=1e-6, maxiter=10) #update_passed = False #while not update_passed: @@ -373,8 +372,8 @@ class Laplace(likelihood): i += 1 #print "Positive difference obj: ", np.float(difference) - #print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size) + print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size) #self.a = a - self.B, self.B_chol, self.W_12 = B, L, W_12 - self.Bi, _, _, B_det = pdinv(self.B) + #self.B, self.B_chol, self.W_12 = B, L, W_12 + #self.Bi, _, _, B_det = pdinv(self.B) return f diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index ebc87f56..57627198 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -195,8 +195,9 @@ class student_t(likelihood_function): e = y - f objective = (+ gammaln((self.v + 1) * 0.5) - gammaln(self.v * 0.5) - - np.log(self.sigma * np.sqrt(self.v * np.pi)) + - 0.5*np.log((self.sigma**2) * self.v * np.pi) - (self.v + 1) * 0.5 * np.log(1 + (((e / self.sigma)**2) / self.v)) + #- (self.v + 1) * 0.5 * np.log(1 + (e**2)/(self.v*(self.sigma**2))) ) return np.sum(objective) @@ -264,6 +265,7 @@ class student_t(likelihood_function): dlik_dsigma = ( - (1/self.sigma) + ((1+self.v)*(e**2))/((self.sigma**3)*self.v*(1 + ((e**2) / ((self.sigma**2)*self.v)) ) ) ) + #dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1 return dlik_dsigma def dlik_df_dstd(self, y, f, extra_data=None): @@ -290,6 +292,8 @@ class student_t(likelihood_function): dlik_hess_dsigma = ( (2*self.sigma*self.v*(self.v + 1)*((self.sigma**2)*self.v - 3*(e**2))) / ((e**2 + (self.sigma**2)*self.v)**3) ) + #dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2)) + #/ ((e**2 + (self.sigma**2)*self.v)**3) ) return dlik_hess_dsigma def _gradients(self, y, f, extra_data=None):