Fixed bug where B wasn't refering to current f location

2026-07-26 17:11:06 +02:00 · 2013-07-01 15:21:47 +01:00 · 2013-07-01 15:21:47 +01:00 · a7169ab1ab
commit a7169ab1ab
parent 26b3855af5
4 changed files with 21 additions and 14 deletions
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -244,6 +244,9 @@ class model(parameterised):
        LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
        prior_gradients = self._transform_gradients(self._log_prior_gradients())
        obj_grads = -LL_gradients - prior_gradients
+        print self
+        #self.checkgrad(verbose=1)
+        import ipdb; ipdb.set_trace() ### XXX BREAKPOINT
        return obj_f, obj_grads

    def optimize(self, optimizer=None, start=None, **kwargs):
--- a/GPy/examples/laplace_approximations.py
+++ b/GPy/examples/laplace_approximations.py
@ -100,7 +100,7 @@ def debug_student_t_noise_approx():
    Y = Y/Y.max()

    #Add student t random noise to datapoints
-    deg_free = 10000
+    deg_free = 1000

    real_sd = np.sqrt(real_var)
    print "Real noise std: ", real_sd
@ -152,7 +152,7 @@ def debug_student_t_noise_approx():
    m.constrain_positive('t_noise_std')
    #m.constrain_positive('')
    m.ensure_default_constraints()
-    #m.constrain_fixed('t_noi', real_sd)
+    m.constrain_bounded('t_noi', 0.001, 10)
    #m['rbf_var'] = 0.20446332
    #m['rbf_leng'] = 0.85776241
    #m['t_noise'] = 0.667083294421005
@ -168,6 +168,7 @@ def debug_student_t_noise_approx():
        plt.plot(X_full, Y_full)
        plt.ylim(-2.5, 2.5)
    print "Real noise std: ", real_sd
+    print "or Real noise std: ", real_stu_t_std
    return m

    #print "Clean student t, ncg"
--- a/GPy/likelihoods/Laplace.py
+++ b/GPy/likelihoods/Laplace.py
@ -158,7 +158,6 @@ class Laplace(likelihood):
        self.Wi_K_i = self.W_12*self.Bi*self.W_12.T #same as rasms R
        ln_det_K_Wi__Bi = self.ln_I_KW_det + pddet(self.Sigma_tilde + self.K)
        l = self.likelihood_function.link_function(self.data, self.f_hat, extra_data=self.extra_data)
-        #print "fDf:{}   l:{}   detKWiBi:{}   W:{}   Wi:{}   Bi:{}   Ki:{}".format(fDf, l, ln_det_K_Wi__Bi, W.sum(), Wi.sum(), self.Bi.sum(), Ki.sum())

        y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
        Z_tilde = (+ self.NORMAL_CONST
@ -199,14 +198,14 @@ class Laplace(likelihood):
        self.W = -self.likelihood_function.d2lik_d2f(self.data, self.f_hat, extra_data=self.extra_data)

        if not self.likelihood_function.log_concave:
-            self.W[self.W < 0] = 1e-8  # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
+            self.W[self.W < 0] = 1e-6  # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
                                       #If the likelihood is non-log-concave. We wan't to say that there is a negative variance
                                       #To cause the posterior to become less certain than the prior and likelihood,
                                       #This is a property only held by non-log-concave likelihoods

        #TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though
-        #self.B, self.B_chol, self.W_12 = self._compute_B_statistics(self.K, self.W)
-        #self.Bi, _, _, B_det = pdinv(self.B)
+        self.B, self.B_chol, self.W_12 = self._compute_B_statistics(self.K, self.W)
+        self.Bi, _, _, B_det = pdinv(self.B)

        #Do the computation again at f to get Ki_f which is useful
        #b = self.W*self.f_hat + self.likelihood_function.dlik_df(self.data, self.f_hat, extra_data=self.extra_data)
@ -305,14 +304,14 @@ class Laplace(likelihood):
            return -0.5*np.dot(a.T, f) + self.likelihood_function.link_function(self.data, f, extra_data=self.extra_data)

        difference = np.inf
-        epsilon = 1e-6
+        epsilon = 1e-10
        step_size = 1
        rs = 0
        i = 0
        while difference > epsilon and i < MAX_ITER and rs < MAX_RESTART:
            W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data)
            if not self.likelihood_function.log_concave:
-                W[W < 0] = 0#1e-6     # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
+                W[W < 0] = 1e-6     # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
                                    # If the likelihood is non-log-concave. We wan't to say that there is a negative variance
                                    # To cause the posterior to become less certain than the prior and likelihood,
                                    # This is a property only held by non-log-concave likelihoods
@ -335,13 +334,13 @@ class Laplace(likelihood):
            def inner_obj(step_size, old_a, da, K):
                a = old_a + step_size*da
                f = np.dot(K, a)
-                self.a = a
+                self.a = a # This is nasty, need to set something within an optimization though
                self.f = f
                return -obj(a, f)

            from functools import partial
            i_o = partial(inner_obj, old_a=old_a, da=da, K=self.K)
-            new_obj = sp.optimize.brent(i_o, tol=1e-4, maxiter=10)
+            new_obj = sp.optimize.brent(i_o, tol=1e-6, maxiter=10)

            #update_passed = False
            #while not update_passed:
@ -373,8 +372,8 @@ class Laplace(likelihood):
            i += 1

        #print "Positive difference obj: ", np.float(difference)
-        #print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size)
+        print "Iterations: {}, Step size reductions: {}, Final_difference: {}, step_size: {}".format(i, rs, difference, step_size)
        #self.a = a
-        self.B, self.B_chol, self.W_12 = B, L, W_12
-        self.Bi, _, _, B_det = pdinv(self.B)
+        #self.B, self.B_chol, self.W_12 = B, L, W_12
+        #self.Bi, _, _, B_det = pdinv(self.B)
        return f
--- a/GPy/likelihoods/likelihood_functions.py
+++ b/GPy/likelihoods/likelihood_functions.py
@ -195,8 +195,9 @@ class student_t(likelihood_function):
        e = y - f
        objective = (+ gammaln((self.v + 1) * 0.5)
                     - gammaln(self.v * 0.5)
-                     - np.log(self.sigma * np.sqrt(self.v * np.pi))
+                     - 0.5*np.log((self.sigma**2) * self.v * np.pi)
                     - (self.v + 1) * 0.5 * np.log(1 + (((e / self.sigma)**2) / self.v))
+                     #- (self.v + 1) * 0.5 * np.log(1 + (e**2)/(self.v*(self.sigma**2)))
                    )
        return np.sum(objective)

@ -264,6 +265,7 @@ class student_t(likelihood_function):
        dlik_dsigma = ( - (1/self.sigma) +
                        ((1+self.v)*(e**2))/((self.sigma**3)*self.v*(1 + ((e**2) / ((self.sigma**2)*self.v)) ) )
                      )
+        #dlik_dsigma = (((self.v + 1)*(e**2))/((e**2) + self.v*(self.sigma**2))) - 1
        return dlik_dsigma

    def dlik_df_dstd(self, y, f, extra_data=None):
@ -290,6 +292,8 @@ class student_t(likelihood_function):
        dlik_hess_dsigma = (  (2*self.sigma*self.v*(self.v + 1)*((self.sigma**2)*self.v - 3*(e**2))) /
                              ((e**2 + (self.sigma**2)*self.v)**3)
                           )
+        #dlik_hess_dsigma = ( 2*(self.v + 1)*self.v*(self.sigma**2)*((e**2) + (self.v*(self.sigma**2)) - 4*(e**2))
+                             #/ ((e**2 + (self.sigma**2)*self.v)**3) )
        return dlik_hess_dsigma

    def _gradients(self, y, f, extra_data=None):