Merge branch 'devel' into plot_density

2026-05-21 14:05:14 +02:00 · 2015-10-11 14:05:09 +01:00 · 2015-10-11 14:05:09 +01:00 · 3833ac5a49
commit 3833ac5a49
parent d770c46da5 4a6ff72080
4 changed files with 87 additions and 25 deletions
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -160,32 +160,40 @@ class SparseGP(GP):
        else:
            psi0_star = kern.psi0(self._predictive_variable, Xnew)
            psi1_star = kern.psi1(self._predictive_variable, Xnew)
-            #psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
+            psi2_star = kern.psi2n(self._predictive_variable, Xnew) 
            la = self.posterior.woodbury_vector
            mu = np.dot(psi1_star, la) # TODO: dimensions?
+            N,M,D = psi0_star.shape[0],psi1_star.shape[1], la.shape[1]

            if full_cov:
                raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
-                var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1]))
+                var = np.zeros((Xnew.shape[0], la.shape[1], la.shape[1]))
                di = np.diag_indices(la.shape[1])
            else:
-                var = np.empty((Xnew.shape[0], la.shape[1]))
-
-            for i in range(Xnew.shape[0]):
-                _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
-                psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
-                tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
-
-                var_ = mdot(la.T, tmp, la)
-                p0 = psi0_star[i]
-                t = np.atleast_3d(self.posterior.woodbury_inv)
-                t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
-
-                if full_cov:
-                    var_[di] += p0
-                    var_[di] += -t2
-                    var[i] = var_
+                tmp = psi2_star - psi1_star[:,:,None]*psi1_star[:,None,:]
+                var = (tmp.reshape(-1,M).dot(la).reshape(N,M,D)*la[None,:,:]).sum(1) + psi0_star[:,None] 
+                if self.posterior.woodbury_inv.ndim==2:
+                    var += -psi2_star.reshape(N,-1).dot(self.posterior.woodbury_inv.flat)[:,None]
                else:
-                    var[i] = np.diag(var_)+p0-t2
+                    var += -psi2_star.reshape(N,-1).dot(self.posterior.woodbury_inv.reshape(-1,D))
+            assert np.all(var>=-1e-5), "The predicted variance goes negative!: "+str(var)
+            var = np.clip(var,1e-15,np.inf)
+
+#             for i in range(Xnew.shape[0]):
+#                 _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
+#                 psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
+#                 tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
+# 
+#                 var_ = mdot(la.T, tmp, la)
+#                 p0 = psi0_star[i]
+#                 t = np.atleast_3d(self.posterior.woodbury_inv)
+#                 t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
+# 
+#                 if full_cov:
+#                     var_[di] += p0
+#                     var_[di] += -t2
+#                     var[i] = var_
+#                 else:
+#                     var[i] = np.diag(var_)+p0-t2

        return mu, var
--- a/GPy/inference/latent_function_inference/laplace.py
+++ b/GPy/inference/latent_function_inference/laplace.py
@ -32,8 +32,8 @@ class Laplace(LatentFunctionInference):

        """

-        self._mode_finding_tolerance = 1e-7
-        self._mode_finding_max_iter = 60
+        self._mode_finding_tolerance = 1e-4
+        self._mode_finding_max_iter = 30
        self.bad_fhat = False
        #Store whether it is the first run of the inference so that we can choose whether we need
        #to calculate things or reuse old variables
@ -209,9 +209,12 @@ class Laplace(LatentFunctionInference):
            Ki_f_new = Ki_f + step*dKi_f
            f_new = np.dot(K, Ki_f_new)
            #print "new {} vs old {}".format(obj(Ki_f_new, f_new), obj(Ki_f, f))
-            if obj(Ki_f_new, f_new) < obj(Ki_f, f):
+            old_obj = obj(Ki_f, f)
+            new_obj = obj(Ki_f_new, f_new)
+            if new_obj < old_obj:
                raise ValueError("Shouldn't happen, brent optimization failing")
-            difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
+            difference = np.abs(new_obj - old_obj)
+            # difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
            Ki_f = Ki_f_new
            f = f_new
            iteration += 1
@ -316,6 +319,9 @@ class Laplace(LatentFunctionInference):
        if not log_concave:
            #print "Under 1e-10: {}".format(np.sum(W < 1e-6))
            W = np.clip(W, 1e-6, 1e+30)
+            # For student-T we can clip this more intelligently. If the
+            # objective has hardly changed, we can increase the clipping limit
+            # by ((v+1)/v)/sigma2
            # NOTE: when setting a parameter inside parameters_changed it will allways come to closed update circles!!!
            #W.__setitem__(W < 1e-6, 1e-6, update=False)  # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
                                # If the likelihood is non-log-concave. We wan't to say that there is a negative variance
--- a/GPy/inference/optimization/optimization.py
+++ b/GPy/inference/optimization/optimization.py
@ -134,6 +134,33 @@ class opt_lbfgsb(Optimizer):
        if opt_result[2]['warnflag']==2:
            self.status = 'Error' + str(opt_result[2]['task'])
            
+class opt_bfgs(Optimizer):
+    def __init__(self, *args, **kwargs):
+        Optimizer.__init__(self, *args, **kwargs)
+        self.opt_name = "BFGS (Scipy implementation)"
+
+    def opt(self, f_fp=None, f=None, fp=None):
+        """
+        Run the optimizer
+
+        """
+        rcstrings = ['','Maximum number of iterations exceeded', 'Gradient and/or function calls not changing']
+
+        opt_dict = {}
+        if self.xtol is not None:
+            print("WARNING: bfgs doesn't have an xtol arg, so I'm going to ignore it")
+        if self.ftol is not None:
+            print("WARNING: bfgs doesn't have an ftol arg, so I'm going to ignore it")
+        if self.gtol is not None:
+            opt_dict['pgtol'] = self.gtol
+
+        opt_result = optimize.fmin_bfgs(f, self.x_init, fp, disp=self.messages,
+                                            maxiter=self.max_iters, full_output=True, **opt_dict)
+        self.x_opt = opt_result[0]
+        self.f_opt = f_fp(self.x_opt)[0]
+        self.funct_eval = opt_result[4]
+        self.status = rcstrings[opt_result[6]]
+
 class opt_simplex(Optimizer):
    def __init__(self, *args, **kwargs):
        Optimizer.__init__(self, *args, **kwargs)
@ -245,6 +272,7 @@ def get_optimizer(f_min):
    optimizers = {'fmin_tnc': opt_tnc,
          'simplex': opt_simplex,
          'lbfgsb': opt_lbfgsb,
+          'org-bfgs': opt_bfgs,
          'scg': opt_SCG,
          'adadelta':Opt_Adadelta}

--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@ -7,6 +7,8 @@ from .stationary import Stationary
 from .psi_comp import PSICOMP_RBF
 from .psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU
 from ...util.config import *
+from ...core import Param
+from GPy.core.parameterization.transformations import Logexp

 class RBF(Stationary):
    """
@ -18,12 +20,17 @@ class RBF(Stationary):

    """
    _support_GPU = True
-    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False, inv_l=False):
        super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
        if self.useGPU:
            self.psicomp = PSICOMP_RBF_GPU()
        else:
            self.psicomp = PSICOMP_RBF()
+        self.use_invLengthscale = inv_l
+        if inv_l:
+            self.unlink_parameter(self.lengthscale)
+            self.inv_l = Param('inv_lengthscale',1./self.lengthscale**2, Logexp())
+            self.link_parameter(self.inv_l)

    def K_of_r(self, r):
        return self.variance * np.exp(-0.5 * r**2)
@ -48,6 +55,10 @@ class RBF(Stationary):
        assert self.input_dim == 1 #TODO: higher dim spectra?
        return self.variance*np.sqrt(2*np.pi)*self.lengthscale*np.exp(-self.lengthscale*2*omega**2/2)
    
+    def parameters_changed(self):
+        if self.use_invLengthscale: self.lengthscale[:] = 1./np.sqrt(self.inv_l+1e-200)
+        super(RBF,self).parameters_changed()
+
    #---------------------------------------#
    #             PSI statistics            #
    #---------------------------------------#
@ -68,6 +79,8 @@ class RBF(Stationary):
        dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[:2]
        self.variance.gradient = dL_dvar
        self.lengthscale.gradient = dL_dlengscale
+        if self.use_invLengthscale:
+            self.inv_l.gradient = dL_dlengscale*(self.lengthscale**3/-2.)

    def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2]
@ -75,3 +88,10 @@ class RBF(Stationary):
    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[3:]
    
+    def update_gradients_diag(self, dL_dKdiag, X):
+        super(RBF,self).update_gradients_diag(dL_dKdiag, X)
+        if self.use_invLengthscale: self.inv_l.gradient =self.lengthscale.gradient*(self.lengthscale**3/-2.)
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        super(RBF,self).update_gradients_full(dL_dK, X, X2)
+        if self.use_invLengthscale: self.inv_l.gradient =self.lengthscale.gradient*(self.lengthscale**3/-2.)