From e6261c787cf68b67a1e8ec49be79cd455529028f Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Fri, 2 Oct 2015 22:43:54 +0100
Subject: [PATCH 1/5] add original bfgs optimizer and add rbf with inverse lengthscale

---
 GPy/inference/optimization/optimization.py | 28 ++++++++++++++++++++++
 GPy/kern/_src/rbf.py                       | 22 ++++++++++++++++-
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/GPy/inference/optimization/optimization.py b/GPy/inference/optimization/optimization.py
index 9aab1bec..a7e44f2e 100644
--- a/GPy/inference/optimization/optimization.py
+++ b/GPy/inference/optimization/optimization.py
@@ -143,6 +143,33 @@ class opt_lbfgsb(Optimizer):
         #a more helpful error message is available in opt_result in the Error case
         if opt_result[2]['warnflag']==2:
             self.status = 'Error' + str(opt_result[2]['task'])
+
+class opt_bfgs(Optimizer):
+    def __init__(self, *args, **kwargs):
+        Optimizer.__init__(self, *args, **kwargs)
+        self.opt_name = "BFGS (Scipy implementation)"
+
+    def opt(self, f_fp=None, f=None, fp=None):
+        """
+        Run the optimizer
+
+        """
+        rcstrings = ['', 'Maximum number of iterations exceeded', 'Gradient and/or function calls not changing']
+
+        opt_dict = {}
+        if self.xtol is not None:
+            print("WARNING: bfgs doesn't have an xtol arg, so I'm going to ignore it")
+        if self.ftol is not None:
+            print("WARNING: bfgs doesn't have an ftol arg, so I'm going to ignore it")
+        if self.gtol is not None:
+            opt_dict['gtol'] = self.gtol
+
+        opt_result = optimize.fmin_bfgs(f, self.x_init, fp, disp=self.messages,
+                                        maxiter=self.max_iters, full_output=True, **opt_dict)
+        self.x_opt = opt_result[0]
+        self.f_opt = f_fp(self.x_opt)[0]
+        self.funct_eval = opt_result[4]
+        self.status = rcstrings[opt_result[6]]
 
 class opt_simplex(Optimizer):
     def __init__(self, *args, **kwargs):
@@ -255,6 +282,7 @@ def get_optimizer(f_min):
     optimizers = {'fmin_tnc': opt_tnc,
                   'simplex': opt_simplex,
                   'lbfgsb': opt_lbfgsb,
+                  'org-bfgs': opt_bfgs,
                   'scg': opt_SCG,
                   'adadelta':Opt_Adadelta}
 
diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py
index cb34738a..f4fb2ad5 100644
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@@ -7,6 +7,8 @@ from .stationary import Stationary
 from .psi_comp import PSICOMP_RBF
 from .psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU
 from ...util.config import *
+from ...core import Param
+from GPy.core.parameterization.transformations import Logexp
 
 class RBF(Stationary):
     """
@@ -18,12 +20,17 @@ class RBF(Stationary):
     """
     _support_GPU = True
 
-    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
+    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False, inv_l=False):
         super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
         if self.useGPU:
             self.psicomp = PSICOMP_RBF_GPU()
         else:
             self.psicomp = PSICOMP_RBF()
+        self.use_invLengthscale = inv_l
+        if inv_l:
+            self.unlink_parameter(self.lengthscale)
+            self.inv_l = Param('inv_lengthscale',1./self.lengthscale**2, Logexp())
+            self.link_parameter(self.inv_l)
 
     def K_of_r(self, r):
         return self.variance * np.exp(-0.5 * r**2)
@@ -47,6 +54,10 @@ class RBF(Stationary):
     def spectrum(self, omega):
         assert self.input_dim == 1 #TODO: higher dim spectra?
         return self.variance*np.sqrt(2*np.pi)*self.lengthscale*np.exp(-self.lengthscale*2*omega**2/2)
+
+    def parameters_changed(self):
+        if self.use_invLengthscale: self.lengthscale[:] = 1./np.sqrt(self.inv_l+1e-200)
+        super(RBF,self).parameters_changed()
 
     #---------------------------------------#
     #             PSI statistics            #
     #---------------------------------------#
@@ -68,10 +79,19 @@ class RBF(Stationary):
         dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[:2]
         self.variance.gradient = dL_dvar
         self.lengthscale.gradient = dL_dlengscale
+        if self.use_invLengthscale:
+            self.inv_l.gradient = dL_dlengscale*(self.lengthscale**3/-2.)
 
     def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2]
 
     def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[3:]
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        super(RBF,self).update_gradients_diag(dL_dKdiag, X)
+        if self.use_invLengthscale: self.inv_l.gradient = self.lengthscale.gradient*(self.lengthscale**3/-2.)
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        super(RBF,self).update_gradients_full(dL_dK, X, X2)
+        if self.use_invLengthscale: self.inv_l.gradient = self.lengthscale.gradient*(self.lengthscale**3/-2.)
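A note on the opt_bfgs wrapper above: scipy.optimize.fmin_bfgs exposes gtol (pgtol belongs to fmin_l_bfgs_b), and with full_output=True it returns the tuple (xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag), which is why the wrapper reads opt_result[4] for the evaluation count and opt_result[6] for the status. A minimal standalone check on a toy quadratic (the objective and gradient below are illustrative stand-ins, not part of the patch):

import numpy as np
from scipy import optimize

# Toy stand-ins for the model's objective f and gradient fp.
f = lambda x: np.sum((x - 3.0)**2)
fp = lambda x: 2.0*(x - 3.0)

opt_result = optimize.fmin_bfgs(f, np.zeros(2), fp, gtol=1e-6,
                                maxiter=100, full_output=True, disp=False)
xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag = opt_result
# warnflag: 0 = converged, 1 = max iterations exceeded, 2 = no progress,
# matching the rcstrings lookup in opt_bfgs.
print(xopt, func_calls, warnflag)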
From 2dc1b14934a1d22855e81e77a0f2eee52a0d1f7f Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Sun, 4 Oct 2015 12:28:07 +0100
Subject: [PATCH 2/5] New difference method for laplace

---
 .../latent_function_inference/laplace.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py
index 69eda64a..a77c7631 100644
--- a/GPy/inference/latent_function_inference/laplace.py
+++ b/GPy/inference/latent_function_inference/laplace.py
@@ -32,8 +32,8 @@ class Laplace(LatentFunctionInference):
         """
-        self._mode_finding_tolerance = 1e-7
-        self._mode_finding_max_iter = 60
+        self._mode_finding_tolerance = 1e-4
+        self._mode_finding_max_iter = 30
         self.bad_fhat = False
         #Store whether it is the first run of the inference so that we can choose whether we need
         #to calculate things or reuse old variables
@@ -209,9 +209,12 @@ class Laplace(LatentFunctionInference):
             Ki_f_new = Ki_f + step*dKi_f
             f_new = np.dot(K, Ki_f_new)
             #print "new {} vs old {}".format(obj(Ki_f_new, f_new), obj(Ki_f, f))
-            if obj(Ki_f_new, f_new) < obj(Ki_f, f):
+            old_obj = obj(Ki_f, f)
+            new_obj = obj(Ki_f_new, f_new)
+            if new_obj < old_obj:
                 raise ValueError("Shouldn't happen, brent optimization failing")
-            difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
+            difference = np.abs(new_obj - old_obj)
+            # difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
             Ki_f = Ki_f_new
             f = f_new
             iteration += 1
@@ -316,6 +319,9 @@ class Laplace(LatentFunctionInference):
         if not log_concave:
             #print "Under 1e-10: {}".format(np.sum(W < 1e-6))
             W = np.clip(W, 1e-6, 1e+30)
+            # For student-T we can clip this more intelligently. If the
+            # objective has hardly changed, we can increase the clipping limit
+            # by ((v+1)/v)/sigma2
             # NOTE: when setting a parameter inside parameters_changed it will always lead to circular update loops!!!
             #W.__setitem__(W < 1e-6, 1e-6, update=False)
             # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
             # if the likelihood is non-log-concave. We want to say that there is a negative variance
@@ -339,7 +345,7 @@
         #compute vital matrices
         C = np.dot(LiW12, K)
-        Ki_W_i = K - C.T.dot(C)
+        Ki_W_i = K - C.T.dot(C)
         I_KW_i = np.eye(K.shape[0]) - np.dot(K, K_Wi_i)
         logdet_I_KW = 2*np.sum(np.log(np.diag(L)))
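The difference criterion introduced above stops mode finding when the objective stops improving, rather than when np.abs(np.sum(f_new - f)) is small, a test in which positive and negative element-wise changes can cancel and mask real movement. A minimal sketch of the pattern, assuming a concave toy objective and a fixed gradient step in place of the Brent line search used by the real code:

import numpy as np

def find_mode(obj, grad, x0, tol=1e-4, max_iter=30):
    # Gradient ascent with the objective-difference stopping rule from the patch.
    x, old_obj = x0, obj(x0)
    for _ in range(max_iter):
        x_new = x + 0.1*grad(x)        # stand-in for the Brent line search step
        new_obj = obj(x_new)
        if new_obj < old_obj:          # an ascent step should never lower the objective
            raise ValueError("step decreased the objective")
        difference = np.abs(new_obj - old_obj)
        x, old_obj = x_new, new_obj
        if difference < tol:           # objective-based convergence test, as in the patch
            break
    return x

# Concave toy objective with its mode at x = 2.
x_hat = find_mode(lambda x: -(x - 2.0)**2, lambda x: -2.0*(x - 2.0), x0=0.0)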
From 8689b3dfd0b17ff23b97d8b4c4c63006d3c07af9 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Mon, 5 Oct 2015 22:34:49 +0100
Subject: [PATCH 3/5] a more efficient implementation of prediction with uncertain inputs

---
 GPy/core/sparse_gp.py | 46 +++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 9f570591..6b364ab0 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -160,32 +160,40 @@ class SparseGP(GP):
         else:
             psi0_star = kern.psi0(self._predictive_variable, Xnew)
             psi1_star = kern.psi1(self._predictive_variable, Xnew)
-            #psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
+            psi2_star = kern.psi2n(self._predictive_variable, Xnew)
             la = self.posterior.woodbury_vector
             mu = np.dot(psi1_star, la) # TODO: dimensions?
+            N,M,D = psi0_star.shape[0],psi1_star.shape[1], la.shape[1]
             if full_cov:
                 raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
-                var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1]))
+                var = np.zeros((Xnew.shape[0], la.shape[1], la.shape[1]))
                 di = np.diag_indices(la.shape[1])
             else:
-                var = np.empty((Xnew.shape[0], la.shape[1]))
-
-            for i in range(Xnew.shape[0]):
-                _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
-                psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
-                tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
-
-                var_ = mdot(la.T, tmp, la)
-                p0 = psi0_star[i]
-                t = np.atleast_3d(self.posterior.woodbury_inv)
-                t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
-
-                if full_cov:
-                    var_[di] += p0
-                    var_[di] += -t2
-                    var[i] = var_
+                tmp = psi2_star - psi1_star[:,:,None]*psi1_star[:,None,:]
+                var = (tmp.reshape(-1,M).dot(la).reshape(N,M,D)*la[None,:,:]).sum(1) + psi0_star[:,None]
+                if self.posterior.woodbury_inv.ndim==2:
+                    var += -psi2_star.reshape(N,-1).dot(self.posterior.woodbury_inv.flat)[:,None]
                 else:
-                    var[i] = np.diag(var_)+p0-t2
+                    var += -psi2_star.reshape(N,-1).dot(self.posterior.woodbury_inv.reshape(-1,D))
+                assert np.all(var>=-1e-5), "The predicted variance goes negative!: "+str(var)
+                var = np.clip(var,1e-15,np.inf)
+
+#             for i in range(Xnew.shape[0]):
+#                 _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
+#                 psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
+#                 tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
+#
+#                 var_ = mdot(la.T, tmp, la)
+#                 p0 = psi0_star[i]
+#                 t = np.atleast_3d(self.posterior.woodbury_inv)
+#                 t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
+#
+#                 if full_cov:
+#                     var_[di] += p0
+#                     var_[di] += -t2
+#                     var[i] = var_
+#                 else:
+#                     var[i] = np.diag(var_)+p0-t2
             return mu, var
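The vectorised branch above computes, for every test point n at once, the quadratic form la' (psi2_n - psi1_n psi1_n') la that the commented-out loop evaluated one point at a time. A self-contained check of that contraction against the loop, using random arrays with the shapes the code assumes (N test points, M inducing points, D output columns; all data here is synthetic):

import numpy as np

N, M, D = 4, 5, 2
rng = np.random.RandomState(0)
psi1 = rng.randn(N, M)
psi2 = rng.randn(N, M, M)
la = rng.randn(M, D)          # stands in for posterior.woodbury_vector

# Batched form, as in the patch: la' (psi2_n - psi1_n psi1_n') la for every n.
tmp = psi2 - psi1[:, :, None]*psi1[:, None, :]
batched = (tmp.reshape(-1, M).dot(la).reshape(N, M, D)*la[None, :, :]).sum(1)

# Loop form, as in the code being replaced.
looped = np.empty((N, D))
for i in range(N):
    t = psi2[i] - np.outer(psi1[i], psi1[i])
    looped[i] = np.diag(la.T.dot(t).dot(la))

assert np.allclose(batched, looped)

The reshape(-1, M).dot(la) trick turns the N separate (M, M) by (M, D) products into a single matrix multiply, which is where the speed-up over the per-point loop comes from.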
From 326ed31fbfff2907bc92d2d444c74d5a24b22691 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Wed, 7 Oct 2015 13:14:47 +0100
Subject: [PATCH 4/5] change the inverse lengthscale of rbf

---
 GPy/kern/_src/rbf.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py
index f4fb2ad5..d7d1554d 100644
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@@ -29,7 +29,7 @@ class RBF(Stationary):
         self.use_invLengthscale = inv_l
         if inv_l:
             self.unlink_parameter(self.lengthscale)
-            self.inv_l = Param('inv_lengthscale',1./self.lengthscale**2, Logexp())
+            self.inv_l = Param('inv_lengthscale',1./self.lengthscale, Logexp())
             self.link_parameter(self.inv_l)
 
     def K_of_r(self, r):
@@ -56,7 +56,7 @@ class RBF(Stationary):
         return self.variance*np.sqrt(2*np.pi)*self.lengthscale*np.exp(-self.lengthscale*2*omega**2/2)
 
     def parameters_changed(self):
-        if self.use_invLengthscale: self.lengthscale[:] = 1./np.sqrt(self.inv_l+1e-200)
+        if self.use_invLengthscale: self.lengthscale[:] = 1./(self.inv_l+1e-200)
         super(RBF,self).parameters_changed()
 
     #---------------------------------------#
@@ -80,7 +80,7 @@ class RBF(Stationary):
         self.variance.gradient = dL_dvar
         self.lengthscale.gradient = dL_dlengscale
         if self.use_invLengthscale:
-            self.inv_l.gradient = dL_dlengscale*(self.lengthscale**3/-2.)
+            self.inv_l.gradient = dL_dlengscale*(-self.lengthscale**2)
 
     def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2]
@@ -90,8 +90,8 @@ class RBF(Stationary):
 
     def update_gradients_diag(self, dL_dKdiag, X):
         super(RBF,self).update_gradients_diag(dL_dKdiag, X)
-        if self.use_invLengthscale: self.inv_l.gradient = self.lengthscale.gradient*(self.lengthscale**3/-2.)
+        if self.use_invLengthscale: self.inv_l.gradient = self.lengthscale.gradient*(-self.lengthscale**2)
     def update_gradients_full(self, dL_dK, X, X2=None):
         super(RBF,self).update_gradients_full(dL_dK, X, X2)
-        if self.use_invLengthscale: self.inv_l.gradient = self.lengthscale.gradient*(self.lengthscale**3/-2.)
+        if self.use_invLengthscale: self.inv_l.gradient = self.lengthscale.gradient*(-self.lengthscale**2)
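Both gradient factors seen in this patch series follow from the chain rule dL/dtheta = dL/dl * dl/dtheta for the reparameterised lengthscale: this patch uses theta = 1/l, for which dl/dtheta = -l**2, while patch 1 (and the revert that follows) uses theta = 1/l**2, for which dl/dtheta = -l**3/2. A quick finite-difference check of both factors, with toy values only:

import numpy as np

l, eps = 1.7, 1e-7

# theta = 1/l  =>  l(theta) = 1/theta, so dl/dtheta = -1/theta**2 = -l**2
theta = 1.0/l
fd = (1.0/(theta + eps) - 1.0/theta)/eps
assert np.isclose(fd, -l**2, rtol=1e-4)

# theta = 1/l**2  =>  l(theta) = theta**-0.5, so dl/dtheta = -0.5*theta**-1.5 = -l**3/2
theta = 1.0/l**2
fd = ((theta + eps)**-0.5 - theta**-0.5)/eps
assert np.isclose(fd, l**3/-2.0, rtol=1e-4)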
From 4a6ff7208081edef452feca089f32ba07c424956 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai
Date: Thu, 8 Oct 2015 16:29:57 +0100
Subject: [PATCH 5/5] Revert "change the inverse lengthscale of rbf"

This reverts commit 326ed31fbfff2907bc92d2d444c74d5a24b22691.
---
 GPy/kern/_src/rbf.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py
index d7d1554d..f4fb2ad5 100644
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@@ -29,7 +29,7 @@ class RBF(Stationary):
         self.use_invLengthscale = inv_l
         if inv_l:
             self.unlink_parameter(self.lengthscale)
-            self.inv_l = Param('inv_lengthscale',1./self.lengthscale, Logexp())
+            self.inv_l = Param('inv_lengthscale',1./self.lengthscale**2, Logexp())
             self.link_parameter(self.inv_l)
 
     def K_of_r(self, r):
@@ -56,7 +56,7 @@ class RBF(Stationary):
         return self.variance*np.sqrt(2*np.pi)*self.lengthscale*np.exp(-self.lengthscale*2*omega**2/2)
 
     def parameters_changed(self):
-        if self.use_invLengthscale: self.lengthscale[:] = 1./(self.inv_l+1e-200)
+        if self.use_invLengthscale: self.lengthscale[:] = 1./np.sqrt(self.inv_l+1e-200)
         super(RBF,self).parameters_changed()
 
     #---------------------------------------#
@@ -80,7 +80,7 @@ class RBF(Stationary):
         self.variance.gradient = dL_dvar
         self.lengthscale.gradient = dL_dlengscale
         if self.use_invLengthscale:
-            self.inv_l.gradient = dL_dlengscale*(-self.lengthscale**2)
+            self.inv_l.gradient = dL_dlengscale*(self.lengthscale**3/-2.)
 
     def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2]
@@ -90,8 +90,8 @@ class RBF(Stationary):
 
     def update_gradients_diag(self, dL_dKdiag, X):
         super(RBF,self).update_gradients_diag(dL_dKdiag, X)
-        if self.use_invLengthscale: self.inv_l.gradient = self.lengthscale.gradient*(-self.lengthscale**2)
+        if self.use_invLengthscale: self.inv_l.gradient = self.lengthscale.gradient*(self.lengthscale**3/-2.)
     def update_gradients_full(self, dL_dK, X, X2=None):
         super(RBF,self).update_gradients_full(dL_dK, X, X2)
-        if self.use_invLengthscale: self.inv_l.gradient = self.lengthscale.gradient*(-self.lengthscale**2)
+        if self.use_invLengthscale: self.inv_l.gradient = self.lengthscale.gradient*(self.lengthscale**3/-2.)
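With the revert applied, the net effect of the series is the 1/lengthscale**2 parameterisation from patch 1 plus the new 'org-bfgs' optimizer name. A sketch of how the two would be used together, assuming this patched GPy is installed; the data is synthetic and the snippet is illustrative rather than a test taken from the patches:

import numpy as np
import GPy

rng = np.random.RandomState(0)
X = rng.rand(50, 1)
y = np.sin(6*X) + 0.1*rng.randn(50, 1)

# inv_l=True swaps the lengthscale parameter for inv_lengthscale = 1/lengthscale**2,
# kept positive by the Logexp transformation (patch 1).
kern = GPy.kern.RBF(1, inv_l=True)
m = GPy.models.GPRegression(X, y, kern)

# 'org-bfgs' dispatches to the plain scipy fmin_bfgs wrapper added in patch 1.
m.optimize(optimizer='org-bfgs', messages=False)
print(m)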