mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-15 06:52:39 +02:00
Merge branch 'devel' into plot_density
This commit is contained in:
commit
3833ac5a49
4 changed files with 87 additions and 25 deletions
|
|
@ -160,32 +160,40 @@ class SparseGP(GP):
|
||||||
else:
|
else:
|
||||||
psi0_star = kern.psi0(self._predictive_variable, Xnew)
|
psi0_star = kern.psi0(self._predictive_variable, Xnew)
|
||||||
psi1_star = kern.psi1(self._predictive_variable, Xnew)
|
psi1_star = kern.psi1(self._predictive_variable, Xnew)
|
||||||
#psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
|
psi2_star = kern.psi2n(self._predictive_variable, Xnew)
|
||||||
la = self.posterior.woodbury_vector
|
la = self.posterior.woodbury_vector
|
||||||
mu = np.dot(psi1_star, la) # TODO: dimensions?
|
mu = np.dot(psi1_star, la) # TODO: dimensions?
|
||||||
|
N,M,D = psi0_star.shape[0],psi1_star.shape[1], la.shape[1]
|
||||||
|
|
||||||
if full_cov:
|
if full_cov:
|
||||||
raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
|
raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
|
||||||
var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1]))
|
var = np.zeros((Xnew.shape[0], la.shape[1], la.shape[1]))
|
||||||
di = np.diag_indices(la.shape[1])
|
di = np.diag_indices(la.shape[1])
|
||||||
else:
|
else:
|
||||||
var = np.empty((Xnew.shape[0], la.shape[1]))
|
tmp = psi2_star - psi1_star[:,:,None]*psi1_star[:,None,:]
|
||||||
|
var = (tmp.reshape(-1,M).dot(la).reshape(N,M,D)*la[None,:,:]).sum(1) + psi0_star[:,None]
|
||||||
for i in range(Xnew.shape[0]):
|
if self.posterior.woodbury_inv.ndim==2:
|
||||||
_mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
|
var += -psi2_star.reshape(N,-1).dot(self.posterior.woodbury_inv.flat)[:,None]
|
||||||
psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
|
|
||||||
tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
|
|
||||||
|
|
||||||
var_ = mdot(la.T, tmp, la)
|
|
||||||
p0 = psi0_star[i]
|
|
||||||
t = np.atleast_3d(self.posterior.woodbury_inv)
|
|
||||||
t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
|
|
||||||
|
|
||||||
if full_cov:
|
|
||||||
var_[di] += p0
|
|
||||||
var_[di] += -t2
|
|
||||||
var[i] = var_
|
|
||||||
else:
|
else:
|
||||||
var[i] = np.diag(var_)+p0-t2
|
var += -psi2_star.reshape(N,-1).dot(self.posterior.woodbury_inv.reshape(-1,D))
|
||||||
|
assert np.all(var>=-1e-5), "The predicted variance goes negative!: "+str(var)
|
||||||
|
var = np.clip(var,1e-15,np.inf)
|
||||||
|
|
||||||
|
# for i in range(Xnew.shape[0]):
|
||||||
|
# _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
|
||||||
|
# psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
|
||||||
|
# tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
|
||||||
|
#
|
||||||
|
# var_ = mdot(la.T, tmp, la)
|
||||||
|
# p0 = psi0_star[i]
|
||||||
|
# t = np.atleast_3d(self.posterior.woodbury_inv)
|
||||||
|
# t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
|
||||||
|
#
|
||||||
|
# if full_cov:
|
||||||
|
# var_[di] += p0
|
||||||
|
# var_[di] += -t2
|
||||||
|
# var[i] = var_
|
||||||
|
# else:
|
||||||
|
# var[i] = np.diag(var_)+p0-t2
|
||||||
|
|
||||||
return mu, var
|
return mu, var
|
||||||
|
|
|
||||||
|
|
@ -32,8 +32,8 @@ class Laplace(LatentFunctionInference):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self._mode_finding_tolerance = 1e-7
|
self._mode_finding_tolerance = 1e-4
|
||||||
self._mode_finding_max_iter = 60
|
self._mode_finding_max_iter = 30
|
||||||
self.bad_fhat = False
|
self.bad_fhat = False
|
||||||
#Store whether it is the first run of the inference so that we can choose whether we need
|
#Store whether it is the first run of the inference so that we can choose whether we need
|
||||||
#to calculate things or reuse old variables
|
#to calculate things or reuse old variables
|
||||||
|
|
@ -209,9 +209,12 @@ class Laplace(LatentFunctionInference):
|
||||||
Ki_f_new = Ki_f + step*dKi_f
|
Ki_f_new = Ki_f + step*dKi_f
|
||||||
f_new = np.dot(K, Ki_f_new)
|
f_new = np.dot(K, Ki_f_new)
|
||||||
#print "new {} vs old {}".format(obj(Ki_f_new, f_new), obj(Ki_f, f))
|
#print "new {} vs old {}".format(obj(Ki_f_new, f_new), obj(Ki_f, f))
|
||||||
if obj(Ki_f_new, f_new) < obj(Ki_f, f):
|
old_obj = obj(Ki_f, f)
|
||||||
|
new_obj = obj(Ki_f_new, f_new)
|
||||||
|
if new_obj < old_obj:
|
||||||
raise ValueError("Shouldn't happen, brent optimization failing")
|
raise ValueError("Shouldn't happen, brent optimization failing")
|
||||||
difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
|
difference = np.abs(new_obj - old_obj)
|
||||||
|
# difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
|
||||||
Ki_f = Ki_f_new
|
Ki_f = Ki_f_new
|
||||||
f = f_new
|
f = f_new
|
||||||
iteration += 1
|
iteration += 1
|
||||||
|
|
@ -316,6 +319,9 @@ class Laplace(LatentFunctionInference):
|
||||||
if not log_concave:
|
if not log_concave:
|
||||||
#print "Under 1e-10: {}".format(np.sum(W < 1e-6))
|
#print "Under 1e-10: {}".format(np.sum(W < 1e-6))
|
||||||
W = np.clip(W, 1e-6, 1e+30)
|
W = np.clip(W, 1e-6, 1e+30)
|
||||||
|
# For student-T we can clip this more intelligently. If the
|
||||||
|
# objective has hardly changed, we can increase the clipping limit
|
||||||
|
# by ((v+1)/v)/sigma2
|
||||||
# NOTE: when setting a parameter inside parameters_changed it will allways come to closed update circles!!!
|
# NOTE: when setting a parameter inside parameters_changed it will allways come to closed update circles!!!
|
||||||
#W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
|
#W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
|
||||||
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
|
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
|
||||||
|
|
@ -339,7 +345,7 @@ class Laplace(LatentFunctionInference):
|
||||||
|
|
||||||
#compute vital matrices
|
#compute vital matrices
|
||||||
C = np.dot(LiW12, K)
|
C = np.dot(LiW12, K)
|
||||||
Ki_W_i = K - C.T.dot(C)
|
Ki_W_i = K - C.T.dot(C)
|
||||||
|
|
||||||
I_KW_i = np.eye(K.shape[0]) - np.dot(K, K_Wi_i)
|
I_KW_i = np.eye(K.shape[0]) - np.dot(K, K_Wi_i)
|
||||||
logdet_I_KW = 2*np.sum(np.log(np.diag(L)))
|
logdet_I_KW = 2*np.sum(np.log(np.diag(L)))
|
||||||
|
|
|
||||||
|
|
@ -133,6 +133,33 @@ class opt_lbfgsb(Optimizer):
|
||||||
#a more helpful error message is available in opt_result in the Error case
|
#a more helpful error message is available in opt_result in the Error case
|
||||||
if opt_result[2]['warnflag']==2:
|
if opt_result[2]['warnflag']==2:
|
||||||
self.status = 'Error' + str(opt_result[2]['task'])
|
self.status = 'Error' + str(opt_result[2]['task'])
|
||||||
|
|
||||||
|
class opt_bfgs(Optimizer):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
Optimizer.__init__(self, *args, **kwargs)
|
||||||
|
self.opt_name = "BFGS (Scipy implementation)"
|
||||||
|
|
||||||
|
def opt(self, f_fp=None, f=None, fp=None):
|
||||||
|
"""
|
||||||
|
Run the optimizer
|
||||||
|
|
||||||
|
"""
|
||||||
|
rcstrings = ['','Maximum number of iterations exceeded', 'Gradient and/or function calls not changing']
|
||||||
|
|
||||||
|
opt_dict = {}
|
||||||
|
if self.xtol is not None:
|
||||||
|
print("WARNING: bfgs doesn't have an xtol arg, so I'm going to ignore it")
|
||||||
|
if self.ftol is not None:
|
||||||
|
print("WARNING: bfgs doesn't have an ftol arg, so I'm going to ignore it")
|
||||||
|
if self.gtol is not None:
|
||||||
|
opt_dict['pgtol'] = self.gtol
|
||||||
|
|
||||||
|
opt_result = optimize.fmin_bfgs(f, self.x_init, fp, disp=self.messages,
|
||||||
|
maxiter=self.max_iters, full_output=True, **opt_dict)
|
||||||
|
self.x_opt = opt_result[0]
|
||||||
|
self.f_opt = f_fp(self.x_opt)[0]
|
||||||
|
self.funct_eval = opt_result[4]
|
||||||
|
self.status = rcstrings[opt_result[6]]
|
||||||
|
|
||||||
class opt_simplex(Optimizer):
|
class opt_simplex(Optimizer):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
|
@ -245,6 +272,7 @@ def get_optimizer(f_min):
|
||||||
optimizers = {'fmin_tnc': opt_tnc,
|
optimizers = {'fmin_tnc': opt_tnc,
|
||||||
'simplex': opt_simplex,
|
'simplex': opt_simplex,
|
||||||
'lbfgsb': opt_lbfgsb,
|
'lbfgsb': opt_lbfgsb,
|
||||||
|
'org-bfgs': opt_bfgs,
|
||||||
'scg': opt_SCG,
|
'scg': opt_SCG,
|
||||||
'adadelta':Opt_Adadelta}
|
'adadelta':Opt_Adadelta}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,8 @@ from .stationary import Stationary
|
||||||
from .psi_comp import PSICOMP_RBF
|
from .psi_comp import PSICOMP_RBF
|
||||||
from .psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU
|
from .psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU
|
||||||
from ...util.config import *
|
from ...util.config import *
|
||||||
|
from ...core import Param
|
||||||
|
from GPy.core.parameterization.transformations import Logexp
|
||||||
|
|
||||||
class RBF(Stationary):
|
class RBF(Stationary):
|
||||||
"""
|
"""
|
||||||
|
|
@ -18,12 +20,17 @@ class RBF(Stationary):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
_support_GPU = True
|
_support_GPU = True
|
||||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
|
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False, inv_l=False):
|
||||||
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
|
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
|
||||||
if self.useGPU:
|
if self.useGPU:
|
||||||
self.psicomp = PSICOMP_RBF_GPU()
|
self.psicomp = PSICOMP_RBF_GPU()
|
||||||
else:
|
else:
|
||||||
self.psicomp = PSICOMP_RBF()
|
self.psicomp = PSICOMP_RBF()
|
||||||
|
self.use_invLengthscale = inv_l
|
||||||
|
if inv_l:
|
||||||
|
self.unlink_parameter(self.lengthscale)
|
||||||
|
self.inv_l = Param('inv_lengthscale',1./self.lengthscale**2, Logexp())
|
||||||
|
self.link_parameter(self.inv_l)
|
||||||
|
|
||||||
def K_of_r(self, r):
|
def K_of_r(self, r):
|
||||||
return self.variance * np.exp(-0.5 * r**2)
|
return self.variance * np.exp(-0.5 * r**2)
|
||||||
|
|
@ -47,6 +54,10 @@ class RBF(Stationary):
|
||||||
def spectrum(self, omega):
|
def spectrum(self, omega):
|
||||||
assert self.input_dim == 1 #TODO: higher dim spectra?
|
assert self.input_dim == 1 #TODO: higher dim spectra?
|
||||||
return self.variance*np.sqrt(2*np.pi)*self.lengthscale*np.exp(-self.lengthscale*2*omega**2/2)
|
return self.variance*np.sqrt(2*np.pi)*self.lengthscale*np.exp(-self.lengthscale*2*omega**2/2)
|
||||||
|
|
||||||
|
def parameters_changed(self):
|
||||||
|
if self.use_invLengthscale: self.lengthscale[:] = 1./np.sqrt(self.inv_l+1e-200)
|
||||||
|
super(RBF,self).parameters_changed()
|
||||||
|
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
# PSI statistics #
|
# PSI statistics #
|
||||||
|
|
@ -68,10 +79,19 @@ class RBF(Stationary):
|
||||||
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[:2]
|
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[:2]
|
||||||
self.variance.gradient = dL_dvar
|
self.variance.gradient = dL_dvar
|
||||||
self.lengthscale.gradient = dL_dlengscale
|
self.lengthscale.gradient = dL_dlengscale
|
||||||
|
if self.use_invLengthscale:
|
||||||
|
self.inv_l.gradient = dL_dlengscale*(self.lengthscale**3/-2.)
|
||||||
|
|
||||||
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2]
|
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2]
|
||||||
|
|
||||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[3:]
|
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[3:]
|
||||||
|
|
||||||
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
|
super(RBF,self).update_gradients_diag(dL_dKdiag, X)
|
||||||
|
if self.use_invLengthscale: self.inv_l.gradient =self.lengthscale.gradient*(self.lengthscale**3/-2.)
|
||||||
|
|
||||||
|
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||||
|
super(RBF,self).update_gradients_full(dL_dK, X, X2)
|
||||||
|
if self.use_invLengthscale: self.inv_l.gradient =self.lengthscale.gradient*(self.lengthscale**3/-2.)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue