mirror of https://github.com/SheffieldML/GPy.git
synced 2026-05-15 06:52:39 +02:00

Commit 8a83845937: merge changes
65 changed files with 1197 additions and 600 deletions
@@ -19,19 +19,15 @@ class DTC(object):
     def __init__(self):
         self.const_jitter = 1e-6

-    def inference(self, kern, X, Z, likelihood, Y):
-        assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
-
-        #TODO: MAX! fix this!
-        from ...util.misc import param_to_array
-        Y = param_to_array(Y)
+    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):

         num_inducing, _ = Z.shape
         num_data, output_dim = Y.shape

         #make sure the noise is not hetero
-        beta = 1./np.squeeze(likelihood.variance)
-        if beta.size <1:
+        beta = 1./likelihood.gaussian_variance(Y_metadata)
+        if beta.size > 1:
             raise NotImplementedError, "no hetero noise with this implementation of DTC"

         Kmm = kern.K(Z)
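For reference, the noise precision beta and Kmm set up above are the ingredients of the standard DTC (projected-process) predictive mean. A minimal sketch under the assumption of homoscedastic Gaussian noise; the helper, its name and its signature are illustrative, not part of the module:

import numpy as np

def dtc_mean_sketch(kern, X, Z, Xnew, Y, noise_var, jitter=1e-6):
    # standard DTC / projected-process predictive mean (homoscedastic noise only)
    beta = 1. / noise_var                                  # noise precision
    Kmm = kern.K(Z) + jitter * np.eye(Z.shape[0])          # inducing covariance
    Kmn = kern.K(Z, X)                                     # inducing-to-data cross covariance
    Ksm = kern.K(Xnew, Z)                                  # test-to-inducing cross covariance
    Sigma = Kmm + beta * Kmn.dot(Kmn.T)                    # m x m system matrix
    return beta * Ksm.dot(np.linalg.solve(Sigma, Kmn.dot(Y)))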
@@ -91,19 +87,15 @@ class vDTC(object):
     def __init__(self):
         self.const_jitter = 1e-6

-    def inference(self, kern, X, X_variance, Z, likelihood, Y):
-        assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
-
-        #TODO: MAX! fix this!
-        from ...util.misc import param_to_array
-        Y = param_to_array(Y)
+    def inference(self, kern, X, X_variance, Z, likelihood, Y, Y_metadata):

         num_inducing, _ = Z.shape
         num_data, output_dim = Y.shape

         #make sure the noise is not hetero
-        beta = 1./np.squeeze(likelihood.variance)
-        if beta.size <1:
+        beta = 1./likelihood.gaussian_variance(Y_metadata)
+        if beta.size > 1:
             raise NotImplementedError, "no hetero noise with this implementation of DTC"

         Kmm = kern.K(Z)
@@ -112,7 +104,7 @@ class vDTC(object):
         U = Knm
         Uy = np.dot(U.T,Y)

-        #factor Kmm
+        #factor Kmm
         Kmmi, L, Li, _ = pdinv(Kmm)

         # Compute A
@@ -3,6 +3,7 @@

 from posterior import Posterior
 from ...util.linalg import pdinv, dpotrs, tdot
+from ...util import diag
 import numpy as np
 log_2_pi = np.log(2*np.pi)

@@ -41,7 +42,9 @@ class ExactGaussianInference(object):

         K = kern.K(X)

-        Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata))
+        Ky = K.copy()
+        diag.add(Ky, likelihood.gaussian_variance(Y_metadata))
+        Wi, LW, LWi, W_logdet = pdinv(Ky)

         alpha, _ = dpotrs(LW, YYT_factor, lower=1)

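The replacement lines above add the Gaussian noise variance to the diagonal of a copy of K before factorising. The computation being set up is the usual exact-GP one; a self-contained sketch with scipy in place of GPy's pdinv/dpotrs (the helper and its name are illustrative, not the module's code):

import numpy as np
from scipy.linalg import cho_factor, cho_solve

def exact_gp_sketch(K, Y, noise_var):
    # Ky = K + sigma^2 I, factorised once; alpha = Ky^{-1} Y drives mean and gradients
    Ky = K + noise_var * np.eye(K.shape[0])
    L, lower = cho_factor(Ky, lower=True)
    alpha = cho_solve((L, lower), Y)
    # Gaussian log marginal likelihood, summed over output columns
    N, D = Y.shape
    log_marginal = (-0.5 * np.sum(Y * alpha)
                    - D * np.sum(np.log(np.diag(L)))
                    - 0.5 * N * D * np.log(2 * np.pi))
    return alpha, log_marginal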
@@ -11,9 +11,9 @@ class EP(object):

         :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
         :type epsilon: float
-        :param eta: Power EP thing TODO: Ricardo: what, exactly?
+        :param eta: parameter for fractional EP updates.
         :type eta: float64
-        :param delta: Power EP thing TODO: Ricardo: what, exactly?
+        :param delta: damping factor for EP updates.
         :type delta: float64
         """
         self.epsilon, self.eta, self.delta = epsilon, eta, delta
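The new docstring text pins down what eta and delta do: eta is the power/fraction used when forming the cavity and matching moments, delta damps how far each site moves per update. A generic sketch of one damped, fractional site update on Gaussian natural parameters (illustrative only, not this class's implementation):

def damped_fractional_site_update(tau_site, nu_site, tau_tilted, nu_tilted,
                                  tau_cav, nu_cav, eta, delta):
    # fractional (power) EP: only a fraction eta of the site is removed to form
    # the cavity, so the moment-matched update is rescaled by 1/eta
    tau_prop = (tau_tilted - tau_cav) / eta
    nu_prop = (nu_tilted - nu_cav) / eta
    # damping: blend the proposed site with the current one
    tau_new = delta * tau_prop + (1. - delta) * tau_site
    nu_new = delta * nu_prop + (1. - delta) * nu_site
    return tau_new, nu_new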
@@ -17,14 +17,14 @@ class FITC(object):
     """
     const_jitter = 1e-6

-    def inference(self, kern, X, Z, likelihood, Y):
+    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):

         num_inducing, _ = Z.shape
         num_data, output_dim = Y.shape

         #make sure the noise is not hetero
-        sigma_n = np.squeeze(likelihood.variance)
-        if sigma_n.size <1:
+        sigma_n = likelihood.gaussian_variance(Y_metadata)
+        if sigma_n.size >1:
             raise NotImplementedError, "no hetero noise with this implementation of FITC"

         Kmm = kern.K(Z)
@@ -51,12 +51,11 @@ class Laplace(object):
             Ki_f_init = self._previous_Ki_fhat

         f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)

         self.f_hat = f_hat
         self.Ki_fhat = Ki_fhat
         self.K = K.copy()
         #Compute hessian and other variables at mode
-        log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
+        log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)

         self._previous_Ki_fhat = Ki_fhat.copy()
         return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
@@ -86,13 +85,13 @@ class Laplace(object):

         #define the objective function (to be maximised)
         def obj(Ki_f, f):
-            return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, extra_data=Y_metadata)
+            return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, Y_metadata=Y_metadata)

         difference = np.inf
         iteration = 0
         while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
-            W = -likelihood.d2logpdf_df2(f, Y, extra_data=Y_metadata)
-            grad = likelihood.dlogpdf_df(f, Y, extra_data=Y_metadata)
+            W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
+            grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)

             W_f = W*f

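The loop above recomputes W = -d2logpdf_df2 and the gradient at every iteration; the stable Newton step these quantities feed (Rasmussen & Williams, GPML Algorithm 3.1) can be sketched as below, with all inputs except K treated as length-N vectors. This is a sketch of the standard algorithm, not the rasm_mode implementation itself:

import numpy as np

def laplace_newton_step(K, f, grad, W):
    # one stable Newton step towards the posterior mode f_hat
    f, grad, W = f.flatten(), grad.flatten(), W.flatten()
    W12 = np.sqrt(W)                                   # requires W >= 0 (log-concave likelihood)
    B = np.eye(K.shape[0]) + W12[:, None] * K * W12[None, :]
    L = np.linalg.cholesky(B)
    b = W * f + grad
    v = np.linalg.solve(L, W12 * K.dot(b))
    a = b - W12 * np.linalg.solve(L.T, v)              # a plays the role of Ki_f (= K^{-1} f_new)
    return K.dot(a)[:, None], a[:, None]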
@@ -136,13 +135,12 @@ class Laplace(object):
         At the mode, compute the hessian and effective covariance matrix.

         returns: logZ : approximation to the marginal likelihood
-                 woodbury_vector : variable required for calculating the approximation to the covariance matrix
                  woodbury_inv : variable required for calculating the approximation to the covariance matrix
-                 dL_dthetaL : array of derivatives (1 x num_kernel_params)
+                 dL_dthetaL : array of derivatives (1 x num_likelihood_params)
         """
         #At this point get the hessian matrix (or vector as W is diagonal)
-        W = -likelihood.d2logpdf_df2(f_hat, Y, extra_data=Y_metadata)
+        W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)

         K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)

@@ -151,11 +149,10 @@ class Laplace(object):
         Ki_W_i = K - C.T.dot(C) #Could this be wrong?

         #compute the log marginal
-        log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, extra_data=Y_metadata) - np.sum(np.log(np.diag(L)))
+        log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L)))

         #Compute vital matrices for derivatives
-        dW_df = -likelihood.d3logpdf_df3(f_hat, Y, extra_data=Y_metadata) # -d3lik_d3fhat
-        woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, extra_data=Y_metadata)
+        dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
         dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9.
         #BiK, _ = dpotrs(L, K, lower=1)
         #dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
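For reference, the log_marginal line is the standard Laplace approximation to the evidence. Assuming the L returned by _compute_B_statistics is the Cholesky factor of B = I + W^{1/2} K W^{1/2} (that routine is not shown in this diff), the sum of log-diagonal terms is half the log determinant of B:

$$
\log Z \simeq -\tfrac{1}{2}\hat{f}^{\top}K^{-1}\hat{f} + \log p(y\,|\,\hat{f}) - \tfrac{1}{2}\log|B|,
\qquad \tfrac{1}{2}\log|B| = \sum_i \log L_{ii},
\qquad B = I + W^{\frac{1}{2}} K W^{\frac{1}{2}}
$$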
@@ -169,7 +166,7 @@ class Laplace(object):
             explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)

             #Implicit
-            implicit_part = np.dot(woodbury_vector, dL_dfhat.T).dot(I_KW_i)
+            implicit_part = np.dot(Ki_f, dL_dfhat.T).dot(I_KW_i)

             dL_dK = explicit_part + implicit_part
         else:
@@ -179,7 +176,7 @@ class Laplace(object):
         #compute dL_dthetaL#
         ####################
         if likelihood.size > 0 and not likelihood.is_fixed:
-            dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, extra_data=Y_metadata)
+            dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, Y_metadata=Y_metadata)

             num_params = likelihood.size
             # make space for one derivative for each likelihood parameter
@@ -200,7 +197,7 @@ class Laplace(object):
         else:
             dL_dthetaL = np.zeros(likelihood.size)

-        return log_marginal, woodbury_vector, K_Wi_i, dL_dK, dL_dthetaL
+        return log_marginal, K_Wi_i, dL_dK, dL_dthetaL

     def _compute_B_statistics(self, K, W, log_concave):
         """
@@ -73,20 +73,37 @@ class Posterior(object):

     @property
     def mean(self):
+        """
+        Posterior mean
+        $$
+        K_{xx}v
+        v := \texttt{Woodbury vector}
+        $$
+        """
         if self._mean is None:
             self._mean = np.dot(self._K, self.woodbury_vector)
         return self._mean

     @property
     def covariance(self):
+        """
+        Posterior covariance
+        $$
+        K_{xx} - K_{xx}W_{xx}^{-1}K_{xx}
+        W_{xx} := \texttt{Woodbury inv}
+        $$
+        """
         if self._covariance is None:
             #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
-            self._covariance = self._K - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T
+            self._covariance = self._K - (np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze()
             #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
-        return self._covariance.squeeze()
+        return self._covariance

     @property
     def precision(self):
+        """
+        Inverse of posterior covariance
+        """
         if self._precision is None:
             cov = np.atleast_3d(self.covariance)
             self._precision = np.zeros(cov.shape) # if one covariance per dimension
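The docstrings added above define the posterior entirely through the Woodbury quantities. A hedged sketch of how those same quantities give predictions at new inputs (illustrative helper, not the class's own code path, which additionally handles multiple output dimensions):

import numpy as np

def predict_from_woodbury(kern, X, Xnew, woodbury_vector, woodbury_inv):
    Ksx = kern.K(Xnew, X)                           # cross covariance K_{*x}
    Kss = kern.K(Xnew)                              # prior covariance at the new points
    mean = Ksx.dot(woodbury_vector)                 # K_{*x} v
    cov = Kss - Ksx.dot(woodbury_inv).dot(Ksx.T)    # K_{**} - K_{*x} W K_{x*}
    return mean, cov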
@@ -96,8 +113,15 @@ class Posterior(object):

     @property
     def woodbury_chol(self):
+        """
+        return $L_{W}$ where L is the lower triangular Cholesky decomposition of the Woodbury matrix
+        $$
+        L_{W}L_{W}^{\top} = W^{-1}
+        W^{-1} := \texttt{Woodbury inv}
+        $$
+        """
         if self._woodbury_chol is None:
-            #compute woodbury chol from
+            #compute woodbury chol from
             if self._woodbury_inv is not None:
                 winv = np.atleast_3d(self._woodbury_inv)
                 self._woodbury_chol = np.zeros(winv.shape)
@@ -121,6 +145,13 @@ class Posterior(object):

     @property
     def woodbury_inv(self):
+        """
+        The inverse of the woodbury matrix, in the gaussian likelihood case it is defined as
+        $$
+        (K_{xx} + \Sigma_{xx})^{-1}
+        \Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}
+        $$
+        """
         if self._woodbury_inv is None:
             self._woodbury_inv, _ = dpotri(self.woodbury_chol, lower=1)
             #self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1)
@@ -129,17 +160,22 @@ class Posterior(object):

     @property
     def woodbury_vector(self):
         """
         Woodbury vector in the gaussian likelihood case only is defined as
         $$
         (K_{xx} + \Sigma)^{-1}Y
         \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
         $$
         """
         if self._woodbury_vector is None:
             self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean)
         return self._woodbury_vector

     @property
     def K_chol(self):
         """
         Cholesky of the prior covariance K
         """
         if self._K_chol is None:
             self._K_chol = jitchol(self._K)
         return self._K_chol
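In the Gaussian-likelihood case the docstrings above reduce to explicit formulas. A small numeric sketch of those identities (toy data; dense inverses used purely for illustration):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(5, 1)
K = np.exp(-0.5 * (X - X.T) ** 2)                       # toy RBF prior covariance
sigma2 = 0.1
y = rng.randn(5, 1)

woodbury_inv = np.linalg.inv(K + sigma2 * np.eye(5))    # (K_{xx} + \Sigma_{xx})^{-1}
woodbury_vector = woodbury_inv.dot(y)                   # (K_{xx} + \Sigma)^{-1} Y

posterior_mean = K.dot(woodbury_vector)                 # matches Posterior.mean
posterior_cov = K - K.dot(woodbury_inv).dot(K)          # matches Posterior.covariance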
@@ -176,7 +176,6 @@ class VarDTC(object):

         #construct a posterior object
         post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)

         return post, log_marginal, grad_dict

 class VarDTCMissingData(object):
@@ -365,7 +364,7 @@ class VarDTCMissingData(object):
         return post, log_marginal, grad_dict

 def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
-    dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
+    dL_dpsi0 = -0.5 * output_dim * (beta[:,None] * np.ones([num_data, 1])).flatten()
     dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)
     dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
     if het_noise:
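The beta[:,None] change matters when beta carries one precision per data point: a bare length-num_data vector multiplied by np.ones([num_data, 1]) broadcasts out to a (num_data, num_data) matrix, whereas the reshaped version keeps a single column. A quick shape check (values illustrative):

import numpy as np

num_data = 3
beta = np.array([2., 3., 4.])                           # one precision per data point

print((beta * np.ones([num_data, 1])).shape)            # (3, 3)  unwanted outer broadcast
print((beta[:, None] * np.ones([num_data, 1])).shape)   # (3, 1)  one value per row, as intended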
GPy/inference/optimization/BayesOpt.py (new file, 63 lines)
@@ -0,0 +1,63 @@
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt


####### Preliminary BO with standard acquisition functions ###############################
# Types of BO
# MM: Maximum (or minimum) mean
# MPI: Maximum posterior improvement
# MUI: Maximum upper interval

def BOacquisition(X, Y, model, type_bo="MPI", type_objective="max", par_mpi=0, z_mui=1.96, plot=True, n_eval=500):

    # Only works in dimension 1
    # Grid where the GP will be evaluated
    X_star = np.linspace(min(X)-10, max(X)+10, n_eval)
    X_star = X_star[:,None]

    # Posterior GP evaluated on the grid
    fest = model.predict(X_star)

    # Calculate the acquisition function
    ## IF Maximize
    if type_objective == "max":
        if type_bo == "MPI": # add others here
            acqu = norm.cdf((fest[0]-(1+par_mpi)*max(fest[0])) / fest[1])
            acqu = acqu/(2*max(acqu))
        if type_bo == "MM":
            acqu = fest[0]/max(fest[0])
            acqu = acqu/(2*max(acqu))
        if type_bo == "MUI":
            acqu = fest[0]+z_mui*np.sqrt(fest[1])
            acqu = acqu/(2*max(acqu))
        optimal_loc = np.argmax(acqu)
        x_new = X_star[optimal_loc]

    ## IF Minimize
    if type_objective == "min":
        if type_bo == "MPI": # add others here
            acqu = 1-norm.cdf((fest[0]-(1+par_mpi)*min(fest[0])) / fest[1])
            acqu = acqu/(2*max(acqu))
        if type_bo == "MM":
            acqu = 1-fest[0]/max(fest[0])
            acqu = acqu/(2*max(acqu))
        if type_bo == "MUI":
            acqu = -fest[0]+z_mui*np.sqrt(fest[1])
            acqu = acqu/(2*max(acqu))
        optimal_loc = np.argmax(acqu)
        x_new = X_star[optimal_loc]

    # Plot GP posterior, collected data and the acquisition function
    if plot:
        plt.plot(X, Y, 'p')
        plt.title('Acquisition function')
        model.plot()
        plt.plot(X_star, acqu, 'r--')

    # Return the point where we should take the new sample
    return x_new
###############################################################
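A minimal usage sketch for the new helper on a 1-D toy problem; the GPRegression model and the toy data are illustrative, only the BOacquisition call reflects the file above:

import numpy as np
import GPy
from GPy.inference.optimization.BayesOpt import BOacquisition

X = np.random.uniform(-3., 3., (10, 1))
Y = np.sin(X) + 0.05 * np.random.randn(10, 1)
model = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))
model.optimize()

# suggest the next evaluation point with the MPI rule, maximising the objective
x_new = BOacquisition(X, Y, model, type_bo="MPI", type_objective="max", plot=False)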