mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-07-02 16:01:03 +02:00
lots of fixes, including prediction being mean and variance only
This commit is contained in:
parent
365b8ae1e1
commit
cc96f5b3d5
13 changed files with 118 additions and 128 deletions
|
|
@ -120,8 +120,12 @@ class GP(Model):
|
||||||
mu, var = self._raw_predict(Xnew, full_cov=full_cov)
|
mu, var = self._raw_predict(Xnew, full_cov=full_cov)
|
||||||
|
|
||||||
# now push through likelihood
|
# now push through likelihood
|
||||||
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
|
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
|
||||||
return mean, var, _025pm, _975pm
|
return mean, var
|
||||||
|
|
||||||
|
def predict_quantiles(self, X, quantiles=(0.025, 0.975), Y_metadata=None):
|
||||||
|
m, v = self._raw_predict(X, full_cov=False)
|
||||||
|
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
|
||||||
|
|
||||||
def posterior_samples_f(self,X,size=10, full_cov=True):
|
def posterior_samples_f(self,X,size=10, full_cov=True):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -54,13 +54,13 @@ class SparseGP(GP):
|
||||||
|
|
||||||
def parameters_changed(self):
|
def parameters_changed(self):
|
||||||
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y)
|
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y)
|
||||||
self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood'))
|
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
|
||||||
if isinstance(self.X, VariationalPosterior):
|
if isinstance(self.X, VariationalPosterior):
|
||||||
#gradients wrt kernel
|
#gradients wrt kernel
|
||||||
dL_dKmm = self.grad_dict.pop('dL_dKmm')
|
dL_dKmm = self.grad_dict.pop('dL_dKmm')
|
||||||
self.kern.update_gradients_full(dL_dKmm, self.Z, None)
|
self.kern.update_gradients_full(dL_dKmm, self.Z, None)
|
||||||
target = self.kern.gradient.copy()
|
target = self.kern.gradient.copy()
|
||||||
self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
|
self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||||
self.kern.gradient += target
|
self.kern.gradient += target
|
||||||
|
|
||||||
#gradients wrt Z
|
#gradients wrt Z
|
||||||
|
|
|
||||||
|
|
@ -16,8 +16,8 @@ If the likelihood object is something other than Gaussian, then exact inference
|
||||||
is not tractable. We then resort to a Laplace approximation (laplace.py) or
|
is not tractable. We then resort to a Laplace approximation (laplace.py) or
|
||||||
expectation propagation (ep.py).
|
expectation propagation (ep.py).
|
||||||
|
|
||||||
The inference methods return a
|
The inference methods return a
|
||||||
:class:`~GPy.inference.latent_function_inference.posterior.Posterior`
|
:class:`~GPy.inference.latent_function_inference.posterior.Posterior`
|
||||||
instance, which is a simple
|
instance, which is a simple
|
||||||
structure which contains a summary of the posterior. The model classes can then
|
structure which contains a summary of the posterior. The model classes can then
|
||||||
use this posterior object for making predictions, optimizing hyper-parameters,
|
use this posterior object for making predictions, optimizing hyper-parameters,
|
||||||
|
|
@ -33,13 +33,13 @@ from dtc import DTC
|
||||||
from fitc import FITC
|
from fitc import FITC
|
||||||
|
|
||||||
# class FullLatentFunctionData(object):
|
# class FullLatentFunctionData(object):
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
# class LatentFunctionInference(object):
|
# class LatentFunctionInference(object):
|
||||||
# def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
# def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
||||||
# """
|
# """
|
||||||
# Do inference on the latent functions given a covariance function `kern`,
|
# Do inference on the latent functions given a covariance function `kern`,
|
||||||
# inputs and outputs `X` and `Y`, and a likelihood `likelihood`.
|
# inputs and outputs `X` and `Y`, and a likelihood `likelihood`.
|
||||||
# Additional metadata for the outputs `Y` can be given in `Y_metadata`.
|
# Additional metadata for the outputs `Y` can be given in `Y_metadata`.
|
||||||
# """
|
# """
|
||||||
# raise NotImplementedError, "Abstract base class for full inference"
|
# raise NotImplementedError, "Abstract base class for full inference"
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,7 @@ class DTC(object):
|
||||||
U = Knm
|
U = Knm
|
||||||
Uy = np.dot(U.T,Y)
|
Uy = np.dot(U.T,Y)
|
||||||
|
|
||||||
#factor Kmm
|
#factor Kmm
|
||||||
Kmmi, L, Li, _ = pdinv(Kmm)
|
Kmmi, L, Li, _ = pdinv(Kmm)
|
||||||
|
|
||||||
# Compute A
|
# Compute A
|
||||||
|
|
@ -78,7 +78,9 @@ class DTC(object):
|
||||||
Uv = np.dot(U, v)
|
Uv = np.dot(U, v)
|
||||||
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1))*beta**2
|
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1))*beta**2
|
||||||
|
|
||||||
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU.T}
|
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
|
||||||
|
|
||||||
|
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}
|
||||||
|
|
||||||
#construct a posterior object
|
#construct a posterior object
|
||||||
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
|
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
|
||||||
|
|
@ -154,11 +156,8 @@ class vDTC(object):
|
||||||
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2
|
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2
|
||||||
dL_dR -=beta*trace_term/num_data
|
dL_dR -=beta*trace_term/num_data
|
||||||
|
|
||||||
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T}
|
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
|
||||||
|
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}
|
||||||
#update gradients
|
|
||||||
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
|
|
||||||
likelihood.update_gradients(dL_dR)
|
|
||||||
|
|
||||||
#construct a posterior object
|
#construct a posterior object
|
||||||
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
|
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ class ExactGaussianInference(object):
|
||||||
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
|
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
|
||||||
raise NotImplementedError, 'TODO' #TODO
|
raise NotImplementedError, 'TODO' #TODO
|
||||||
|
|
||||||
def inference(self, kern, X, likelihood, Y, **Y_metadata):
|
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Returns a Posterior class containing essential quantities of the posterior
|
Returns a Posterior class containing essential quantities of the posterior
|
||||||
"""
|
"""
|
||||||
|
|
@ -41,12 +41,14 @@ class ExactGaussianInference(object):
|
||||||
|
|
||||||
K = kern.K(X)
|
K = kern.K(X)
|
||||||
|
|
||||||
Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, **Y_metadata))
|
Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata))
|
||||||
|
|
||||||
alpha, _ = dpotrs(LW, YYT_factor, lower=1)
|
alpha, _ = dpotrs(LW, YYT_factor, lower=1)
|
||||||
|
|
||||||
log_marginal = 0.5*(-Y.size * log_2_pi - Y.shape[1] * W_logdet - np.sum(alpha * YYT_factor))
|
log_marginal = 0.5*(-Y.size * log_2_pi - Y.shape[1] * W_logdet - np.sum(alpha * YYT_factor))
|
||||||
|
|
||||||
dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
|
dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
|
||||||
|
|
||||||
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}
|
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK))
|
||||||
|
|
||||||
|
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
|
||||||
|
|
|
||||||
|
|
@ -18,10 +18,6 @@ class FITC(object):
|
||||||
self.const_jitter = 1e-6
|
self.const_jitter = 1e-6
|
||||||
|
|
||||||
def inference(self, kern, X, Z, likelihood, Y):
|
def inference(self, kern, X, Z, likelihood, Y):
|
||||||
|
|
||||||
#TODO: MAX! fix this!
|
|
||||||
from ...util.misc import param_to_array
|
|
||||||
Y = param_to_array(Y)
|
|
||||||
|
|
||||||
num_inducing, _ = Z.shape
|
num_inducing, _ = Z.shape
|
||||||
num_data, output_dim = Y.shape
|
num_data, output_dim = Y.shape
|
||||||
|
|
@ -36,7 +32,7 @@ class FITC(object):
|
||||||
Knm = kern.K(X, Z)
|
Knm = kern.K(X, Z)
|
||||||
U = Knm
|
U = Knm
|
||||||
|
|
||||||
#factor Kmm
|
#factor Kmm
|
||||||
Kmmi, L, Li, _ = pdinv(Kmm)
|
Kmmi, L, Li, _ = pdinv(Kmm)
|
||||||
|
|
||||||
#compute beta_star, the effective noise precision
|
#compute beta_star, the effective noise precision
|
||||||
|
|
@ -72,7 +68,7 @@ class FITC(object):
|
||||||
vvT_P = tdot(v.reshape(-1,1)) + P
|
vvT_P = tdot(v.reshape(-1,1)) + P
|
||||||
dL_dK = 0.5*(Kmmi - vvT_P)
|
dL_dK = 0.5*(Kmmi - vvT_P)
|
||||||
KiU = np.dot(Kmmi, U.T)
|
KiU = np.dot(Kmmi, U.T)
|
||||||
dL_dK += np.dot(KiU*dL_dR, KiU.T)
|
dL_dK += np.dot(KiU*dL_dR, KiU.T)
|
||||||
|
|
||||||
# Compute dL_dU
|
# Compute dL_dU
|
||||||
vY = np.dot(v.reshape(-1,1),Y.T)
|
vY = np.dot(v.reshape(-1,1),Y.T)
|
||||||
|
|
@ -80,7 +76,8 @@ class FITC(object):
|
||||||
dL_dU *= beta_star
|
dL_dU *= beta_star
|
||||||
dL_dU -= 2.*KiU*dL_dR
|
dL_dU -= 2.*KiU*dL_dR
|
||||||
|
|
||||||
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T, 'partial_for_likelihood':dL_dR}
|
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
|
||||||
|
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}
|
||||||
|
|
||||||
#construct a posterior object
|
#construct a posterior object
|
||||||
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
|
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
|
||||||
|
|
|
||||||
|
|
@ -137,25 +137,25 @@ class VarDTC(object):
|
||||||
psi0, A, LB, trYYT, data_fit)
|
psi0, A, LB, trYYT, data_fit)
|
||||||
|
|
||||||
#put the gradients in the right places
|
#put the gradients in the right places
|
||||||
partial_for_likelihood = _compute_partial_for_likelihood(likelihood,
|
dL_dR = _compute_dL_dR(likelihood,
|
||||||
het_noise, uncertain_inputs, LB,
|
het_noise, uncertain_inputs, LB,
|
||||||
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
||||||
psi0, psi1, beta,
|
psi0, psi1, beta,
|
||||||
data_fit, num_data, output_dim, trYYT)
|
data_fit, num_data, output_dim, trYYT)
|
||||||
|
|
||||||
#likelihood.update_gradients(partial_for_likelihood)
|
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
|
||||||
|
|
||||||
if uncertain_inputs:
|
if uncertain_inputs:
|
||||||
grad_dict = {'dL_dKmm': dL_dKmm,
|
grad_dict = {'dL_dKmm': dL_dKmm,
|
||||||
'dL_dpsi0':dL_dpsi0,
|
'dL_dpsi0':dL_dpsi0,
|
||||||
'dL_dpsi1':dL_dpsi1,
|
'dL_dpsi1':dL_dpsi1,
|
||||||
'dL_dpsi2':dL_dpsi2,
|
'dL_dpsi2':dL_dpsi2,
|
||||||
'partial_for_likelihood':partial_for_likelihood}
|
'dL_dthetaL':dL_dthetaL}
|
||||||
else:
|
else:
|
||||||
grad_dict = {'dL_dKmm': dL_dKmm,
|
grad_dict = {'dL_dKmm': dL_dKmm,
|
||||||
'dL_dKdiag':dL_dpsi0,
|
'dL_dKdiag':dL_dpsi0,
|
||||||
'dL_dKnm':dL_dpsi1,
|
'dL_dKnm':dL_dpsi1,
|
||||||
'partial_for_likelihood':partial_for_likelihood}
|
'dL_dthetaL':dL_dthetaL}
|
||||||
|
|
||||||
#get sufficient things for posterior prediction
|
#get sufficient things for posterior prediction
|
||||||
#TODO: do we really want to do this in the loop?
|
#TODO: do we really want to do this in the loop?
|
||||||
|
|
@ -232,7 +232,7 @@ class VarDTCMissingData(object):
|
||||||
if uncertain_inputs:
|
if uncertain_inputs:
|
||||||
dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing))
|
dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing))
|
||||||
|
|
||||||
partial_for_likelihood = 0
|
dL_dR = 0
|
||||||
woodbury_vector = np.zeros((num_inducing, Y.shape[1]))
|
woodbury_vector = np.zeros((num_inducing, Y.shape[1]))
|
||||||
woodbury_inv_all = np.zeros((num_inducing, num_inducing, Y.shape[1]))
|
woodbury_inv_all = np.zeros((num_inducing, num_inducing, Y.shape[1]))
|
||||||
dL_dKmm = 0
|
dL_dKmm = 0
|
||||||
|
|
@ -308,7 +308,7 @@ class VarDTCMissingData(object):
|
||||||
psi0, A, LB, trYYT, data_fit)
|
psi0, A, LB, trYYT, data_fit)
|
||||||
|
|
||||||
#put the gradients in the right places
|
#put the gradients in the right places
|
||||||
partial_for_likelihood += _compute_partial_for_likelihood(likelihood,
|
dL_dR += _compute_dL_dR(likelihood,
|
||||||
het_noise, uncertain_inputs, LB,
|
het_noise, uncertain_inputs, LB,
|
||||||
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
||||||
psi0, psi1, beta,
|
psi0, psi1, beta,
|
||||||
|
|
@ -334,12 +334,12 @@ class VarDTCMissingData(object):
|
||||||
'dL_dpsi0':dL_dpsi0_all,
|
'dL_dpsi0':dL_dpsi0_all,
|
||||||
'dL_dpsi1':dL_dpsi1_all,
|
'dL_dpsi1':dL_dpsi1_all,
|
||||||
'dL_dpsi2':dL_dpsi2_all,
|
'dL_dpsi2':dL_dpsi2_all,
|
||||||
'partial_for_likelihood':partial_for_likelihood}
|
'dL_dR':dL_dR}
|
||||||
else:
|
else:
|
||||||
grad_dict = {'dL_dKmm': dL_dKmm,
|
grad_dict = {'dL_dKmm': dL_dKmm,
|
||||||
'dL_dKdiag':dL_dpsi0_all,
|
'dL_dKdiag':dL_dpsi0_all,
|
||||||
'dL_dKnm':dL_dpsi1_all,
|
'dL_dKnm':dL_dpsi1_all,
|
||||||
'partial_for_likelihood':partial_for_likelihood}
|
'dL_dR':dL_dR}
|
||||||
|
|
||||||
#get sufficient things for posterior prediction
|
#get sufficient things for posterior prediction
|
||||||
#TODO: do we really want to do this in the loop?
|
#TODO: do we really want to do this in the loop?
|
||||||
|
|
@ -385,11 +385,11 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C
|
||||||
return dL_dpsi0, dL_dpsi1, dL_dpsi2
|
return dL_dpsi0, dL_dpsi1, dL_dpsi2
|
||||||
|
|
||||||
|
|
||||||
def _compute_partial_for_likelihood(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT):
|
def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT):
|
||||||
# the partial derivative vector for the likelihood
|
# the partial derivative vector for the likelihood
|
||||||
if likelihood.size == 0:
|
if likelihood.size == 0:
|
||||||
# save computation here.
|
# save computation here.
|
||||||
partial_for_likelihood = None
|
dL_dR = None
|
||||||
elif het_noise:
|
elif het_noise:
|
||||||
if uncertain_inputs:
|
if uncertain_inputs:
|
||||||
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
|
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
|
||||||
|
|
@ -399,20 +399,20 @@ def _compute_partial_for_likelihood(likelihood, het_noise, uncertain_inputs, LB,
|
||||||
Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0)
|
Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0)
|
||||||
_LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0)
|
_LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0)
|
||||||
|
|
||||||
partial_for_likelihood = -0.5 * beta + 0.5 * likelihood.V**2
|
dL_dR = -0.5 * beta + 0.5 * likelihood.V**2
|
||||||
partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2
|
dL_dR += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2
|
||||||
|
|
||||||
partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2
|
dL_dR += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2
|
||||||
|
|
||||||
partial_for_likelihood += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * likelihood.Y * beta**2
|
dL_dR += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * likelihood.Y * beta**2
|
||||||
partial_for_likelihood += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2
|
dL_dR += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# likelihood is not heteroscedatic
|
# likelihood is not heteroscedatic
|
||||||
partial_for_likelihood = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
|
dL_dR = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
|
||||||
partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
|
dL_dR += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
|
||||||
partial_for_likelihood += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
|
dL_dR += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
|
||||||
return partial_for_likelihood
|
return dL_dR
|
||||||
|
|
||||||
def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit):
|
def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit):
|
||||||
#compute log marginal likelihood
|
#compute log marginal likelihood
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ import link_functions
|
||||||
from likelihood import Likelihood
|
from likelihood import Likelihood
|
||||||
from ..core.parameterization import Param
|
from ..core.parameterization import Param
|
||||||
from ..core.parameterization.transformations import Logexp
|
from ..core.parameterization.transformations import Logexp
|
||||||
|
from scipy import stats
|
||||||
|
|
||||||
class Gaussian(Likelihood):
|
class Gaussian(Likelihood):
|
||||||
"""
|
"""
|
||||||
|
|
@ -49,11 +50,14 @@ class Gaussian(Likelihood):
|
||||||
if isinstance(gp_link, link_functions.Identity):
|
if isinstance(gp_link, link_functions.Identity):
|
||||||
self.log_concave = True
|
self.log_concave = True
|
||||||
|
|
||||||
def covariance_matrix(self, Y, **Y_metadata):
|
def covariance_matrix(self, Y, Y_metadata=None):
|
||||||
return np.eye(Y.shape[0]) * self.variance
|
return np.eye(Y.shape[0]) * self.variance
|
||||||
|
|
||||||
def update_gradients(self, partial):
|
def update_gradients(self, grad):
|
||||||
self.variance.gradient = np.sum(partial)
|
self.variance.gradient = grad
|
||||||
|
|
||||||
|
def exact_inference_gradients(self, dL_dKdiag):
|
||||||
|
return dL_dKdiag.sum()
|
||||||
|
|
||||||
def _preprocess_values(self, Y):
|
def _preprocess_values(self, Y):
|
||||||
"""
|
"""
|
||||||
|
|
@ -76,16 +80,12 @@ class Gaussian(Likelihood):
|
||||||
Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
|
Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
|
||||||
return Z_hat, mu_hat, sigma2_hat
|
return Z_hat, mu_hat, sigma2_hat
|
||||||
|
|
||||||
def predictive_values(self, mu, var, full_cov=False):
|
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
|
||||||
if full_cov:
|
if full_cov:
|
||||||
var += np.eye(var.shape[0])*self.variance
|
var += np.eye(var.shape[0])*self.variance
|
||||||
d = 2*np.sqrt(np.diag(var))
|
|
||||||
low, up = mu - d, mu + d
|
|
||||||
else:
|
else:
|
||||||
var += self.variance
|
var += self.variance
|
||||||
d = 2*np.sqrt(var)
|
return mu, var
|
||||||
low, up = mu - d, mu + d
|
|
||||||
return mu, var, low, up
|
|
||||||
|
|
||||||
def predictive_mean(self, mu, sigma):
|
def predictive_mean(self, mu, sigma):
|
||||||
return mu
|
return mu
|
||||||
|
|
@ -93,6 +93,9 @@ class Gaussian(Likelihood):
|
||||||
def predictive_variance(self, mu, sigma, predictive_mean=None):
|
def predictive_variance(self, mu, sigma, predictive_mean=None):
|
||||||
return self.variance + sigma**2
|
return self.variance + sigma**2
|
||||||
|
|
||||||
|
def predictive_quantiles(self, mu, var, quantiles, Y_metadata):
|
||||||
|
return [stats.norm.ppf(q)*np.sqrt(var) + mu for q in quantiles]
|
||||||
|
|
||||||
def pdf_link(self, link_f, y, extra_data=None):
|
def pdf_link(self, link_f, y, extra_data=None):
|
||||||
"""
|
"""
|
||||||
Likelihood function given link(f)
|
Likelihood function given link(f)
|
||||||
|
|
|
||||||
|
|
@ -135,7 +135,7 @@ class Likelihood(Parameterized):
|
||||||
|
|
||||||
return mean
|
return mean
|
||||||
|
|
||||||
def _predictive_variance(self,mu,variance,predictive_mean=None):
|
def _predictive_variance(self, mu,variance, predictive_mean=None):
|
||||||
"""
|
"""
|
||||||
Numerical approximation to the predictive variance: V(Y_star)
|
Numerical approximation to the predictive variance: V(Y_star)
|
||||||
|
|
||||||
|
|
@ -358,7 +358,7 @@ class Likelihood(Parameterized):
|
||||||
|
|
||||||
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
|
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
|
||||||
|
|
||||||
def predictive_values(self, mu, var, full_cov=False, sampling=True, num_samples=10000):
|
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction.
|
Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction.
|
||||||
|
|
||||||
|
|
@ -366,14 +366,21 @@ class Likelihood(Parameterized):
|
||||||
:param var: variance of the latent variable, f, of posterior
|
:param var: variance of the latent variable, f, of posterior
|
||||||
:param full_cov: whether to use the full covariance or just the diagonal
|
:param full_cov: whether to use the full covariance or just the diagonal
|
||||||
:type full_cov: Boolean
|
:type full_cov: Boolean
|
||||||
:param num_samples: number of samples to use in computing quantiles and
|
|
||||||
possibly mean variance
|
|
||||||
:type num_samples: integer
|
|
||||||
:param sampling: Whether to use samples for mean and variances anyway
|
|
||||||
:type sampling: Boolean
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
pred_mean = self.predictive_mean(mu, var, Y_metadata)
|
||||||
|
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata)
|
||||||
|
|
||||||
|
return pred_mean, pred_var
|
||||||
|
|
||||||
|
|
||||||
|
def samples(self, gp):
|
||||||
|
"""
|
||||||
|
Returns a set of samples of observations based on a given value of the latent variable.
|
||||||
|
|
||||||
|
:param gp: latent variable
|
||||||
|
"""
|
||||||
|
raise NotImplementedError
|
||||||
if sampling:
|
if sampling:
|
||||||
#Get gp_samples f* using posterior mean and variance
|
#Get gp_samples f* using posterior mean and variance
|
||||||
if not full_cov:
|
if not full_cov:
|
||||||
|
|
@ -393,20 +400,4 @@ class Likelihood(Parameterized):
|
||||||
q1 = np.percentile(samples, 2.5, axis=axis)[:,None]
|
q1 = np.percentile(samples, 2.5, axis=axis)[:,None]
|
||||||
q3 = np.percentile(samples, 97.5, axis=axis)[:,None]
|
q3 = np.percentile(samples, 97.5, axis=axis)[:,None]
|
||||||
|
|
||||||
else:
|
|
||||||
|
|
||||||
pred_mean = self.predictive_mean(mu, var)
|
|
||||||
pred_var = self.predictive_variance(mu, var, pred_mean)
|
|
||||||
print "WARNING: Predictive quantiles are only computed when sampling."
|
|
||||||
q1 = np.repeat(np.nan,pred_mean.size)[:,None]
|
|
||||||
q3 = q1.copy()
|
|
||||||
|
|
||||||
return pred_mean, pred_var, q1, q3
|
|
||||||
|
|
||||||
def samples(self, gp):
|
|
||||||
"""
|
|
||||||
Returns a set of samples of observations based on a given value of the latent variable.
|
|
||||||
|
|
||||||
:param gp: latent variable
|
|
||||||
"""
|
|
||||||
raise NotImplementedError
|
|
||||||
|
|
|
||||||
|
|
@ -3,56 +3,57 @@ from scipy import stats, special
|
||||||
from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
|
from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
|
||||||
import link_functions
|
import link_functions
|
||||||
from likelihood import Likelihood
|
from likelihood import Likelihood
|
||||||
|
from gaussian import Gaussian
|
||||||
from ..core.parameterization import Param
|
from ..core.parameterization import Param
|
||||||
from ..core.parameterization.transformations import Logexp
|
from ..core.parameterization.transformations import Logexp
|
||||||
from ..core.parameterization import Parameterized
|
from ..core.parameterization import Parameterized
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
class MixedNoise(Likelihood):
|
class MixedNoise(Likelihood):
|
||||||
def __init__(self, likelihoods_list, noise_index, variance = None, name='mixed_noise'):
|
def __init__(self, likelihoods_list, name='mixed_noise'):
|
||||||
|
|
||||||
Nlike = len(likelihoods_list)
|
|
||||||
self.order = np.unique(noise_index)
|
|
||||||
|
|
||||||
assert self.order.size == Nlike
|
|
||||||
|
|
||||||
if variance is None:
|
|
||||||
variance = np.ones(Nlike)
|
|
||||||
else:
|
|
||||||
assert variance.size == Nlike
|
|
||||||
|
|
||||||
super(Likelihood, self).__init__(name=name)
|
super(Likelihood, self).__init__(name=name)
|
||||||
|
|
||||||
self.add_parameters(*likelihoods_list)
|
self.add_parameters(*likelihoods_list)
|
||||||
self.likelihoods_list = likelihoods_list
|
self.likelihoods_list = likelihoods_list
|
||||||
self.noise_index = noise_index
|
|
||||||
self.log_concave = False
|
self.log_concave = False
|
||||||
self.likelihoods_indices = [noise_index.flatten()==j for j in self.order]
|
|
||||||
|
|
||||||
def covariance_matrix(self, Y, noise_index, **Y_metadata):
|
def update_gradients(self, gradients):
|
||||||
variance = np.zeros(Y.shape[0])
|
self.gradient = gradients
|
||||||
for lik, ind in itertools.izip(self.likelihoods_list, self.likelihoods_indices):
|
|
||||||
variance[ind] = lik.variance
|
|
||||||
return np.diag(variance)
|
|
||||||
|
|
||||||
def update_gradients(self, partial, noise_index, **Y_metadata):
|
def exact_inference_gradients(self, dL_dKdiag, Y_metadata):
|
||||||
[lik.update_gradients(partial[ind]) for lik,ind in itertools.izip(self.likelihoods_list, self.likelihoods_indices)]
|
assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
|
||||||
|
ind = Y_metadata['output_index']
|
||||||
|
return np.array([dL_dKdiag[ind==i].sum() for i in range(len(self.likelihoods_list))])
|
||||||
|
|
||||||
def predictive_values(self, mu, var, full_cov=False, noise_index=None, **Y_metadata):
|
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
|
||||||
_variance = np.array([ self.likelihoods_list[j].variance for j in noise_index ])
|
if all([isinstance(l, Gaussian) for l in self.likelihoods_list]):
|
||||||
if full_cov:
|
ind = Y_metadata['output_index']
|
||||||
var += np.eye(var.shape[0])*_variance
|
_variance = np.array([self.likelihoods_list[j].variance for j in ind ])
|
||||||
d = 2*np.sqrt(np.diag(var))
|
if full_cov:
|
||||||
low, up = mu - d, mu + d
|
var += np.eye(var.shape[0])*_variance
|
||||||
|
d = 2*np.sqrt(np.diag(var))
|
||||||
|
low, up = mu - d, mu + d
|
||||||
|
else:
|
||||||
|
var += _variance
|
||||||
|
d = 2*np.sqrt(var)
|
||||||
|
low, up = mu - d, mu + d
|
||||||
|
return mu, var, low, up
|
||||||
else:
|
else:
|
||||||
var += _variance
|
raise NotImplementedError
|
||||||
d = 2*np.sqrt(var)
|
|
||||||
low, up = mu - d, mu + d
|
|
||||||
return mu, var, low, up
|
|
||||||
|
|
||||||
def predictive_variance(self, mu, sigma, noise_index, predictive_mean=None, **Y_metadata):
|
def predictive_variance(self, mu, sigma, **other_shit):
|
||||||
if isinstance(noise_index,int):
|
if isinstance(noise_index,int):
|
||||||
_variance = self.variance[noise_index]
|
_variance = self.variance[noise_index]
|
||||||
else:
|
else:
|
||||||
_variance = np.array([ self.variance[j] for j in noise_index ])[:,None]
|
_variance = np.array([ self.variance[j] for j in noise_index ])[:,None]
|
||||||
return _variance + sigma**2
|
return _variance + sigma**2
|
||||||
|
|
||||||
|
|
||||||
|
def covariance_matrix(self, Y, Y_metadata):
|
||||||
|
assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
|
||||||
|
variance = np.zeros(Y.shape[0])
|
||||||
|
for lik, ind in itertools.izip(self.likelihoods_list, self.likelihoods_indices):
|
||||||
|
variance[ind] = lik.variance
|
||||||
|
return np.diag(variance)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
which_data_ycols='all', fixed_inputs=[],
|
which_data_ycols='all', fixed_inputs=[],
|
||||||
levels=20, samples=0, fignum=None, ax=None, resolution=None,
|
levels=20, samples=0, fignum=None, ax=None, resolution=None,
|
||||||
plot_raw=False,
|
plot_raw=False,
|
||||||
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
|
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None):
|
||||||
"""
|
"""
|
||||||
Plot the posterior of the GP.
|
Plot the posterior of the GP.
|
||||||
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
|
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
|
||||||
|
|
@ -84,17 +84,12 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
m, v = model._raw_predict(Xgrid)
|
m, v = model._raw_predict(Xgrid)
|
||||||
lower = m - 2*np.sqrt(v)
|
lower = m - 2*np.sqrt(v)
|
||||||
upper = m + 2*np.sqrt(v)
|
upper = m + 2*np.sqrt(v)
|
||||||
Y = Y
|
|
||||||
else:
|
else:
|
||||||
if 'noise_index' in model.Y_metadata.keys():
|
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata)
|
||||||
if np.unique(model.Y_metadata['noise_index'][which_data_rows]).size > 1:
|
|
||||||
print "Data slices choosen have different noise models. Just one will be used."
|
lower, upper = model.predict_quantiles(Xgrid, Y_metadata=Y_metadata)
|
||||||
noise_index = np.repeat(model.Y_metadata['noise_index'][which_data_rows][0], Xgrid.shape[0])[:,None]
|
|
||||||
m, v, lower, upper = model.predict(Xgrid,full_cov=False,noise_index=noise_index)
|
|
||||||
else:
|
|
||||||
noise_index = None
|
|
||||||
m, v, lower, upper = model.predict(Xgrid,full_cov=False)
|
|
||||||
Y = Y
|
|
||||||
for d in which_data_ycols:
|
for d in which_data_ycols:
|
||||||
plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
|
plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
|
||||||
plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
|
plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
|
||||||
|
|
@ -144,10 +139,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
#predict on the frame and plot
|
#predict on the frame and plot
|
||||||
if plot_raw:
|
if plot_raw:
|
||||||
m, _ = model._raw_predict(Xgrid)
|
m, _ = model._raw_predict(Xgrid)
|
||||||
Y = Y
|
|
||||||
else:
|
else:
|
||||||
m, _, _, _ = model.predict(Xgrid)
|
m, _ = model.predict(Xgrid)
|
||||||
Y = Y
|
|
||||||
for d in which_data_ycols:
|
for d in which_data_ycols:
|
||||||
m_d = m[:,d].reshape(resolution, resolution).T
|
m_d = m[:,d].reshape(resolution, resolution).T
|
||||||
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
||||||
|
|
|
||||||
|
|
@ -667,8 +667,8 @@ class LaplaceTests(unittest.TestCase):
|
||||||
m2[:] = m1[:]
|
m2[:] = m1[:]
|
||||||
|
|
||||||
#Predict for training points to get posterior mean and variance
|
#Predict for training points to get posterior mean and variance
|
||||||
post_mean, post_var, _, _ = m1.predict(X)
|
post_mean, post_var = m1.predict(X)
|
||||||
post_mean_approx, post_var_approx, _, _ = m2.predict(X)
|
post_mean_approx, post_var_approx, = m2.predict(X)
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
import pylab as pb
|
import pylab as pb
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,7 @@ class Cacher(object):
|
||||||
if k in kw and kw[k] is not None:
|
if k in kw and kw[k] is not None:
|
||||||
return self.operation(*args, **kw)
|
return self.operation(*args, **kw)
|
||||||
# TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
|
# TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
|
||||||
# return self.operation(*args)
|
return self.operation(*args)
|
||||||
|
|
||||||
#if the result is cached, return the cached computation
|
#if the result is cached, return the cached computation
|
||||||
state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
|
state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue