mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-05 14:55:15 +02:00
lots of fixes, including prediction being mean and variance only
This commit is contained in:
parent
365b8ae1e1
commit
cc96f5b3d5
13 changed files with 118 additions and 128 deletions
|
|
@ -120,8 +120,12 @@ class GP(Model):
|
|||
mu, var = self._raw_predict(Xnew, full_cov=full_cov)
|
||||
|
||||
# now push through likelihood
|
||||
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
|
||||
return mean, var, _025pm, _975pm
|
||||
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
|
||||
return mean, var
|
||||
|
||||
def predict_quantiles(self, X, quantiles=(0.025, 0.975), Y_metadata=None):
|
||||
m, v = self._raw_predict(X, full_cov=False)
|
||||
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
|
||||
|
||||
def posterior_samples_f(self,X,size=10, full_cov=True):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -54,13 +54,13 @@ class SparseGP(GP):
|
|||
|
||||
def parameters_changed(self):
|
||||
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y)
|
||||
self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood'))
|
||||
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
|
||||
if isinstance(self.X, VariationalPosterior):
|
||||
#gradients wrt kernel
|
||||
dL_dKmm = self.grad_dict.pop('dL_dKmm')
|
||||
self.kern.update_gradients_full(dL_dKmm, self.Z, None)
|
||||
target = self.kern.gradient.copy()
|
||||
self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
|
||||
self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
self.kern.gradient += target
|
||||
|
||||
#gradients wrt Z
|
||||
|
|
|
|||
|
|
@ -16,8 +16,8 @@ If the likelihood object is something other than Gaussian, then exact inference
|
|||
is not tractable. We then resort to a Laplace approximation (laplace.py) or
|
||||
expectation propagation (ep.py).
|
||||
|
||||
The inference methods return a
|
||||
:class:`~GPy.inference.latent_function_inference.posterior.Posterior`
|
||||
The inference methods return a
|
||||
:class:`~GPy.inference.latent_function_inference.posterior.Posterior`
|
||||
instance, which is a simple
|
||||
structure which contains a summary of the posterior. The model classes can then
|
||||
use this posterior object for making predictions, optimizing hyper-parameters,
|
||||
|
|
@ -33,13 +33,13 @@ from dtc import DTC
|
|||
from fitc import FITC
|
||||
|
||||
# class FullLatentFunctionData(object):
|
||||
#
|
||||
#
|
||||
#
|
||||
#
|
||||
# class LatentFunctionInference(object):
|
||||
# def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
||||
# """
|
||||
# Do inference on the latent functions given a covariance function `kern`,
|
||||
# inputs and outputs `X` and `Y`, and a likelihood `likelihood`.
|
||||
# inputs and outputs `X` and `Y`, and a likelihood `likelihood`.
|
||||
# Additional metadata for the outputs `Y` can be given in `Y_metadata`.
|
||||
# """
|
||||
# raise NotImplementedError, "Abstract base class for full inference"
|
||||
# raise NotImplementedError, "Abstract base class for full inference"
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ class DTC(object):
|
|||
U = Knm
|
||||
Uy = np.dot(U.T,Y)
|
||||
|
||||
#factor Kmm
|
||||
#factor Kmm
|
||||
Kmmi, L, Li, _ = pdinv(Kmm)
|
||||
|
||||
# Compute A
|
||||
|
|
@ -78,7 +78,9 @@ class DTC(object):
|
|||
Uv = np.dot(U, v)
|
||||
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1))*beta**2
|
||||
|
||||
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU.T}
|
||||
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
|
||||
|
||||
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}
|
||||
|
||||
#construct a posterior object
|
||||
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
|
||||
|
|
@ -154,11 +156,8 @@ class vDTC(object):
|
|||
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2
|
||||
dL_dR -=beta*trace_term/num_data
|
||||
|
||||
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T}
|
||||
|
||||
#update gradients
|
||||
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
|
||||
likelihood.update_gradients(dL_dR)
|
||||
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
|
||||
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}
|
||||
|
||||
#construct a posterior object
|
||||
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ class ExactGaussianInference(object):
|
|||
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
|
||||
raise NotImplementedError, 'TODO' #TODO
|
||||
|
||||
def inference(self, kern, X, likelihood, Y, **Y_metadata):
|
||||
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
||||
"""
|
||||
Returns a Posterior class containing essential quantities of the posterior
|
||||
"""
|
||||
|
|
@ -41,12 +41,14 @@ class ExactGaussianInference(object):
|
|||
|
||||
K = kern.K(X)
|
||||
|
||||
Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, **Y_metadata))
|
||||
Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata))
|
||||
|
||||
alpha, _ = dpotrs(LW, YYT_factor, lower=1)
|
||||
|
||||
log_marginal = 0.5*(-Y.size * log_2_pi - Y.shape[1] * W_logdet - np.sum(alpha * YYT_factor))
|
||||
|
||||
|
||||
dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
|
||||
|
||||
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}
|
||||
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK))
|
||||
|
||||
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
|
||||
|
|
|
|||
|
|
@ -18,10 +18,6 @@ class FITC(object):
|
|||
self.const_jitter = 1e-6
|
||||
|
||||
def inference(self, kern, X, Z, likelihood, Y):
|
||||
|
||||
#TODO: MAX! fix this!
|
||||
from ...util.misc import param_to_array
|
||||
Y = param_to_array(Y)
|
||||
|
||||
num_inducing, _ = Z.shape
|
||||
num_data, output_dim = Y.shape
|
||||
|
|
@ -36,7 +32,7 @@ class FITC(object):
|
|||
Knm = kern.K(X, Z)
|
||||
U = Knm
|
||||
|
||||
#factor Kmm
|
||||
#factor Kmm
|
||||
Kmmi, L, Li, _ = pdinv(Kmm)
|
||||
|
||||
#compute beta_star, the effective noise precision
|
||||
|
|
@ -72,7 +68,7 @@ class FITC(object):
|
|||
vvT_P = tdot(v.reshape(-1,1)) + P
|
||||
dL_dK = 0.5*(Kmmi - vvT_P)
|
||||
KiU = np.dot(Kmmi, U.T)
|
||||
dL_dK += np.dot(KiU*dL_dR, KiU.T)
|
||||
dL_dK += np.dot(KiU*dL_dR, KiU.T)
|
||||
|
||||
# Compute dL_dU
|
||||
vY = np.dot(v.reshape(-1,1),Y.T)
|
||||
|
|
@ -80,7 +76,8 @@ class FITC(object):
|
|||
dL_dU *= beta_star
|
||||
dL_dU -= 2.*KiU*dL_dR
|
||||
|
||||
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T, 'partial_for_likelihood':dL_dR}
|
||||
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
|
||||
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}
|
||||
|
||||
#construct a posterior object
|
||||
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
|
||||
|
|
|
|||
|
|
@ -137,25 +137,25 @@ class VarDTC(object):
|
|||
psi0, A, LB, trYYT, data_fit)
|
||||
|
||||
#put the gradients in the right places
|
||||
partial_for_likelihood = _compute_partial_for_likelihood(likelihood,
|
||||
dL_dR = _compute_dL_dR(likelihood,
|
||||
het_noise, uncertain_inputs, LB,
|
||||
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
||||
psi0, psi1, beta,
|
||||
data_fit, num_data, output_dim, trYYT)
|
||||
|
||||
#likelihood.update_gradients(partial_for_likelihood)
|
||||
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
|
||||
|
||||
if uncertain_inputs:
|
||||
grad_dict = {'dL_dKmm': dL_dKmm,
|
||||
'dL_dpsi0':dL_dpsi0,
|
||||
'dL_dpsi1':dL_dpsi1,
|
||||
'dL_dpsi2':dL_dpsi2,
|
||||
'partial_for_likelihood':partial_for_likelihood}
|
||||
'dL_dthetaL':dL_dthetaL}
|
||||
else:
|
||||
grad_dict = {'dL_dKmm': dL_dKmm,
|
||||
'dL_dKdiag':dL_dpsi0,
|
||||
'dL_dKnm':dL_dpsi1,
|
||||
'partial_for_likelihood':partial_for_likelihood}
|
||||
'dL_dthetaL':dL_dthetaL}
|
||||
|
||||
#get sufficient things for posterior prediction
|
||||
#TODO: do we really want to do this in the loop?
|
||||
|
|
@ -232,7 +232,7 @@ class VarDTCMissingData(object):
|
|||
if uncertain_inputs:
|
||||
dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing))
|
||||
|
||||
partial_for_likelihood = 0
|
||||
dL_dR = 0
|
||||
woodbury_vector = np.zeros((num_inducing, Y.shape[1]))
|
||||
woodbury_inv_all = np.zeros((num_inducing, num_inducing, Y.shape[1]))
|
||||
dL_dKmm = 0
|
||||
|
|
@ -308,7 +308,7 @@ class VarDTCMissingData(object):
|
|||
psi0, A, LB, trYYT, data_fit)
|
||||
|
||||
#put the gradients in the right places
|
||||
partial_for_likelihood += _compute_partial_for_likelihood(likelihood,
|
||||
dL_dR += _compute_dL_dR(likelihood,
|
||||
het_noise, uncertain_inputs, LB,
|
||||
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
||||
psi0, psi1, beta,
|
||||
|
|
@ -334,12 +334,12 @@ class VarDTCMissingData(object):
|
|||
'dL_dpsi0':dL_dpsi0_all,
|
||||
'dL_dpsi1':dL_dpsi1_all,
|
||||
'dL_dpsi2':dL_dpsi2_all,
|
||||
'partial_for_likelihood':partial_for_likelihood}
|
||||
'dL_dR':dL_dR}
|
||||
else:
|
||||
grad_dict = {'dL_dKmm': dL_dKmm,
|
||||
'dL_dKdiag':dL_dpsi0_all,
|
||||
'dL_dKnm':dL_dpsi1_all,
|
||||
'partial_for_likelihood':partial_for_likelihood}
|
||||
'dL_dR':dL_dR}
|
||||
|
||||
#get sufficient things for posterior prediction
|
||||
#TODO: do we really want to do this in the loop?
|
||||
|
|
@ -385,11 +385,11 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C
|
|||
return dL_dpsi0, dL_dpsi1, dL_dpsi2
|
||||
|
||||
|
||||
def _compute_partial_for_likelihood(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT):
|
||||
def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT):
|
||||
# the partial derivative vector for the likelihood
|
||||
if likelihood.size == 0:
|
||||
# save computation here.
|
||||
partial_for_likelihood = None
|
||||
dL_dR = None
|
||||
elif het_noise:
|
||||
if uncertain_inputs:
|
||||
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
|
||||
|
|
@ -399,20 +399,20 @@ def _compute_partial_for_likelihood(likelihood, het_noise, uncertain_inputs, LB,
|
|||
Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0)
|
||||
_LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0)
|
||||
|
||||
partial_for_likelihood = -0.5 * beta + 0.5 * likelihood.V**2
|
||||
partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2
|
||||
dL_dR = -0.5 * beta + 0.5 * likelihood.V**2
|
||||
dL_dR += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2
|
||||
|
||||
partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2
|
||||
dL_dR += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2
|
||||
|
||||
partial_for_likelihood += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * likelihood.Y * beta**2
|
||||
partial_for_likelihood += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2
|
||||
dL_dR += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * likelihood.Y * beta**2
|
||||
dL_dR += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2
|
||||
|
||||
else:
|
||||
# likelihood is not heteroscedatic
|
||||
partial_for_likelihood = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
|
||||
partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
|
||||
partial_for_likelihood += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
|
||||
return partial_for_likelihood
|
||||
dL_dR = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
|
||||
dL_dR += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
|
||||
dL_dR += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
|
||||
return dL_dR
|
||||
|
||||
def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit):
|
||||
#compute log marginal likelihood
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ import link_functions
|
|||
from likelihood import Likelihood
|
||||
from ..core.parameterization import Param
|
||||
from ..core.parameterization.transformations import Logexp
|
||||
from scipy import stats
|
||||
|
||||
class Gaussian(Likelihood):
|
||||
"""
|
||||
|
|
@ -49,11 +50,14 @@ class Gaussian(Likelihood):
|
|||
if isinstance(gp_link, link_functions.Identity):
|
||||
self.log_concave = True
|
||||
|
||||
def covariance_matrix(self, Y, **Y_metadata):
|
||||
def covariance_matrix(self, Y, Y_metadata=None):
|
||||
return np.eye(Y.shape[0]) * self.variance
|
||||
|
||||
def update_gradients(self, partial):
|
||||
self.variance.gradient = np.sum(partial)
|
||||
def update_gradients(self, grad):
|
||||
self.variance.gradient = grad
|
||||
|
||||
def exact_inference_gradients(self, dL_dKdiag):
|
||||
return dL_dKdiag.sum()
|
||||
|
||||
def _preprocess_values(self, Y):
|
||||
"""
|
||||
|
|
@ -76,16 +80,12 @@ class Gaussian(Likelihood):
|
|||
Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
|
||||
return Z_hat, mu_hat, sigma2_hat
|
||||
|
||||
def predictive_values(self, mu, var, full_cov=False):
|
||||
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
|
||||
if full_cov:
|
||||
var += np.eye(var.shape[0])*self.variance
|
||||
d = 2*np.sqrt(np.diag(var))
|
||||
low, up = mu - d, mu + d
|
||||
else:
|
||||
var += self.variance
|
||||
d = 2*np.sqrt(var)
|
||||
low, up = mu - d, mu + d
|
||||
return mu, var, low, up
|
||||
return mu, var
|
||||
|
||||
def predictive_mean(self, mu, sigma):
|
||||
return mu
|
||||
|
|
@ -93,6 +93,9 @@ class Gaussian(Likelihood):
|
|||
def predictive_variance(self, mu, sigma, predictive_mean=None):
|
||||
return self.variance + sigma**2
|
||||
|
||||
def predictive_quantiles(self, mu, var, quantiles, Y_metadata):
|
||||
return [stats.norm.ppf(q)*np.sqrt(var) + mu for q in quantiles]
|
||||
|
||||
def pdf_link(self, link_f, y, extra_data=None):
|
||||
"""
|
||||
Likelihood function given link(f)
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ class Likelihood(Parameterized):
|
|||
|
||||
return mean
|
||||
|
||||
def _predictive_variance(self,mu,variance,predictive_mean=None):
|
||||
def _predictive_variance(self, mu,variance, predictive_mean=None):
|
||||
"""
|
||||
Numerical approximation to the predictive variance: V(Y_star)
|
||||
|
||||
|
|
@ -358,7 +358,7 @@ class Likelihood(Parameterized):
|
|||
|
||||
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
|
||||
|
||||
def predictive_values(self, mu, var, full_cov=False, sampling=True, num_samples=10000):
|
||||
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
|
||||
"""
|
||||
Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction.
|
||||
|
||||
|
|
@ -366,14 +366,21 @@ class Likelihood(Parameterized):
|
|||
:param var: variance of the latent variable, f, of posterior
|
||||
:param full_cov: whether to use the full covariance or just the diagonal
|
||||
:type full_cov: Boolean
|
||||
:param num_samples: number of samples to use in computing quantiles and
|
||||
possibly mean variance
|
||||
:type num_samples: integer
|
||||
:param sampling: Whether to use samples for mean and variances anyway
|
||||
:type sampling: Boolean
|
||||
|
||||
"""
|
||||
|
||||
pred_mean = self.predictive_mean(mu, var, Y_metadata)
|
||||
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata)
|
||||
|
||||
return pred_mean, pred_var
|
||||
|
||||
|
||||
def samples(self, gp):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
:param gp: latent variable
|
||||
"""
|
||||
raise NotImplementedError
|
||||
if sampling:
|
||||
#Get gp_samples f* using posterior mean and variance
|
||||
if not full_cov:
|
||||
|
|
@ -393,20 +400,4 @@ class Likelihood(Parameterized):
|
|||
q1 = np.percentile(samples, 2.5, axis=axis)[:,None]
|
||||
q3 = np.percentile(samples, 97.5, axis=axis)[:,None]
|
||||
|
||||
else:
|
||||
|
||||
pred_mean = self.predictive_mean(mu, var)
|
||||
pred_var = self.predictive_variance(mu, var, pred_mean)
|
||||
print "WARNING: Predictive quantiles are only computed when sampling."
|
||||
q1 = np.repeat(np.nan,pred_mean.size)[:,None]
|
||||
q3 = q1.copy()
|
||||
|
||||
return pred_mean, pred_var, q1, q3
|
||||
|
||||
def samples(self, gp):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
:param gp: latent variable
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
|
|
|||
|
|
@ -3,56 +3,57 @@ from scipy import stats, special
|
|||
from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
from gaussian import Gaussian
|
||||
from ..core.parameterization import Param
|
||||
from ..core.parameterization.transformations import Logexp
|
||||
from ..core.parameterization import Parameterized
|
||||
import itertools
|
||||
|
||||
class MixedNoise(Likelihood):
|
||||
def __init__(self, likelihoods_list, noise_index, variance = None, name='mixed_noise'):
|
||||
|
||||
Nlike = len(likelihoods_list)
|
||||
self.order = np.unique(noise_index)
|
||||
|
||||
assert self.order.size == Nlike
|
||||
|
||||
if variance is None:
|
||||
variance = np.ones(Nlike)
|
||||
else:
|
||||
assert variance.size == Nlike
|
||||
def __init__(self, likelihoods_list, name='mixed_noise'):
|
||||
|
||||
super(Likelihood, self).__init__(name=name)
|
||||
|
||||
self.add_parameters(*likelihoods_list)
|
||||
self.likelihoods_list = likelihoods_list
|
||||
self.noise_index = noise_index
|
||||
self.log_concave = False
|
||||
self.likelihoods_indices = [noise_index.flatten()==j for j in self.order]
|
||||
|
||||
def covariance_matrix(self, Y, noise_index, **Y_metadata):
|
||||
variance = np.zeros(Y.shape[0])
|
||||
for lik, ind in itertools.izip(self.likelihoods_list, self.likelihoods_indices):
|
||||
variance[ind] = lik.variance
|
||||
return np.diag(variance)
|
||||
def update_gradients(self, gradients):
|
||||
self.gradient = gradients
|
||||
|
||||
def update_gradients(self, partial, noise_index, **Y_metadata):
|
||||
[lik.update_gradients(partial[ind]) for lik,ind in itertools.izip(self.likelihoods_list, self.likelihoods_indices)]
|
||||
def exact_inference_gradients(self, dL_dKdiag, Y_metadata):
|
||||
assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
|
||||
ind = Y_metadata['output_index']
|
||||
return np.array([dL_dKdiag[ind==i].sum() for i in range(len(self.likelihoods_list))])
|
||||
|
||||
def predictive_values(self, mu, var, full_cov=False, noise_index=None, **Y_metadata):
|
||||
_variance = np.array([ self.likelihoods_list[j].variance for j in noise_index ])
|
||||
if full_cov:
|
||||
var += np.eye(var.shape[0])*_variance
|
||||
d = 2*np.sqrt(np.diag(var))
|
||||
low, up = mu - d, mu + d
|
||||
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
|
||||
if all([isinstance(l, Gaussian) for l in self.likelihoods_list]):
|
||||
ind = Y_metadata['output_index']
|
||||
_variance = np.array([self.likelihoods_list[j].variance for j in ind ])
|
||||
if full_cov:
|
||||
var += np.eye(var.shape[0])*_variance
|
||||
d = 2*np.sqrt(np.diag(var))
|
||||
low, up = mu - d, mu + d
|
||||
else:
|
||||
var += _variance
|
||||
d = 2*np.sqrt(var)
|
||||
low, up = mu - d, mu + d
|
||||
return mu, var, low, up
|
||||
else:
|
||||
var += _variance
|
||||
d = 2*np.sqrt(var)
|
||||
low, up = mu - d, mu + d
|
||||
return mu, var, low, up
|
||||
raise NotImplementedError
|
||||
|
||||
def predictive_variance(self, mu, sigma, noise_index, predictive_mean=None, **Y_metadata):
|
||||
def predictive_variance(self, mu, sigma, **other_shit):
|
||||
if isinstance(noise_index,int):
|
||||
_variance = self.variance[noise_index]
|
||||
else:
|
||||
_variance = np.array([ self.variance[j] for j in noise_index ])[:,None]
|
||||
return _variance + sigma**2
|
||||
|
||||
|
||||
def covariance_matrix(self, Y, Y_metadata):
|
||||
assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
|
||||
variance = np.zeros(Y.shape[0])
|
||||
for lik, ind in itertools.izip(self.likelihoods_list, self.likelihoods_indices):
|
||||
variance[ind] = lik.variance
|
||||
return np.diag(variance)
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
which_data_ycols='all', fixed_inputs=[],
|
||||
levels=20, samples=0, fignum=None, ax=None, resolution=None,
|
||||
plot_raw=False,
|
||||
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
|
||||
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None):
|
||||
"""
|
||||
Plot the posterior of the GP.
|
||||
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
|
||||
|
|
@ -84,17 +84,12 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
m, v = model._raw_predict(Xgrid)
|
||||
lower = m - 2*np.sqrt(v)
|
||||
upper = m + 2*np.sqrt(v)
|
||||
Y = Y
|
||||
else:
|
||||
if 'noise_index' in model.Y_metadata.keys():
|
||||
if np.unique(model.Y_metadata['noise_index'][which_data_rows]).size > 1:
|
||||
print "Data slices choosen have different noise models. Just one will be used."
|
||||
noise_index = np.repeat(model.Y_metadata['noise_index'][which_data_rows][0], Xgrid.shape[0])[:,None]
|
||||
m, v, lower, upper = model.predict(Xgrid,full_cov=False,noise_index=noise_index)
|
||||
else:
|
||||
noise_index = None
|
||||
m, v, lower, upper = model.predict(Xgrid,full_cov=False)
|
||||
Y = Y
|
||||
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata)
|
||||
|
||||
lower, upper = model.predict_quantiles(Xgrid, Y_metadata=Y_metadata)
|
||||
|
||||
|
||||
for d in which_data_ycols:
|
||||
plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
|
||||
plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
|
||||
|
|
@ -144,10 +139,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
#predict on the frame and plot
|
||||
if plot_raw:
|
||||
m, _ = model._raw_predict(Xgrid)
|
||||
Y = Y
|
||||
else:
|
||||
m, _, _, _ = model.predict(Xgrid)
|
||||
Y = Y
|
||||
m, _ = model.predict(Xgrid)
|
||||
for d in which_data_ycols:
|
||||
m_d = m[:,d].reshape(resolution, resolution).T
|
||||
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
||||
|
|
|
|||
|
|
@ -667,8 +667,8 @@ class LaplaceTests(unittest.TestCase):
|
|||
m2[:] = m1[:]
|
||||
|
||||
#Predict for training points to get posterior mean and variance
|
||||
post_mean, post_var, _, _ = m1.predict(X)
|
||||
post_mean_approx, post_var_approx, _, _ = m2.predict(X)
|
||||
post_mean, post_var = m1.predict(X)
|
||||
post_mean_approx, post_var_approx, = m2.predict(X)
|
||||
|
||||
if debug:
|
||||
import pylab as pb
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ class Cacher(object):
|
|||
if k in kw and kw[k] is not None:
|
||||
return self.operation(*args, **kw)
|
||||
# TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
|
||||
# return self.operation(*args)
|
||||
return self.operation(*args)
|
||||
|
||||
#if the result is cached, return the cached computation
|
||||
state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue