mirror of https://github.com/SheffieldML/GPy.git
synced 2026-05-09 12:02:38 +02:00

merging last master

This commit is contained in: commit 1a02c65a61

133 changed files with 13282 additions and 9562 deletions
@@ -9,6 +9,7 @@ from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_miniba
import logging
from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch
from GPy.core.parameterization.param import Param
from GPy.core.parameterization.observable_array import ObsAr

class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
    """

@@ -80,46 +81,10 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
        """Get the gradients of the posterior distribution of X in its specific form."""
        return X.mean.gradient, X.variance.gradient

    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kw):
        posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices, **kw)

        if self.has_uncertain_inputs():
            current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
                variational_posterior=X,
                Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
                dL_dpsi1=grad_dict['dL_dpsi1'],
                dL_dpsi2=grad_dict['dL_dpsi2'])
        else:
            current_values['Xgrad'] = self.kern.gradients_X(grad_dict['dL_dKnm'], X, Z)
            current_values['Xgrad'] += self.kern.gradients_X_diag(grad_dict['dL_dKdiag'], X)
            if subset_indices is not None:
                value_indices['Xgrad'] = subset_indices['samples']

        kl_fctr = self.kl_factr
        if self.has_uncertain_inputs():
            if self.missing_data:
                d = self.output_dim
                log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
            else:
                log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)

            # Subsetting Variational Posterior objects makes the gradients
            # empty. We need them to be 0 though:
            X.mean.gradient[:] = 0
            X.variance.gradient[:] = 0

            self.variational_prior.update_gradients_KL(X)
            if self.missing_data:
                current_values['meangrad'] += kl_fctr*X.mean.gradient/d
                current_values['vargrad'] += kl_fctr*X.variance.gradient/d
            else:
                current_values['meangrad'] += kl_fctr*X.mean.gradient
                current_values['vargrad'] += kl_fctr*X.variance.gradient

            if subset_indices is not None:
                value_indices['meangrad'] = subset_indices['samples']
                value_indices['vargrad'] = subset_indices['samples']
        return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices

    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kw):
        posterior, log_marginal_likelihood, grad_dict = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm,
                                                                                                                      psi0=psi0, psi1=psi1, psi2=psi2, **kw)
        return posterior, log_marginal_likelihood, grad_dict

    def _outer_values_update(self, full_values):
        """

@@ -128,22 +93,47 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
        """
        super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values)
        if self.has_uncertain_inputs():
            self.X.mean.gradient = full_values['meangrad']
            self.X.variance.gradient = full_values['vargrad']
            meangrad_tmp, vargrad_tmp = self.kern.gradients_qX_expectations(
                variational_posterior=self.X,
                Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
                dL_dpsi1=full_values['dL_dpsi1'],
                dL_dpsi2=full_values['dL_dpsi2'],
                psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)

            self.X.mean.gradient = meangrad_tmp
            self.X.variance.gradient = vargrad_tmp
        else:
            self.X.gradient = full_values['Xgrad']
            self.X.gradient = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z)
            self.X.gradient += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X)

    def _outer_init_full_values(self):
        if self.has_uncertain_inputs():
            return dict(meangrad=np.zeros(self.X.mean.shape),
                        vargrad=np.zeros(self.X.variance.shape))
        else:
            return dict(Xgrad=np.zeros(self.X.shape))
        return super(BayesianGPLVMMiniBatch, self)._outer_init_full_values()

    def parameters_changed(self):
        super(BayesianGPLVMMiniBatch, self).parameters_changed()
        if isinstance(self.inference_method, VarDTC_minibatch):
            return

        kl_fctr = self.kl_factr
        if kl_fctr > 0:
            Xgrad = self.X.gradient.copy()
            self.X.gradient[:] = 0
            self.variational_prior.update_gradients_KL(self.X)

            if self.missing_data or not self.stochastics:
                self.X.mean.gradient = kl_fctr*self.X.mean.gradient
                self.X.variance.gradient = kl_fctr*self.X.variance.gradient
            else:
                d = self.output_dim
                self.X.mean.gradient = kl_fctr*self.X.mean.gradient*self.stochastics.batchsize/d
                self.X.variance.gradient = kl_fctr*self.X.variance.gradient*self.stochastics.batchsize/d
            self.X.gradient += Xgrad

            if self.missing_data or not self.stochastics:
                self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
            elif self.stochastics:
                d = self.output_dim
                self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d

        self._Xgrad = self.X.gradient.copy()

    def plot_latent(self, labels=None, which_indices=None,
                    resolution=50, ax=None, marker='o', s=40,
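
Usage sketch (not part of the diff): constructing the minibatch Bayesian GPLVM that these hunks modify. The toy data and the stochastic/batchsize settings are illustrative assumptions, assuming GPy.models exports BayesianGPLVMMiniBatch as in this revision.

    import numpy as np
    import GPy

    Y = np.random.randn(100, 12)  # toy high-dimensional observations
    m = GPy.models.BayesianGPLVMMiniBatch(Y, input_dim=2, num_inducing=10,
                                          stochastic=True, batchsize=10)
    m.optimize(messages=True, max_iters=50)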

@@ -1,11 +1,11 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Copyright (c) 2015 James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from ..core import GP
from ..models import GPLVM
from ..mappings import *
from . import GPLVM
from .. import mappings

class BCGPLVM(GPLVM):

@@ -16,33 +16,31 @@ class BCGPLVM(GPLVM):
    :type Y: np.ndarray
    :param input_dim: latent dimensionality
    :type input_dim: int
    :param init: initialisation method for the latent space
    :type init: 'PCA'|'random'
    :param mapping: mapping for back constraint
    :type mapping: GPy.core.Mapping object

    """
    def __init__(self, Y, input_dim, init='PCA', X=None, kernel=None, normalize_Y=False, mapping=None):

    def __init__(self, Y, input_dim, kernel=None, mapping=None):

        if mapping is None:
            mapping = Kernel(X=Y, output_dim=input_dim)
            mapping = mappings.MLP(input_dim=Y.shape[1],
                                   output_dim=input_dim,
                                   hidden_dim=10)
        else:
            assert mapping.input_dim == Y.shape[1], "mapping input dimension must match the dimension of Y"
            assert mapping.output_dim == input_dim, "mapping output dimension must match input_dim"
        GPLVM.__init__(self, Y, input_dim, X=mapping.f(Y), kernel=kernel, name="bcgplvm")
        self.unlink_parameter(self.X)
        self.mapping = mapping
        GPLVM.__init__(self, Y, input_dim, init, X, kernel, normalize_Y)
        self.X = self.mapping.f(self.likelihood.Y)
        self.link_parameter(self.mapping)

    def _get_param_names(self):
        return self.mapping._get_param_names() + GP._get_param_names(self)
        self.X = self.mapping.f(self.Y)

    def _get_params(self):
        return np.hstack((self.mapping._get_params(), GP._get_params(self)))
    def parameters_changed(self):
        self.X = self.mapping.f(self.Y)
        GP.parameters_changed(self)
        Xgradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
        self.mapping.update_gradients(Xgradient, self.Y)

    def _set_params(self, x):
        self.mapping._set_params(x[:self.mapping.num_params])
        self.X = self.mapping.f(self.likelihood.Y)
        GP._set_params(self, x[self.mapping.num_params:])

    def _log_likelihood_gradients(self):
        dL_df = self.kern.gradients_X(self.dL_dK, self.X)
        dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
        return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))
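
Usage sketch (not part of the diff): the back-constrained GPLVM after this change ties the latent points to the data through X = mapping.f(Y). The toy data and the MLP hidden size are illustrative assumptions.

    import numpy as np
    import GPy

    Y = np.random.randn(50, 8)  # toy observations
    mapping = GPy.mappings.MLP(input_dim=Y.shape[1], output_dim=2, hidden_dim=10)
    m = GPy.models.BCGPLVM(Y, input_dim=2, mapping=mapping)
    m.optimize(max_iters=100)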

@@ -16,6 +16,8 @@ class GPHeteroscedasticRegression(GP):
    :param X: input observations
    :param Y: observed values
    :param kernel: a GPy kernel, defaults to RBF

    NB: This model does not make inference on the noise outside the training set
    """
    def __init__(self, X, Y, kernel=None, Y_metadata=None):

@@ -30,10 +32,7 @@ class GPHeteroscedasticRegression(GP):
            kernel = kern.RBF(X.shape[1])

        #Likelihood
        #likelihoods_list = [likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for j in range(Ny)]
        noise_terms = np.unique(Y_metadata['output_index'].flatten())
        likelihoods_list = [likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for j in noise_terms]
        likelihood = likelihoods.MixedNoise(likelihoods_list=likelihoods_list)
        likelihood = likelihoods.HeteroscedasticGaussian(Y_metadata)

        super(GPHeteroscedasticRegression, self).__init__(X, Y, kernel, likelihood, Y_metadata=Y_metadata)
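
Usage sketch (not part of the diff): the heteroscedastic model expects Y_metadata['output_index'] to assign a noise term to each observation; one term per data point, as below, is an illustrative choice.

    import numpy as np
    import GPy

    X = np.linspace(0, 10, 30)[:, None]
    Y = np.sin(X) + np.random.randn(30, 1)*0.1
    Y_metadata = {'output_index': np.arange(30)[:, None]}  # one noise term per point
    m = GPy.models.GPHeteroscedasticRegression(X, Y, Y_metadata=Y_metadata)
    m.optimize()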

@@ -1,5 +1,5 @@
# Copyright (c) 2014, James Hensman, Alan Saul
# Distributed under the terms of the GNU General public License, see LICENSE.txt
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from ..core.model import Model

@@ -26,12 +26,12 @@ class GPRegression(GP):

    """

    def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1.):
    def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1., mean_function=None):

        if kernel is None:
            kernel = kern.RBF(X.shape[1])

        likelihood = likelihoods.Gaussian(variance=noise_var)

        super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer)
        super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer, mean_function=mean_function)
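
Usage sketch (not part of the diff): the new mean_function argument is forwarded to the core GP class; GPy.mappings.Constant as the prior mean is an illustrative choice.

    import numpy as np
    import GPy

    X = np.random.rand(40, 1)*10
    Y = 3.0 + np.sin(X) + np.random.randn(40, 1)*0.1
    mf = GPy.mappings.Constant(input_dim=1, output_dim=1, value=3.0)  # non-zero prior mean
    m = GPy.models.GPRegression(X, Y, mean_function=mf)
    m.optimize()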

@@ -1,20 +1,17 @@
# Copyright (c) 2014, James Hensman, Alan Saul
# Distributed under the terms of the GNU General public License, see LICENSE.txt
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from scipy import stats
from scipy.special import erf
from ..core.model import Model
from ..core import GP
from ..core.parameterization import ObsAr
from .. import kern
from ..core.parameterization.param import Param
from ..util.linalg import pdinv
from ..likelihoods import Gaussian
from ..inference.latent_function_inference import VarGauss

log_2_pi = np.log(2*np.pi)


class GPVariationalGaussianApproximation(Model):
class GPVariationalGaussianApproximation(GP):
    """
    The Variational Gaussian Approximation revisited

@@ -26,70 +23,14 @@ class GPVariationalGaussianApproximation(Model):
    pages = {786--792},
    }
    """
    def __init__(self, X, Y, kernel, likelihood=None, Y_metadata=None):
        Model.__init__(self, 'Variational GP')
        if likelihood is None:
            likelihood = Gaussian()
        # accept the construction arguments
        self.X = ObsAr(X)
        self.Y = Y
        self.num_data, self.input_dim = self.X.shape
        self.Y_metadata = Y_metadata
    def __init__(self, X, Y, kernel, likelihood, Y_metadata=None):

        self.kern = kernel
        self.likelihood = likelihood
        self.link_parameter(self.kern)
        self.link_parameter(self.likelihood)
        num_data = Y.shape[0]
        self.alpha = Param('alpha', np.zeros((num_data, 1)))  # only one latent fn for now.
        self.beta = Param('beta', np.ones(num_data))

        inf = VarGauss(self.alpha, self.beta)
        super(GPVariationalGaussianApproximation, self).__init__(X, Y, kernel, likelihood, name='VarGP', inference_method=inf)

        self.alpha = Param('alpha', np.zeros((self.num_data, 1)))  # only one latent fn for now.
        self.beta = Param('beta', np.ones(self.num_data))
        self.link_parameter(self.alpha)
        self.link_parameter(self.beta)

    def log_likelihood(self):
        return self._log_lik

    def parameters_changed(self):
        K = self.kern.K(self.X)
        m = K.dot(self.alpha)
        KB = K*self.beta[:, None]
        BKB = KB*self.beta[None, :]
        A = np.eye(self.num_data) + BKB
        Ai, LA, _, Alogdet = pdinv(A)
        Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :]  # posterior covariance: need full matrix for gradients
        var = np.diag(Sigma).reshape(-1, 1)

        F, dF_dm, dF_dv, dF_dthetaL = self.likelihood.variational_expectations(self.Y, m, var, Y_metadata=self.Y_metadata)
        self.likelihood.gradient = dF_dthetaL.sum(1).sum(1)
        dF_da = np.dot(K, dF_dm)
        SigmaB = Sigma*self.beta
        dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
        KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + np.sum(m*self.alpha))
        dKL_da = m
        A_A2 = Ai - Ai.dot(Ai)
        dKL_db = np.diag(np.dot(KB.T, A_A2))
        self._log_lik = F.sum() - KL
        self.alpha.gradient = dF_da - dKL_da
        self.beta.gradient = dF_db - dKL_db

        # K-gradients
        dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
        tmp = Ai*self.beta[:, None]/self.beta[None, :]
        dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
        self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)

    def _raw_predict(self, Xnew):
        """
        Predict the function(s) at the new point(s) Xnew.

        :param Xnew: The points at which to make a prediction
        :type Xnew: np.ndarray, Nnew x self.input_dim
        """
        Wi, _, _, _ = pdinv(self.kern.K(self.X) + np.diag(self.beta**-2))
        Kux = self.kern.K(self.X, Xnew)
        mu = np.dot(Kux.T, self.alpha)
        WiKux = np.dot(Wi, Kux)
        Kxx = self.kern.Kdiag(Xnew)
        var = Kxx - np.sum(WiKux*Kux, 0)

        return mu, var.reshape(-1, 1)
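
Usage sketch (not part of the diff): after the refactor the model is a plain GP with a VarGauss inference method, so it can be driven like any other GPy model. The Bernoulli classification likelihood is an illustrative choice, since the variational Gaussian approximation targets non-Gaussian likelihoods.

    import numpy as np
    import GPy

    X = np.random.rand(50, 1)
    Y = (np.sin(6*X) > 0).astype(float)  # binary labels
    m = GPy.models.GPVariationalGaussianApproximation(X, Y,
                                                      kernel=GPy.kern.RBF(1),
                                                      likelihood=GPy.likelihoods.Bernoulli())
    m.optimize()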

@@ -36,6 +36,7 @@ class GPLVM(GP):
        likelihood = Gaussian()

        super(GPLVM, self).__init__(X, Y, kernel, likelihood, name='GPLVM')

        self.X = Param('latent_mean', X)
        self.link_parameter(self.X, index=0)

@@ -43,27 +44,30 @@ class GPLVM(GP):
        super(GPLVM, self).parameters_changed()
        self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)

    def jacobian(self, X):
        J = np.zeros((X.shape[0], X.shape[1], self.output_dim))
        for i in range(self.output_dim):
            J[:, :, i] = self.kern.gradients_X(self.posterior.woodbury_vector[:, i:i+1], X, self.X)
        return J
    #def jacobian(self, X):
    #    J = np.zeros((X.shape[0], X.shape[1], self.output_dim))
    #    for i in range(self.output_dim):
    #        J[:, :, i] = self.kern.gradients_X(self.posterior.woodbury_vector[:, i:i+1], X, self.X)
    #    return J

    def magnification(self, X):
        target = np.zeros(X.shape[0])
        #J = np.zeros((X.shape[0], X.shape[1], self.output_dim))
        J = self.jacobian(X)
        for i in range(X.shape[0]):
            target[i] = np.sqrt(np.linalg.det(np.dot(J[i, :, :], np.transpose(J[i, :, :]))))
        return target
    #def magnification(self, X):
    #    target = np.zeros(X.shape[0])
    #    #J = np.zeros((X.shape[0], X.shape[1], self.output_dim))
    #    J = self.jacobian(X)
    #    for i in range(X.shape[0]):
    #        target[i] = np.sqrt(np.linalg.det(np.dot(J[i, :, :], np.transpose(J[i, :, :]))))
    #    return target
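
For reference, the commented-out helpers computed the magnification factor sqrt(det(J J^T)) of the latent-to-data mapping. A standalone sketch of the same computation, with `model` assumed to be a fitted GPLVM:

    import numpy as np

    def magnification(model, X):
        # Jacobian of the posterior mean mapping, one slice per output dimension
        J = np.zeros((X.shape[0], X.shape[1], model.output_dim))
        for i in range(model.output_dim):
            J[:, :, i] = model.kern.gradients_X(
                model.posterior.woodbury_vector[:, i:i+1], X, model.X)
        # volume element sqrt(det(J J^T)) at each latent point
        JJt = np.einsum('nij,nkj->nik', J, J)
        return np.sqrt(np.linalg.det(JJt))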

    def plot(self):
        assert self.likelihood.Y.shape[1] == 2
        pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet)  # @UndefinedVariable
        assert self.Y.shape[1] == 2, "too high dimensional to plot. Try plot_latent"
        from matplotlib import pyplot as plt
        plt.scatter(self.Y[:, 0],
                    self.Y[:, 1],
                    40, self.X[:, 0].copy(),
                    linewidth=0, cmap=plt.cm.jet)
        Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
        mu, _ = self.predict(Xnew)
        import pylab as pb
        pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
        plt.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)

    def plot_latent(self, labels=None, which_indices=None,
                    resolution=50, ax=None, marker='o', s=40,

@@ -78,6 +82,3 @@ class GPLVM(GP):
                                             resolution, ax, marker, s,
                                             fignum, False, legend,
                                             plot_limits, aspect, updates, **kwargs)

    def plot_magnification(self, *args, **kwargs):
        return util.plot_latent.plot_magnification(self, *args, **kwargs)

@@ -251,7 +251,7 @@ class HessianChecker(GradientChecker):
            print(grad_string)

        if plot:
            import pylab as pb
            from matplotlib import pyplot as pb
            fig, axes = pb.subplots(2, 2)
            max_lim = numpy.max(numpy.vstack((analytic_hess, numeric_hess)))
            min_lim = numpy.min(numpy.vstack((analytic_hess, numeric_hess)))

@@ -170,20 +170,19 @@ class MRD(BayesianGPLVMMiniBatch):
            self._log_marginal_likelihood += b._log_marginal_likelihood

            self.logger.info('working on im <{}>'.format(hex(id(i))))
            self.Z.gradient[:] += b.full_values['Zgrad']
            grad_dict = b.full_values
            self.Z.gradient[:] += b.Z.gradient  #full_values['Zgrad']
            #grad_dict = b.full_values

            if self.has_uncertain_inputs():
                self.X.mean.gradient += grad_dict['meangrad']
                self.X.variance.gradient += grad_dict['vargrad']
                self.X.gradient += b._Xgrad
            else:
                self.X.gradient += grad_dict['Xgrad']
                self.X.gradient += b._Xgrad

        if self.has_uncertain_inputs():
            # update for the KL divergence
            self.variational_prior.update_gradients_KL(self.X)
            self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
            pass
        #if self.has_uncertain_inputs():
        #    # update for the KL divergence
        #    self.variational_prior.update_gradients_KL(self.X)
        #    self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
        #    pass

    def log_likelihood(self):
        return self._log_marginal_likelihood

@@ -44,7 +44,7 @@ class SparseGPMiniBatch(SparseGP):
    def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
                 name='sparse gp', Y_metadata=None, normalizer=False,
                 missing_data=False, stochastic=False, batchsize=1):

        # pick a sensible inference method
        if inference_method is None:
            if isinstance(likelihood, likelihoods.Gaussian):

@@ -63,10 +63,10 @@ class SparseGPMiniBatch(SparseGP):

        if stochastic and missing_data:
            self.missing_data = True
            self.stochastics = SparseGPStochastics(self, batchsize)
            self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
        elif stochastic and not missing_data:
            self.missing_data = False
            self.stochastics = SparseGPStochastics(self, batchsize)
            self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
        elif missing_data:
            self.missing_data = True
            self.stochastics = SparseGPMissing(self)

@@ -80,7 +80,7 @@ class SparseGPMiniBatch(SparseGP):
    def has_uncertain_inputs(self):
        return isinstance(self.X, VariationalPosterior)

    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kwargs):
    def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kwargs):
        """
        This is the standard part, which usually belongs in parameters_changed.

@@ -99,47 +99,13 @@ class SparseGPMiniBatch(SparseGP):
        like them into this dictionary for inner use of the indices inside the
        algorithm.
        """
        try:
            posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None, **kwargs)
        except:
            posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata)
        current_values = {}
        likelihood.update_gradients(grad_dict['dL_dthetaL'])
        current_values['likgrad'] = likelihood.gradient.copy()
        if subset_indices is None:
            subset_indices = {}
        if isinstance(X, VariationalPosterior):
            #gradients wrt kernel
            dL_dKmm = grad_dict['dL_dKmm']
            kern.update_gradients_full(dL_dKmm, Z, None)
            current_values['kerngrad'] = kern.gradient.copy()
            kern.update_gradients_expectations(variational_posterior=X,
                                               Z=Z,
                                               dL_dpsi0=grad_dict['dL_dpsi0'],
                                               dL_dpsi1=grad_dict['dL_dpsi1'],
                                               dL_dpsi2=grad_dict['dL_dpsi2'])
            current_values['kerngrad'] += kern.gradient

            #gradients wrt Z
            current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z)
            current_values['Zgrad'] += kern.gradients_Z_expectations(
                grad_dict['dL_dpsi0'],
                grad_dict['dL_dpsi1'],
                grad_dict['dL_dpsi2'],
                Z=Z,
                variational_posterior=X)
        if psi2 is None:
            psi2_sum_n = None
        else:
            #gradients wrt kernel
            kern.update_gradients_diag(grad_dict['dL_dKdiag'], X)
            current_values['kerngrad'] = kern.gradient.copy()
            kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z)
            current_values['kerngrad'] += kern.gradient
            kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None)
            current_values['kerngrad'] += kern.gradient
            #gradients wrt Z
            current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z)
            current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X)
        return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices
            psi2_sum_n = psi2.sum(axis=0)
        posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm,
                                                                                        dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs)
        return posterior, log_marginal_likelihood, grad_dict

    def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
        """

@@ -173,7 +139,10 @@ class SparseGPMiniBatch(SparseGP):
            else:
                index = slice(None)
            if key in full_values:
                full_values[key][index] += current_values[key]
                try:
                    full_values[key][index] += current_values[key]
                except:
                    full_values[key] += current_values[key]
            else:
                full_values[key] = current_values[key]

@@ -192,9 +161,41 @@ class SparseGPMiniBatch(SparseGP):
        Here you put the values collected before in the right places.
        E.g. set the gradients of parameters, etc.
        """
        self.likelihood.gradient = full_values['likgrad']
        self.kern.gradient = full_values['kerngrad']
        self.Z.gradient = full_values['Zgrad']
        if self.has_uncertain_inputs():
            #gradients wrt kernel
            dL_dKmm = full_values['dL_dKmm']
            self.kern.update_gradients_full(dL_dKmm, self.Z, None)
            kgrad = self.kern.gradient.copy()
            self.kern.update_gradients_expectations(
                variational_posterior=self.X,
                Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
                dL_dpsi1=full_values['dL_dpsi1'],
                dL_dpsi2=full_values['dL_dpsi2'])
            self.kern.gradient += kgrad

            #gradients wrt Z
            self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
            self.Z.gradient += self.kern.gradients_Z_expectations(
                variational_posterior=self.X,
                Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
                dL_dpsi1=full_values['dL_dpsi1'],
                dL_dpsi2=full_values['dL_dpsi2'])
        else:
            #gradients wrt kernel
            self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X)
            kgrad = self.kern.gradient.copy()
            self.kern.update_gradients_full(full_values['dL_dKnm'], self.X, self.Z)
            kgrad += self.kern.gradient
            self.kern.update_gradients_full(full_values['dL_dKmm'], self.Z, None)
            self.kern.gradient += kgrad
            #kgrad += self.kern.gradient

            #gradients wrt Z
            self.Z.gradient = self.kern.gradients_X(full_values['dL_dKmm'], self.Z)
            self.Z.gradient += self.kern.gradients_X(full_values['dL_dKnm'].T, self.Z, self.X)

        self.likelihood.update_gradients(full_values['dL_dthetaL'])

    def _outer_init_full_values(self):
        """

@@ -209,7 +210,15 @@ class SparseGPMiniBatch(SparseGP):
        to initialize the gradients for the mean and the variance in order to
        have the full gradient for indexing)
        """
        return {}
        retd = dict(dL_dKmm=np.zeros((self.Z.shape[0], self.Z.shape[0])))
        if self.has_uncertain_inputs():
            retd.update(dict(dL_dpsi0=np.zeros(self.X.shape[0]),
                             dL_dpsi1=np.zeros((self.X.shape[0], self.Z.shape[0])),
                             dL_dpsi2=np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0]))))
        else:
            retd.update({'dL_dKdiag': np.zeros(self.X.shape[0]),
                         'dL_dKnm': np.zeros((self.X.shape[0], self.Z.shape[0]))})
        return retd

    def _outer_loop_for_missing_data(self):
        Lm = None

@@ -231,28 +240,36 @@ class SparseGPMiniBatch(SparseGP):
            print(message, end=' ')

        for d, ninan in self.stochastics.d:

            if not self.stochastics:
                print(' '*(len(message)) + '\r', end=' ')
                message = m_f(d)
                print(message, end=' ')

            posterior, log_marginal_likelihood, \
                grad_dict, current_values, value_indices = self._inner_parameters_changed(
            psi0ni = self.psi0[ninan]
            psi1ni = self.psi1[ninan]
            if self.has_uncertain_inputs():
                psi2ni = self.psi2[ninan]
                value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, dL_dpsi2=ninan)
            else:
                psi2ni = None
                value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan)

            posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed(
                self.kern, self.X[ninan],
                self.Z, self.likelihood,
                self.Y_normalized[ninan][:, d], self.Y_metadata,
                Lm, dL_dKmm,
                subset_indices=dict(outputs=d, samples=ninan))
                psi0=psi0ni, psi1=psi1ni, psi2=psi2ni)

            self._inner_take_over_or_update(self.full_values, current_values, value_indices)
            self._inner_values_update(current_values)
            # Fill out the full values by adding in the appropriate grad_dict
            # values
            self._inner_take_over_or_update(self.full_values, grad_dict, value_indices)
            self._inner_values_update(grad_dict)  # What is this for? -> MRD

            Lm = posterior.K_chol
            dL_dKmm = grad_dict['dL_dKmm']
            woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
            woodbury_vector[:, d] = posterior.woodbury_vector
            self._log_marginal_likelihood += log_marginal_likelihood

        if not self.stochastics:
            print('')

@@ -260,10 +277,10 @@ class SparseGPMiniBatch(SparseGP):
        self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
                                   K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
        self._outer_values_update(self.full_values)
        if self.has_uncertain_inputs():
            self.kern.return_psi2_n = False

    def _outer_loop_without_missing_data(self):
        self._log_marginal_likelihood = 0

        if self.posterior is None:
            woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
            woodbury_vector = np.zeros((self.num_inducing, self.output_dim))

@@ -271,17 +288,16 @@ class SparseGPMiniBatch(SparseGP):
            woodbury_inv = self.posterior._woodbury_inv
            woodbury_vector = self.posterior._woodbury_vector

        d = self.stochastics.d
        posterior, log_marginal_likelihood, \
            grad_dict, self.full_values, _ = self._inner_parameters_changed(
        d = self.stochastics.d[0][0]
        posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed(
            self.kern, self.X,
            self.Z, self.likelihood,
            self.Y_normalized[:, d], self.Y_metadata)
        self.grad_dict = grad_dict

        self._log_marginal_likelihood += log_marginal_likelihood
        self._log_marginal_likelihood = log_marginal_likelihood

        self._outer_values_update(self.full_values)
        self._outer_values_update(self.grad_dict)

        woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
        woodbury_vector[:, d] = posterior.woodbury_vector

@@ -290,10 +306,23 @@ class SparseGPMiniBatch(SparseGP):
                                   K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)

    def parameters_changed(self):
        # Compute the psi statistics for N once, but don't sum out N in psi2
        if self.has_uncertain_inputs():
            #psi0 = ObsAr(self.kern.psi0(self.Z, self.X))
            #psi1 = ObsAr(self.kern.psi1(self.Z, self.X))
            #psi2 = ObsAr(self.kern.psi2(self.Z, self.X))
            self.psi0 = self.kern.psi0(self.Z, self.X)
            self.psi1 = self.kern.psi1(self.Z, self.X)
            self.psi2 = self.kern.psi2n(self.Z, self.X)
        else:
            self.psi0 = self.kern.Kdiag(self.X)
            self.psi1 = self.kern.K(self.X, self.Z)
            self.psi2 = None

        if self.missing_data:
            self._outer_loop_for_missing_data()
        elif self.stochastics:
            self._outer_loop_without_missing_data()
        else:
            self.posterior, self._log_marginal_likelihood, self.grad_dict, self.full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
            self._outer_values_update(self.full_values)
            self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
            self._outer_values_update(self.grad_dict)
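
For reference, the per-datum psi statistics cached above have these shapes (N data points, M inducing inputs); the kernel and variational posterior constructed here are illustrative assumptions.

    import numpy as np
    import GPy
    from GPy.core.parameterization.variational import NormalPosterior

    N, M, Q = 100, 10, 2
    kern = GPy.kern.RBF(Q)
    Z = np.random.randn(M, Q)
    X = NormalPosterior(np.random.randn(N, Q), 0.1*np.ones((N, Q)))

    psi0 = kern.psi0(Z, X)    # (N,):      E[k(x_n, x_n)]
    psi1 = kern.psi1(Z, X)    # (N, M):    E[k(x_n, Z)]
    psi2n = kern.psi2n(Z, X)  # (N, M, M): per-datum second moment, not summed over n
    psi2 = psi2n.sum(axis=0)  # (M, M):    what the inference call receives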