[missing data] general implementation for subsetting data

This commit is contained in:
Max Zwiessele 2014-10-08 12:03:51 +01:00
parent d93fac8c13
commit fa7807ee6f
7 changed files with 329 additions and 108 deletions

View file

@ -9,6 +9,7 @@ from .. import likelihoods
from parameterization.variational import VariationalPosterior from parameterization.variational import VariationalPosterior
import logging import logging
from GPy.inference.latent_function_inference.posterior import Posterior
logger = logging.getLogger("sparse gp") logger = logging.getLogger("sparse gp")
class SparseGP(GP): class SparseGP(GP):
@ -34,12 +35,18 @@ class SparseGP(GP):
""" """
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None, normalizer=False): def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False,
missing_data=False):
self.missing_data = missing_data
if self.missing_data:
self.ninan = ~np.isnan(Y)
#pick a sensible inference method #pick a sensible inference method
if inference_method is None: if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian): if isinstance(likelihood, likelihoods.Gaussian):
inference_method = var_dtc.VarDTC() inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1])
else: else:
#inference_method = ?? #inference_method = ??
raise NotImplementedError, "what to do what to do?" raise NotImplementedError, "what to do what to do?"
@ -51,44 +58,200 @@ class SparseGP(GP):
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
logger.info("Adding Z as parameter") logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0) self.link_parameter(self.Z, index=0)
if self.missing_data:
self.Ylist = []
overall = self.Y_normalized.shape[1]
m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)
message = m_f(-1)
print message,
for d in xrange(overall):
self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])
print ' '*(len(message)+1) + '\r',
message = m_f(d)
print message,
print ''
def has_uncertain_inputs(self): def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior) return isinstance(self.X, VariationalPosterior)
def parameters_changed(self): def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata) """
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) This is the standard part, which usually belongs in parameters_changed.
if isinstance(self.X, VariationalPosterior):
For automatic handling of subsampling (such as missing_data, stochastics etc.), we need to put this into an inner
loop, in order to ensure a different handling of gradients etc of different
subsets of data.
The dict in current_values will be passed aroung as current_values for
the rest of the algorithm, so this is the place to store current values,
such as subsets etc, if necessary.
If Lm and dL_dKmm can be precomputed (or only need to be computed once)
pass them in here, so they will be passed to the inference_method.
subset_indices is a dictionary of indices. you can put the indices however you
like them into this dictionary for inner use of the indices inside the
algorithm.
"""
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None)
current_values = {}
likelihood.update_gradients(grad_dict['dL_dthetaL'])
current_values['likgrad'] = likelihood.gradient.copy()
if subset_indices is None:
subset_indices = {}
if isinstance(X, VariationalPosterior):
#gradients wrt kernel #gradients wrt kernel
dL_dKmm = self.grad_dict['dL_dKmm'] dL_dKmm = grad_dict['dL_dKmm']
self.kern.update_gradients_full(dL_dKmm, self.Z, None) kern.update_gradients_full(dL_dKmm, Z, None)
target = self.kern.gradient.copy() current_values['kerngrad'] = kern.gradient.copy()
self.kern.update_gradients_expectations(variational_posterior=self.X, kern.update_gradients_expectations(variational_posterior=X,
Z=self.Z, Z=Z,
dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=self.grad_dict['dL_dpsi2']) dL_dpsi2=grad_dict['dL_dpsi2'])
self.kern.gradient += target current_values['kerngrad'] += kern.gradient
#gradients wrt Z #gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z)
self.Z.gradient += self.kern.gradients_Z_expectations( current_values['Zgrad'] += kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi0'], grad_dict['dL_dpsi0'],
self.grad_dict['dL_dpsi1'], grad_dict['dL_dpsi1'],
self.grad_dict['dL_dpsi2'], grad_dict['dL_dpsi2'],
Z=self.Z, Z=Z,
variational_posterior=self.X) variational_posterior=X)
else: else:
#gradients wrt kernel #gradients wrt kernel
self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X) kern.update_gradients_diag(grad_dict['dL_dKdiag'], X)
target = self.kern.gradient.copy() current_values['kerngrad'] = kern.gradient.copy()
self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z) kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z)
target += self.kern.gradient current_values['kerngrad'] += kern.gradient
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None) kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None)
self.kern.gradient += target current_values['kerngrad'] += kern.gradient
#gradients wrt Z #gradients wrt Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X)
return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices
def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
"""
This is for automatic updates of values in the inner loop of missing
data handling. Both arguments are dictionaries and the values in
full_values will be updated by the current_gradients.
If a key from current_values does not exist in full_values, it will be
initialized to the value in current_values.
If there is indices needed for the update, value_indices can be used for
that. If value_indices has the same key, as current_values, the update
in full_values will be indexed by the indices in value_indices.
grads:
dictionary of standing gradients (you will have to carefully make sure, that
the ordering is right!). The values in here will be updated such that
full_values[key] += current_values[key] forall key in full_gradients.keys()
gradients:
dictionary of gradients in the current set of parameters.
value_indices:
dictionary holding indices for the update in full_values.
if the key exists the update rule is:
full_values[key][value_indices[key]] += current_values[key]
"""
for key in current_values.keys():
if value_indices is not None and value_indices.has_key(key):
index = value_indices[key]
else:
index = slice(None)
if full_values.has_key(key):
full_values[key][index] += current_values[key]
else:
full_values[key] = current_values[key]
def _inner_values_update(self, current_values):
"""
This exists if there is more to do with the current values.
It will be called allways in the inner loop, so that
you can do additional inner updates for the inside of the missing data
loop etc. This can also be used for stochastic updates, when only working on
one dimension of the output.
"""
pass
def _outer_values_update(self, full_values):
"""
Here you put the values, which were collected before in the right places.
E.g. set the gradients of parameters, etc.
"""
self.likelihood.gradient = full_values['likgrad']
self.kern.gradient = full_values['kerngrad']
self.Z.gradient = full_values['Zgrad']
def _outer_init_full_values(self):
"""
If full_values has indices in values_indices, we might want to initialize
the full_values differently, so that subsetting is possible.
Here you can initialize the full_values for the values needed.
Keep in mind, that if a key does not exist in full_values when updating
values, it will be set (so e.g. for Z there is no need to initialize Zgrad,
as there is no subsetting needed. For X in BGPLVM on the other hand we probably need
to initialize the gradients for the mean and the variance in order to
have the full gradient for indexing)
"""
return {}
def _outer_loop_for_missing_data(self):
Lm = None
dL_dKmm = None
Kmm = None
self._log_marginal_likelihood = 0
full_values = self._outer_init_full_values()
woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
m_f = lambda i: "Inference with missing data: {: >7.2%}".format(float(i+1)/self.output_dim)
message = m_f(-1)
print message,
for d in xrange(self.output_dim):
ninan = self.ninan[:, d]
print ' '*(len(message)) + '\r',
message = m_f(d)
print message,
posterior, log_marginal_likelihood, \
grad_dict, current_values, value_indices = self._inner_parameters_changed(
self.kern, self.X[ninan],
self.Z, self.likelihood,
self.Ylist[d], self.Y_metadata,
Lm, dL_dKmm,
subset_indices=dict(outputs=d, samples=ninan))
self._inner_take_over_or_update(full_values, current_values, value_indices)
self._inner_values_update(current_values)
Lm = posterior.K_chol
dL_dKmm = grad_dict['dL_dKmm']
Kmm = posterior._K
woodbury_inv[:, :, d] = posterior.woodbury_inv
woodbury_vector[:, d:d+1] = posterior.woodbury_vector
self._log_marginal_likelihood += log_marginal_likelihood
print ''
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
K=Kmm, mean=None, cov=None, K_chol=Lm)
self._outer_values_update(full_values)
def parameters_changed(self):
if self.missing_data:
self._outer_loop_for_missing_data()
else:
self.posterior, self._log_marginal_likelihood, self.grad_dict, gradients, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
self.kern.gradient = gradients['kerngrad']
self.Z.gradient = gradients['Zgrad']
def _raw_predict(self, Xnew, full_cov=False, kern=None): def _raw_predict(self, Xnew, full_cov=False, kern=None):
""" """

View file

@ -34,15 +34,15 @@ class SparseGP_MPI(SparseGP):
""" """
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False): def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False, missing_data=False):
self._IN_OPTIMIZATION_ = False self._IN_OPTIMIZATION_ = False
if mpi_comm != None: if mpi_comm != None:
if inference_method is None: if inference_method is None:
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm) inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
else: else:
assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!' assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!'
super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer, missing_data=missing_data)
self.update_model(False) self.update_model(False)
self.link_parameter(self.X, index=0) self.link_parameter(self.X, index=0)
if variational_prior is not None: if variational_prior is not None:
@ -75,18 +75,18 @@ class SparseGP_MPI(SparseGP):
return dc return dc
#===================================================== #=====================================================
# The MPI parallelization # The MPI parallelization
# - can move to model at some point # - can move to model at some point
#===================================================== #=====================================================
@SparseGP.optimizer_array.setter @SparseGP.optimizer_array.setter
def optimizer_array(self, p): def optimizer_array(self, p):
if self.mpi_comm != None: if self.mpi_comm != None:
if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0: if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
self.mpi_comm.Bcast(np.int32(1),root=0) self.mpi_comm.Bcast(np.int32(1),root=0)
self.mpi_comm.Bcast(p, root=0) self.mpi_comm.Bcast(p, root=0)
SparseGP.optimizer_array.fset(self,p) SparseGP.optimizer_array.fset(self,p)
def optimize(self, optimizer=None, start=None, **kwargs): def optimize(self, optimizer=None, start=None, **kwargs):
self._IN_OPTIMIZATION_ = True self._IN_OPTIMIZATION_ = True
if self.mpi_comm==None: if self.mpi_comm==None:

View file

@ -367,14 +367,14 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
Y = Ylist[0] Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
inan = _np.random.binomial(1, .8, size=Y.shape).astype(bool) # 80% missing data inan = _np.random.binomial(1, .2, size=Y.shape).astype(bool) # 80% missing data
Ymissing = Y.copy() Ymissing = Y.copy()
Ymissing[inan] = _np.nan Ymissing[inan] = _np.nan
m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing, m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
inference_method=VarDTCMissingData(inan=inan), kernel=k) kernel=k, missing_data=True)
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) m.X.variance[:] = _np.random.uniform(0,.1,m.X.shape)
m.likelihood.variance = .01 m.likelihood.variance = .01
m.parameters_changed() m.parameters_changed()
m.Yreal = Y m.Yreal = Y

View file

@ -62,7 +62,9 @@ class VarDTC(LatentFunctionInference):
def get_VVTfactor(self, Y, prec): def get_VVTfactor(self, Y, prec):
return Y * prec # TODO chache this, and make it effective return Y * prec # TODO chache this, and make it effective
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None):
_, output_dim = Y.shape _, output_dim = Y.shape
uncertain_inputs = isinstance(X, VariationalPosterior) uncertain_inputs = isinstance(X, VariationalPosterior)
@ -84,8 +86,8 @@ class VarDTC(LatentFunctionInference):
Kmm = kern.K(Z).copy() Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter) diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm) if Lm is None:
Lm = jitchol(Kmm)
# The rather complex computations of A, and the psi stats # The rather complex computations of A, and the psi stats
if uncertain_inputs: if uncertain_inputs:
@ -100,7 +102,6 @@ class VarDTC(LatentFunctionInference):
else: else:
psi0 = kern.Kdiag(X) psi0 = kern.Kdiag(X)
psi1 = kern.K(X, Z) psi1 = kern.K(X, Z)
psi2 = None
if het_noise: if het_noise:
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
else: else:
@ -122,11 +123,12 @@ class VarDTC(LatentFunctionInference):
delit = tdot(_LBi_Lmi_psi1Vf) delit = tdot(_LBi_Lmi_psi1Vf)
data_fit = np.trace(delit) data_fit = np.trace(delit)
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit) DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
delit = -0.5 * DBi_plus_BiPBi if dL_dKmm is None:
delit += -0.5 * B * output_dim delit = -0.5 * DBi_plus_BiPBi
delit += output_dim * np.eye(num_inducing) delit += -0.5 * B * output_dim
# Compute dL_dKmm delit += output_dim * np.eye(num_inducing)
dL_dKmm = backsub_both_sides(Lm, delit) # Compute dL_dKmm
dL_dKmm = backsub_both_sides(Lm, delit)
# derivatives of L w.r.t. psi # derivatives of L w.r.t. psi
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
@ -208,6 +210,7 @@ class VarDTCMissingData(LatentFunctionInference):
inan = np.isnan(Y) inan = np.isnan(Y)
has_none = inan.any() has_none = inan.any()
self._inan = ~inan self._inan = ~inan
inan = self._inan
else: else:
inan = self._inan inan = self._inan
has_none = True has_none = True
@ -241,7 +244,7 @@ class VarDTCMissingData(LatentFunctionInference):
self._subarray_indices = [[slice(None),slice(None)]] self._subarray_indices = [[slice(None),slice(None)]]
return [Y], [(Y**2).sum()] return [Y], [(Y**2).sum()]
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None):
if isinstance(X, VariationalPosterior): if isinstance(X, VariationalPosterior):
uncertain_inputs = True uncertain_inputs = True
psi0_all = kern.psi0(Z, X) psi0_all = kern.psi0(Z, X)
@ -260,7 +263,7 @@ class VarDTCMissingData(LatentFunctionInference):
dL_dpsi0_all = np.zeros(Y.shape[0]) dL_dpsi0_all = np.zeros(Y.shape[0])
dL_dpsi1_all = np.zeros((Y.shape[0], num_inducing)) dL_dpsi1_all = np.zeros((Y.shape[0], num_inducing))
if uncertain_inputs: if uncertain_inputs:
dL_dpsi2_all = np.zeros((num_inducing, num_inducing)) dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing))
dL_dR = 0 dL_dR = 0
woodbury_vector = np.zeros((num_inducing, Y.shape[1])) woodbury_vector = np.zeros((num_inducing, Y.shape[1]))
@ -271,31 +274,31 @@ class VarDTCMissingData(LatentFunctionInference):
Kmm = kern.K(Z).copy() Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter) diag.add(Kmm, self.const_jitter)
#factor Kmm #factor Kmm
Lm = jitchol(Kmm) if Lm is None:
Lm = jitchol(Kmm)
if uncertain_inputs: LmInv = dtrtri(Lm) if uncertain_inputs: LmInv = dtrtri(Lm)
size = Y.shape[1] size = len(Ys)
next_ten = 0 next_ten = 0
for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)): for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)):
if ((i+1.)/size) >= next_ten: if ((i+1.)/size) >= next_ten:
logger.info('inference {:> 6.1%}'.format((i+1.)/size)) logger.info('inference {:> 6.1%}'.format((i+1.)/size))
next_ten += .1 next_ten += .1
if het_noise: beta = beta_all[i] if het_noise: beta = beta_all[i]
else: beta = beta_all else: beta = beta_all
VVT_factor = (y*beta) VVT_factor = (y*beta)
output_dim = 1#len(ind) output_dim = 1#len(ind)
psi0 = psi0_all[v] psi0 = psi0_all[v]
psi1 = psi1_all[v, :] psi1 = psi1_all[v, :]
if uncertain_inputs: psi2 = kern.psi2(Z, X[v, :]) if uncertain_inputs:
psi2 = kern.psi2(Z, X[v, :])
else: psi2 = None else: psi2 = None
num_data = psi1.shape[0] num_data = psi1.shape[0]
if uncertain_inputs: if uncertain_inputs:
if het_noise: psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0) if het_noise: psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
else: psi2_beta = psi2 * beta else: psi2_beta = psi2 * (beta)
A = LmInv.dot(psi2_beta.dot(LmInv.T)) A = LmInv.dot(psi2_beta.dot(LmInv.T))
else: else:
if het_noise: tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) if het_noise: tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
@ -330,11 +333,11 @@ class VarDTCMissingData(LatentFunctionInference):
dL_dpsi0_all[v] += dL_dpsi0 dL_dpsi0_all[v] += dL_dpsi0
dL_dpsi1_all[v, :] += dL_dpsi1 dL_dpsi1_all[v, :] += dL_dpsi1
if uncertain_inputs: if uncertain_inputs:
dL_dpsi2_all += dL_dpsi2 dL_dpsi2_all[v, :, :] += dL_dpsi2
# log marginal likelihood # log marginal likelihood
log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
psi0, A, LB, trYYT, data_fit,VVT_factor) psi0, A, LB, trYYT, data_fit, VVT_factor)
#put the gradients in the right places #put the gradients in the right places
dL_dR += _compute_dL_dR(likelihood, dL_dR += _compute_dL_dR(likelihood,
@ -393,7 +396,6 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C
# subsume back into psi1 (==Kmn) # subsume back into psi1 (==Kmn)
dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2) dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
dL_dpsi2 = None dL_dpsi2 = None
return dL_dpsi0, dL_dpsi1, dL_dpsi2 return dL_dpsi0, dL_dpsi1, dL_dpsi2

View file

@ -17,7 +17,7 @@ def psicomputations(variance, lengthscale, Z, variational_posterior):
# _psi1 NxM # _psi1 NxM
mu = variational_posterior.mean mu = variational_posterior.mean
S = variational_posterior.variance S = variational_posterior.variance
psi0 = np.empty(mu.shape[0]) psi0 = np.empty(mu.shape[0])
psi0[:] = variance psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S) psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
@ -41,7 +41,7 @@ def __psi1computations(variance, lengthscale, Z, mu, S):
_psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N _psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N
_psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.) _psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.)
_psi1 = variance*np.exp(_psi1_log) _psi1 = variance*np.exp(_psi1_log)
return _psi1 return _psi1
def __psi2computations(variance, lengthscale, Z, mu, S): def __psi2computations(variance, lengthscale, Z, mu, S):
@ -54,27 +54,27 @@ def __psi2computations(variance, lengthscale, Z, mu, S):
# here are the "statistics" for psi2 # here are the "statistics" for psi2
# Produced intermediate results: # Produced intermediate results:
# _psi2 MxM # _psi2 MxM
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
_psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N _psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
_psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM _psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
denom = 1./(2.*S+lengthscale2) denom = 1./(2.*S+lengthscale2)
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom) _psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom)
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2) _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
return _psi2 return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1) ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance) dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
dL_dlengscale = dl_psi1 + dl_psi2 dL_dlengscale = dl_psi1 + dl_psi2
if not ARD: if not ARD:
dL_dlengscale = dL_dlengscale.sum() dL_dlengscale = dL_dlengscale.sum()
@ -82,7 +82,7 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscal
dL_dmu = dmu_psi1 + dmu_psi2 dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2 dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2 dL_dZ = dZ_psi1 + dZ_psi2
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S): def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
@ -101,9 +101,9 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
# _dL_dgamma NxQ # _dL_dgamma NxQ
# _dL_dmu NxQ # _dL_dmu NxQ
# _dL_dS NxQ # _dL_dS NxQ
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
_psi1 = _psi1computations(variance, lengthscale, Z, mu, S) _psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
Lpsi1 = dL_dpsi1*_psi1 Lpsi1 = dL_dpsi1*_psi1
Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
@ -114,7 +114,7 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
_dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2. _dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2.
_dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom) _dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom)
_dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale) _dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale)
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S): def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
@ -133,13 +133,12 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
# _dL_dgamma NxQ # _dL_dgamma NxQ
# _dL_dmu NxQ # _dL_dmu NxQ
# _dL_dS NxQ # _dL_dS NxQ
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
denom = 1./(2*S+lengthscale2) denom = 1./(2*S+lengthscale2)
denom2 = np.square(denom) denom2 = np.square(denom)
_psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM _psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
Lpsi2 = dL_dpsi2[None,:,:]*_psi2 Lpsi2 = dL_dpsi2[None,:,:]*_psi2
Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
@ -147,7 +146,7 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ
Lpsi2Zhat = Lpsi2Z Lpsi2Zhat = Lpsi2Z
Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2 Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2
_dL_dvar = Lpsi2sum.sum()*2/variance _dL_dvar = Lpsi2sum.sum()*2/variance
_dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat) _dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
_dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None] _dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
@ -157,6 +156,6 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
(2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0) (2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
_psi1computations = Cacher(__psi1computations, limit=1) _psi1computations = Cacher(__psi1computations, limit=1)
_psi2computations = Cacher(__psi2computations, limit=1) _psi2computations = Cacher(__psi2computations, limit=1)

View file

@ -25,7 +25,9 @@ class BayesianGPLVM(SparseGP_MPI):
""" """
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', mpi_comm=None, normalizer=None): Z=None, kernel=None, inference_method=None, likelihood=None,
name='bayesian gplvm', mpi_comm=None, normalizer=None,
missing_data=False):
self.mpi_comm = mpi_comm self.mpi_comm = mpi_comm
self.__IN_OPTIMIZATION__ = False self.__IN_OPTIMIZATION__ = False
@ -59,24 +61,23 @@ class BayesianGPLVM(SparseGP_MPI):
X = NormalPosterior(X, X_variance) X = NormalPosterior(X, X_variance)
if inference_method is None: if inference_method is None:
inan = np.isnan(Y) if mpi_comm is not None:
if np.any(inan):
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData
self.logger.debug("creating inference_method with var_dtc missing data")
inference_method = VarDTCMissingData(inan=inan)
elif mpi_comm is not None:
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm) inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
else: else:
from ..inference.latent_function_inference.var_dtc import VarDTC from ..inference.latent_function_inference.var_dtc import VarDTC
self.logger.debug("creating inference_method var_dtc") self.logger.debug("creating inference_method var_dtc")
inference_method = VarDTC() inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1])
if isinstance(inference_method,VarDTC_minibatch): if isinstance(inference_method,VarDTC_minibatch):
inference_method.mpi_comm = mpi_comm inference_method.mpi_comm = mpi_comm
if kernel.useGPU and isinstance(inference_method, VarDTC_GPU): if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
kernel.psicomp.GPU_direct = True kernel.psicomp.GPU_direct = True
super(BayesianGPLVM,self).__init__(X, Y, Z, kernel, likelihood=likelihood, name=name, inference_method=inference_method, normalizer=normalizer, mpi_comm=mpi_comm, variational_prior=self.variational_prior) super(BayesianGPLVM,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
name=name, inference_method=inference_method,
normalizer=normalizer, mpi_comm=mpi_comm,
variational_prior=self.variational_prior,
missing_data=missing_data)
def set_X_gradients(self, X, X_grad): def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form.""" """Set the gradients of the posterior distribution of X in its specific form."""
@ -86,15 +87,48 @@ class BayesianGPLVM(SparseGP_MPI):
"""Get the gradients of the posterior distribution of X in its specific form.""" """Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient return X.mean.gradient, X.variance.gradient
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None):
posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVM, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices)
log_marginal_likelihood -= self.variational_prior.KL_divergence(X)
current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
variational_posterior=X,
Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
X.mean.gradient[:] = 0
X.variance.gradient[:] = 0
self.variational_prior.update_gradients_KL(X)
current_values['meangrad'] += X.mean.gradient
current_values['vargrad'] += X.variance.gradient
value_indices['meangrad'] = subset_indices['samples']
value_indices['vargrad'] = subset_indices['samples']
return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices
def _outer_values_update(self, full_values):
"""
Here you put the values, which were collected before in the right places.
E.g. set the gradients of parameters, etc.
"""
super(BayesianGPLVM, self)._outer_values_update(full_values)
self.X.mean.gradient = full_values['meangrad']
self.X.variance.gradient = full_values['vargrad']
def _outer_init_full_values(self):
return dict(meangrad=np.zeros(self.X.mean.shape),
vargrad=np.zeros(self.X.variance.shape))
def parameters_changed(self): def parameters_changed(self):
super(BayesianGPLVM,self).parameters_changed() super(BayesianGPLVM,self).parameters_changed()
if isinstance(self.inference_method, VarDTC_minibatch): if isinstance(self.inference_method, VarDTC_minibatch):
return return
super(BayesianGPLVM, self).parameters_changed() #super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) #self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2']) #self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
# This is testing code ------------------------- # This is testing code -------------------------
# i = np.random.randint(self.X.shape[0]) # i = np.random.randint(self.X.shape[0])
@ -113,7 +147,7 @@ class BayesianGPLVM(SparseGP_MPI):
# ----------------------------------------------- # -----------------------------------------------
# update for the KL divergence # update for the KL divergence
self.variational_prior.update_gradients_KL(self.X) #self.variational_prior.update_gradients_KL(self.X)
def plot_latent(self, labels=None, which_indices=None, def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40, resolution=50, ax=None, marker='o', s=40,
@ -178,7 +212,7 @@ class BayesianGPLVM(SparseGP_MPI):
""" """
dmu_dX = np.zeros_like(Xnew) dmu_dX = np.zeros_like(Xnew)
for i in range(self.Z.shape[0]): for i in range(self.Z.shape[0]):
dmu_dX += self.kern.gradients_X(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :]) dmu_dX += self.kern.gradients_X(self.grad_dict['dL_dpsi1'][i:i + 1, :], Xnew, self.Z[i:i + 1, :])
return dmu_dX return dmu_dX
def dmu_dXnew(self, Xnew): def dmu_dXnew(self, Xnew):
@ -189,7 +223,7 @@ class BayesianGPLVM(SparseGP_MPI):
ones = np.ones((1, 1)) ones = np.ones((1, 1))
for i in range(self.Z.shape[0]): for i in range(self.Z.shape[0]):
gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1) gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
return np.dot(gradients_X, self.Cpsi1Vf) return np.dot(gradients_X, self.grad_dict['dL_dpsi1'])
def plot_steepest_gradient_map(self, *args, ** kwargs): def plot_steepest_gradient_map(self, *args, ** kwargs):
""" """

View file

@ -20,7 +20,7 @@ class MiscTests(unittest.TestCase):
m = GPy.models.GPRegression(self.X, self.Y, kernel=k) m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
m.randomize() m.randomize()
m.likelihood.variance = .5 m.likelihood.variance = .5
Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N)*m.likelihood.variance) Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N) * m.likelihood.variance)
K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new)) K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new))
mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(m.Y_normalized) mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(m.Y_normalized)
@ -43,7 +43,7 @@ class MiscTests(unittest.TestCase):
Z = m.Z[:] Z = m.Z[:]
X = self.X[:] X = self.X[:]
#Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression # Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression
Kinv = m.posterior.woodbury_inv Kinv = m.posterior.woodbury_inv
K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new)) K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new))
@ -51,13 +51,13 @@ class MiscTests(unittest.TestCase):
self.assertEquals(mu.shape, (self.N_new, self.D)) self.assertEquals(mu.shape, (self.N_new, self.D))
self.assertEquals(covar.shape, (self.N_new, self.N_new)) self.assertEquals(covar.shape, (self.N_new, self.N_new))
np.testing.assert_almost_equal(K_hat, covar) np.testing.assert_almost_equal(K_hat, covar)
#np.testing.assert_almost_equal(mu_hat, mu) # np.testing.assert_almost_equal(mu_hat, mu)
mu, var = m._raw_predict(self.X_new) mu, var = m._raw_predict(self.X_new)
self.assertEquals(mu.shape, (self.N_new, self.D)) self.assertEquals(mu.shape, (self.N_new, self.D))
self.assertEquals(var.shape, (self.N_new, 1)) self.assertEquals(var.shape, (self.N_new, 1))
np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var) np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
#np.testing.assert_almost_equal(mu_hat, mu) # np.testing.assert_almost_equal(mu_hat, mu)
def test_likelihood_replicate(self): def test_likelihood_replicate(self):
m = GPy.models.GPRegression(self.X, self.Y) m = GPy.models.GPRegression(self.X, self.Y)
@ -110,6 +110,29 @@ class MiscTests(unittest.TestCase):
m2.kern.lengthscale = m.kern['.*lengthscale'] m2.kern.lengthscale = m.kern['.*lengthscale']
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
def test_missing_data(self):
from GPy import kern
from GPy.models import BayesianGPLVM
from GPy.examples.dimensionality_reduction import _simulate_matern
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False)
Y = Ylist[0]
inan = np.random.binomial(1, .9, size=Y.shape).astype(bool) # 80% missing data
Ymissing = Y.copy()
Ymissing[inan] = np.nan
k = kern.Linear(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
kernel=k, missing_data=True)
assert(m.checkgrad())
k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Ymissing, Q, init="random", num_inducing=num_inducing,
kernel=k, missing_data=True)
assert(m.checkgrad())
def test_likelihood_replicate_kern(self): def test_likelihood_replicate_kern(self):
m = GPy.models.GPRegression(self.X, self.Y) m = GPy.models.GPRegression(self.X, self.Y)
m2 = GPy.models.GPRegression(self.X, self.Y) m2 = GPy.models.GPRegression(self.X, self.Y)
@ -158,7 +181,7 @@ class MiscTests(unittest.TestCase):
def test_model_optimize(self): def test_model_optimize(self):
X = np.random.uniform(-3., 3., (20, 1)) X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05 Y = np.sin(X) + np.random.randn(20, 1) * 0.05
m = GPy.models.GPRegression(X,Y) m = GPy.models.GPRegression(X, Y)
m.optimize() m.optimize()
print m print m
@ -219,8 +242,8 @@ class GradientTests(np.testing.TestCase):
mlp = GPy.kern.MLP(1) mlp = GPy.kern.MLP(1)
self.check_model(mlp, model_type='GPRegression', dimension=1) self.check_model(mlp, model_type='GPRegression', dimension=1)
#TODO: # TODO:
#def test_GPRegression_poly_1d(self): # def test_GPRegression_poly_1d(self):
# ''' Testing the GP regression with polynomial kernel with white kernel on 1d data ''' # ''' Testing the GP regression with polynomial kernel with white kernel on 1d data '''
# mlp = GPy.kern.Poly(1, degree=5) # mlp = GPy.kern.Poly(1, degree=5)
# self.check_model(mlp, model_type='GPRegression', dimension=1) # self.check_model(mlp, model_type='GPRegression', dimension=1)
@ -417,11 +440,11 @@ class GradientTests(np.testing.TestCase):
def test_gp_heteroscedastic_regression(self): def test_gp_heteroscedastic_regression(self):
num_obs = 25 num_obs = 25
X = np.random.randint(0,140,num_obs) X = np.random.randint(0, 140, num_obs)
X = X[:,None] X = X[:, None]
Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:,None] Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1) kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
m = GPy.models.GPHeteroscedasticRegression(X,Y,kern) m = GPy.models.GPHeteroscedasticRegression(X, Y, kern)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_gp_kronecker_gaussian(self): def test_gp_kronecker_gaussian(self):
@ -432,12 +455,12 @@ class GradientTests(np.testing.TestCase):
k1 = GPy.kern.RBF(1) # + GPy.kern.White(1) k1 = GPy.kern.RBF(1) # + GPy.kern.White(1)
k2 = GPy.kern.RBF(1) # + GPy.kern.White(1) k2 = GPy.kern.RBF(1) # + GPy.kern.White(1)
Y = np.random.randn(N1, N2) Y = np.random.randn(N1, N2)
Y = Y-Y.mean(0) Y = Y - Y.mean(0)
Y = Y/Y.std(0) Y = Y / Y.std(0)
m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2) m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2)
# build the model the dumb way # build the model the dumb way
assert (N1*N2<1000), "too much data for standard GPs!" assert (N1 * N2 < 1000), "too much data for standard GPs!"
yy, xx = np.meshgrid(X2, X1) yy, xx = np.meshgrid(X2, X1)
Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
kg = GPy.kern.RBF(1, active_dims=[0]) * GPy.kern.RBF(1, active_dims=[1]) kg = GPy.kern.RBF(1, active_dims=[0]) * GPy.kern.RBF(1, active_dims=[1])
@ -458,11 +481,11 @@ class GradientTests(np.testing.TestCase):
def test_gp_VGPC(self): def test_gp_VGPC(self):
num_obs = 25 num_obs = 25
X = np.random.randint(0,140,num_obs) X = np.random.randint(0, 140, num_obs)
X = X[:,None] X = X[:, None]
Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:,None] Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1) kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
m = GPy.models.GPVariationalGaussianApproximation(X,Y,kern) m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())