diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 7c0c1d18..110f601e 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -74,11 +74,16 @@ class SparseGP(GP):
             if trigger_update: self.update_model(True)
 
     def parameters_changed(self):
-        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata)
+        self.posterior, self._log_marginal_likelihood, self.grad_dict = \
+            self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood,
+                                            self.Y, Y_metadata=self.Y_metadata,
+                                            mean_function=self.mean_function)
         self._update_gradients()
 
     def _update_gradients(self):
         self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
+        if self.mean_function is not None:
+            self.mean_function.update_gradients(self.grad_dict['dL_dm'], self.X)
 
         if isinstance(self.X, VariationalPosterior):
             #gradients wrt kernel
@@ -112,4 +117,3 @@ class SparseGP(GP):
             self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
             self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
             self._Zgrad = self.Z.gradient.copy()
-
diff --git a/GPy/core/sparse_gp_mpi.py b/GPy/core/sparse_gp_mpi.py
index f12ae7a7..d9c29a2f 100644
--- a/GPy/core/sparse_gp_mpi.py
+++ b/GPy/core/sparse_gp_mpi.py
@@ -34,7 +34,9 @@ class SparseGP_MPI(SparseGP):
     """
 
-    def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp', Y_metadata=None, mpi_comm=None, normalizer=False):
+    def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None,
+                 mean_function=None, inference_method=None, name='sparse gp',
+                 Y_metadata=None, mpi_comm=None, normalizer=False):
        self._IN_OPTIMIZATION_ = False
 
        if mpi_comm != None:
            if inference_method is None:
@@ -42,12 +44,12 @@ class SparseGP_MPI(SparseGP):
            else:
                assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!'
-        super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
+        super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, mean_function=mean_function, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
         self.update_model(False)
-        
+
         if variational_prior is not None:
             self.link_parameter(variational_prior)
-        
+
         self.mpi_comm = mpi_comm
         # Manage the data (Y) division
         if mpi_comm != None:
@@ -118,4 +120,3 @@ class SparseGP_MPI(SparseGP):
             update_gradients(self, mpi_comm=self.mpi_comm)
         else:
             super(SparseGP_MPI,self).parameters_changed()
-
diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py
index fb0e946b..9870015d 100644
--- a/GPy/inference/latent_function_inference/var_dtc.py
+++ b/GPy/inference/latent_function_inference/var_dtc.py
@@ -61,16 +61,20 @@ class VarDTC(LatentFunctionInference):
         return jitchol(tdot(Y))
 
     def get_VVTfactor(self, Y, prec):
-        return Y * prec # TODO chache this, and make it effective
+        return Y * prec # TODO cache this, and make it effective
 
     def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, mean_function=None, precision=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, Z_tilde=None):
-        assert mean_function is None, "inference with a mean function not implemented"
 
         num_data, output_dim = Y.shape
         num_inducing = Z.shape[0]
 
         uncertain_inputs = isinstance(X, VariationalPosterior)
 
+        if mean_function is not None:
+            mean = mean_function.f(X)
+        else:
+            mean = 0
+
         if precision is None:
             #assume Gaussian likelihood
             precision = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), self.const_jitter)
@@ -78,10 +82,11 @@ class VarDTC(LatentFunctionInference):
         if precision.ndim == 1:
             precision = precision[:, None]
         het_noise = precision.size > 1
+        if (het_noise or uncertain_inputs) and mean_function is not None:
+            raise ValueError('Mean function not implemented with uncertain inputs or heteroscedasticity')
 
-        VVT_factor = precision*Y
-        #VVT_factor = precision*Y
-        trYYT = self.get_trYYT(Y)
+        VVT_factor = precision*(Y-mean)
+        trYYT = self.get_trYYT(Y-mean)
 
         # kernel computations, using BGPLVM notation
         if Lm is None:
@@ -128,14 +133,18 @@ class VarDTC(LatentFunctionInference):
         # factor B
         B = np.eye(num_inducing) + A
         LB = jitchol(B)
-        psi1Vf = np.dot(psi1.T, VVT_factor)
+
         # back substutue C into psi1Vf
-        tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
-        _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
+        tmp, _ = dtrtrs(Lm, psi1.T, lower=1, trans=0)
+        _LBi_Lmi_psi1, _ = dtrtrs(LB, tmp, lower=1, trans=0)
+        _LBi_Lmi_psi1Vf = np.dot(_LBi_Lmi_psi1, VVT_factor)
         tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
         Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
 
         # data fit and derivative of L w.r.t. Kmm
+        dL_dm = -np.dot((_LBi_Lmi_psi1.T.dot(_LBi_Lmi_psi1))
+                        - np.eye(Y.shape[0]), VVT_factor)
+
         delit = tdot(_LBi_Lmi_psi1Vf)
         data_fit = np.trace(delit)
         DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
@@ -181,7 +190,8 @@ class VarDTC(LatentFunctionInference):
         grad_dict = {'dL_dKmm': dL_dKmm,
                      'dL_dKdiag':dL_dpsi0,
                      'dL_dKnm':dL_dpsi1,
-                     'dL_dthetaL':dL_dthetaL}
+                     'dL_dthetaL':dL_dthetaL,
+                     'dL_dm':dL_dm}
 
         #get sufficient things for posterior prediction
         #TODO: do we really want to do this in the loop?
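Side note on the back-substitution refactor above: the old code solved against psi1Vf = psi1.T.dot(VVT_factor) directly, while the new code first forms C = LB^{-1} Lm^{-1} psi1.T (so it can be reused by dL_dm) and only then multiplies by VVT_factor. Because triangular solves are linear, both orderings produce the same _LBi_Lmi_psi1Vf. A minimal numpy sketch of that equivalence, using small random stand-ins for psi1, Lm, LB and VVT_factor (shapes and names here are illustrative, not GPy internals):

    import numpy as np
    from scipy.linalg import solve_triangular

    rng = np.random.default_rng(0)
    n, k = 6, 3                                     # data points, inducing points
    psi1 = rng.normal(size=(n, k))                  # stand-in for the n x m matrix K_nm
    V = rng.normal(size=(n, 1))                     # stand-in for VVT_factor = precision*(Y - mean)
    Lm = np.linalg.cholesky(np.eye(k) + 0.1*np.ones((k, k)))    # stand-in for chol(Kmm)
    LB = np.linalg.cholesky(2*np.eye(k) + 0.2*np.ones((k, k)))  # stand-in for chol(B)

    # old ordering: back-substitute psi1.T @ V directly
    old = solve_triangular(LB, solve_triangular(Lm, psi1.T @ V, lower=True), lower=True)

    # new ordering: form C = LB^{-1} Lm^{-1} psi1.T first, then multiply by V
    C = solve_triangular(LB, solve_triangular(Lm, psi1.T, lower=True), lower=True)
    new = C @ V
    assert np.allclose(old, new)

    # the extra term the patch exposes, as computed above: dL_dm = -(C^T C - I_n) V
    dL_dm = -(C.T @ C - np.eye(n)) @ V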
diff --git a/GPy/models/sparse_gp_regression.py b/GPy/models/sparse_gp_regression.py
index 893ccff6..8b78ac43 100644
--- a/GPy/models/sparse_gp_regression.py
+++ b/GPy/models/sparse_gp_regression.py
@@ -30,7 +30,7 @@ class SparseGPRegression(SparseGP_MPI):
     """
 
-    def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None, mpi_comm=None, name='sparse_gp'):
+    def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None, mean_function=None, normalizer=None, mpi_comm=None, name='sparse_gp'):
         num_data, input_dim = X.shape
 
         # kern defaults to rbf (plus white for stability)
@@ -55,7 +55,8 @@ class SparseGPRegression(SparseGP_MPI):
         else:
             infr = VarDTC()
 
-        SparseGP_MPI.__init__(self, X, Y, Z, kernel, likelihood, inference_method=infr, normalizer=normalizer, mpi_comm=mpi_comm, name=name)
+        SparseGP_MPI.__init__(self, X, Y, Z, kernel, likelihood, mean_function=mean_function,
+                              inference_method=infr, normalizer=normalizer, mpi_comm=mpi_comm, name=name)
 
     def parameters_changed(self):
         from ..inference.latent_function_inference.var_dtc_parallel import update_gradients_sparsegp,VarDTC_minibatch
diff --git a/GPy/testing/inference_tests.py b/GPy/testing/inference_tests.py
index 816d5488..fa717194 100644
--- a/GPy/testing/inference_tests.py
+++ b/GPy/testing/inference_tests.py
@@ -49,6 +49,7 @@ class InferenceXTestCase(unittest.TestCase):
         m.optimize()
         x, mi = m.infer_newX(m.Y, optimize=True)
         np.testing.assert_array_almost_equal(m.X, mi.X, decimal=2)
+
 class InferenceGPEP(unittest.TestCase):
 
     def genData(self):
@@ -132,6 +133,16 @@ class InferenceGPEP(unittest.TestCase):
                                     np.sum(p._woodbury_vector - p0._woodbury_vector),
                                     np.sum(p.woodbury_inv - p0.woodbury_inv)])) < 1e6)
 
+class VarDtcTest(unittest.TestCase):
+
+    def test_var_dtc_inference_with_mean(self):
+        """ Check dL_dm in var_dtc is calculated correctly"""
+        np.random.seed(1)
+        x = np.linspace(0.,2*np.pi,100)[:,None]
+        y = -np.cos(x)+np.random.randn(*x.shape)*0.3+1
+        m = GPy.models.SparseGPRegression(x,y, mean_function=GPy.mappings.Linear(input_dim=1, output_dim=1))
+        self.assertTrue(m.checkgrad())
+
 class HMCSamplerTest(unittest.TestCase):
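For reference, the new keyword can be exercised end-to-end in the same way the test above does; a minimal usage sketch (the data and the GPy.mappings.Linear mapping mirror test_var_dtc_inference_with_mean):

    import numpy as np
    import GPy

    np.random.seed(1)
    X = np.linspace(0., 2*np.pi, 100)[:, None]
    Y = -np.cos(X) + np.random.randn(*X.shape)*0.3 + 1

    # sparse GP regression with a learnable linear mean function
    mean_fn = GPy.mappings.Linear(input_dim=1, output_dim=1)
    m = GPy.models.SparseGPRegression(X, Y, mean_function=mean_fn)
    m.optimize()

    print(m.checkgrad())  # should pass now that dL_dm is propagated to the mapping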