GPy/GPy/core/sparse_gp.py

# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from .gp import GP
from .parameterization.param import Param
from ..inference.latent_function_inference import var_dtc
from .. import likelihoods
from GPy.core.parameterization.variational import VariationalPosterior

import logging
logger = logging.getLogger("sparse gp")

class SparseGP(GP):
    """
    A general purpose Sparse GP model

    This model allows (approximate) inference using variational DTC or FITC
    (Gaussian likelihoods) as well as non-conjugate sparse methods based on
    these.

    This is not for missing data, as the implementation for missing data involves
    some inefficient optimization routine decisions.
    See missing data SparseGP implementation in
    :py:class:`~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch`.

    :param X: inputs
    :type X: np.ndarray (num_data x input_dim)
    :param Y: observed outputs, forwarded to the GP base class
    :param Z: inducing inputs; ``Z.shape[0]`` is stored as ``num_inducing``
    :type Z: np.ndarray (num_inducing x input_dim)
    :param kernel: the kernel (covariance function). See link kernels
    :type kernel: a GPy.kern.kern instance
    :param likelihood: a likelihood instance
    :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
    :param mean_function: optional mean function, forwarded to the GP base
        class and to the inference method
    :param X_variance: The uncertainty in the measurements of X (Gaussian variance).
        Currently not read by this constructor.
    :type X_variance: np.ndarray (num_data x input_dim) | None
    :param inference_method: latent function inference method. If None,
        ``var_dtc.VarDTC(limit=3)`` is used for Gaussian likelihoods; for any
        other likelihood a NotImplementedError is raised.
    :param name: name of the model
    :type name: str
    :param Y_metadata: metadata forwarded to the GP base class and to the
        inference method
    :param normalizer: forwarded to the GP base class
    :raises NotImplementedError: if ``inference_method`` is None and the
        likelihood is not Gaussian

    """

    def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, X_variance=None, inference_method=None,
                 name='sparse gp', Y_metadata=None, normalizer=False):

        #pick a sensible inference method
        if inference_method is None:
            if isinstance(likelihood, likelihoods.Gaussian):
                inference_method = var_dtc.VarDTC(limit=3)
            else:
                # There is no default for non-Gaussian likelihoods: the caller
                # has to choose the inference method.
                raise NotImplementedError(
                    "SparseGP has no default inference method for non-Gaussian "
                    "likelihoods; please pass inference_method explicitly.")
            # Pass the pieces as separate arguments: wrapping them in a tuple
            # printed the tuple's repr instead of a readable message.
            print("defaulting to", inference_method, "for latent function inference")

        # Z is created before GP.__init__ runs but linked only afterwards.
        self.Z = Param('inducing inputs', Z)
        self.num_inducing = Z.shape[0]

        GP.__init__(self, X, Y, kernel, likelihood, mean_function, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)

        logger.info("Adding Z as parameter")
        # index=0 puts the inducing inputs first among the linked parameters.
        self.link_parameter(self.Z, index=0)
        self.posterior = None

    @property
    def _predictive_variable(self):
        """The inducing inputs ``Z``."""
        return self.Z

    def has_uncertain_inputs(self):
        """Return True if ``self.X`` is a VariationalPosterior instance."""
        return isinstance(self.X, VariationalPosterior)

    def set_Z(self, Z, trigger_update=True):
        """
        Replace the inducing inputs by a new parameter built from ``Z``.

        The current ``Z`` parameter is unlinked, a fresh ``Param`` is created
        and linked at index 0, and ``num_inducing`` is refreshed.

        :param Z: the new inducing inputs
        :param trigger_update: if True, ``update_model(False)`` is called
            before the swap and ``update_model(True)`` after it
        :type trigger_update: bool
        """
        if trigger_update: self.update_model(False)
        self.unlink_parameter(self.Z)
        self.Z = Param('inducing inputs',Z)
        # Keep the cached count in step with the new inducing inputs;
        # otherwise it would still describe the Z given to the constructor.
        self.num_inducing = self.Z.shape[0]
        self.link_parameter(self.Z, index=0)
        if trigger_update: self.update_model(True)

    def parameters_changed(self):
        """
        Re-run inference and refresh all gradients.

        Stores the results of ``inference_method.inference`` in
        ``self.posterior``, ``self._log_marginal_likelihood`` and
        ``self.grad_dict``, then calls :py:meth:`_update_gradients`.
        """
        self.posterior, self._log_marginal_likelihood, self.grad_dict = \
        self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood,
                                        self.Y_normalized, Y_metadata=self.Y_metadata,
                                        mean_function=self.mean_function)
        self._update_gradients()

    def _update_gradients(self):
        """
        Push the entries of ``self.grad_dict`` into the gradients of the
        likelihood, the mean function (if any), the kernel and ``Z``.

        Reads ``dL_dthetaL``, ``dL_dKmm`` and, depending on the input type,
        either ``dL_dpsi0``/``dL_dpsi1``/``dL_dpsi2`` (uncertain inputs) or
        ``dL_dKdiag``/``dL_dKnm``; ``dL_dm`` is read only when a mean function
        is set.
        """
        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        if self.mean_function is not None:
            self.mean_function.update_gradients(self.grad_dict['dL_dm'], self.X)

        # NOTE(review): the copy-then-add pattern below implies that each
        # kern.update_gradients_* call overwrites kern.gradient rather than
        # accumulating into it -- confirm against the kernel implementation.
        if isinstance(self.X, VariationalPosterior):
            #gradients wrt kernel
            dL_dKmm = self.grad_dict['dL_dKmm']
            self.kern.update_gradients_full(dL_dKmm, self.Z, None)
            kerngrad = self.kern.gradient.copy()
            self.kern.update_gradients_expectations(variational_posterior=self.X,
                                                    Z=self.Z,
                                                    dL_dpsi0=self.grad_dict['dL_dpsi0'],
                                                    dL_dpsi1=self.grad_dict['dL_dpsi1'],
                                                    dL_dpsi2=self.grad_dict['dL_dpsi2'])
            self.kern.gradient += kerngrad

            #gradients wrt Z
            self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
            self.Z.gradient += self.kern.gradients_Z_expectations(
                               self.grad_dict['dL_dpsi0'],
                               self.grad_dict['dL_dpsi1'],
                               self.grad_dict['dL_dpsi2'],
                               Z=self.Z,
                               variational_posterior=self.X)
        else:
            #gradients wrt kernel
            # Three contributions are summed: dL_dKdiag on X, dL_dKnm on
            # (X, Z) and dL_dKmm on Z.
            self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
            kerngrad = self.kern.gradient.copy()
            self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)
            kerngrad += self.kern.gradient
            self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
            self.kern.gradient += kerngrad
            #gradients wrt Z
            self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
            # dL_dKnm is transposed because Z is the first argument here.
            self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
        # Independent snapshot of the Z gradient.
        # NOTE(review): its consumers are not visible in this file.
        self._Zgrad = self.Z.gradient.copy()

    def to_dict(self, save_data=True):
        """
        Return a dictionary representation of the model.

        Extends the dictionary produced by the parent class: ``"class"`` is
        set to ``"GPy.core.SparseGP"`` and ``"Z"`` holds the inducing inputs
        as nested lists.

        :param save_data: forwarded to the parent's ``to_dict``
        :type save_data: bool
        :returns: the dictionary describing this model
        :rtype: dict
        """
        input_dict = super(SparseGP, self).to_dict(save_data)
        input_dict["class"] = "GPy.core.SparseGP"
        input_dict["Z"] = self.Z.tolist()
        return input_dict

    @staticmethod
    def _from_dict(input_dict, data=None):
        """
        Build a SparseGP from a dictionary such as the one made by ``to_dict``.

        ``input_dict`` is modified in place: serialized entries are replaced
        by the reconstructed objects, and the dictionary is then expanded as
        keyword arguments of the SparseGP constructor. It must therefore
        contain only valid constructor arguments.

        :param input_dict: dictionary describing the model; must contain the
            keys ``X``, ``Y``, ``Z``, ``kernel``, ``likelihood`` and
            ``inference_method``
        :type input_dict: dict
        :param data: optional ``(X, Y)`` pair. Required when ``X`` or ``Y`` is
            None in ``input_dict``; if both sources are present, ``data``
            takes precedence and a warning is printed.
        :returns: the reconstructed model
        :rtype: SparseGP
        :raises ValueError: if ``X`` or ``Y`` is None in ``input_dict`` and no
            ``data`` is given
        """
        import GPy
        if (input_dict['X'] is None) or (input_dict['Y'] is None):
            # An explicit exception instead of ``assert``: asserts vanish
            # under ``python -O`` and the failure would then surface as an
            # unrelated TypeError on ``data[0]``.
            if data is None:
                raise ValueError("The model was saved without X and/or Y; "
                                 "please provide data=(X, Y).")
            input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])
        elif data is not None:
            print("WARNING: The model has been saved with X,Y! The original values are being overriden!")
            input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])
        else:
            input_dict["X"], input_dict["Y"] = np.array(input_dict['X']), np.array(input_dict['Y'])

        input_dict["Z"] = np.array(input_dict['Z'])
        input_dict["kernel"] = GPy.kern.Kern.from_dict(input_dict["kernel"])
        input_dict["likelihood"] = GPy.likelihoods.likelihood.Likelihood.from_dict(input_dict["likelihood"])

        # Optional entries: a missing key and an explicit None are treated
        # alike, and the key is always (re)written so it reaches the
        # constructor.
        mean_function = input_dict.get("mean_function")
        if mean_function is not None:
            input_dict["mean_function"] = GPy.core.mapping.Mapping.from_dict(mean_function)
        else:
            input_dict["mean_function"] = mean_function
        input_dict["inference_method"] = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(input_dict["inference_method"])

        #FIXME: Assumes the Y_metadata is serializable. We should create a Metadata class
        Y_metadata = input_dict.get("Y_metadata")
        input_dict["Y_metadata"] = Y_metadata

        normalizer = input_dict.get("normalizer")
        if normalizer is not None:
            input_dict["normalizer"] = GPy.util.normalizer._Norm.from_dict(normalizer)
        else:
            input_dict["normalizer"] = normalizer
        return SparseGP(**input_dict)
more ]#copyrighting 2014-11-21 11:40:50 +00:00			`# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).`
refactoring files added 2013-06-05 14:11:49 +01:00			`# Licensed under the BSD 3-clause license (see LICENSE.txt)`

			`import numpy as np`
More relative import fixes for Python 3 compatibility 2015-02-26 07:14:40 +00:00			`from .gp import GP`
			`from .parameterization.param import Param`
variational posterior and prior added, linear updated 2014-02-24 09:49:29 +00:00			`from ..inference.latent_function_inference import var_dtc`
some hacking on sparse_gp inference 2014-01-29 17:02:44 +00:00			`from .. import likelihoods`
[dir] structure preserved 2015-10-15 15:13:16 +01:00			`from GPy.core.parameterization.variational import VariationalPosterior`
refactoring files added 2013-06-05 14:11:49 +01:00
logging 2014-07-02 11:15:25 -07:00			`import logging`
			`logger = logging.getLogger("sparse gp")`

removed a lot of unnecessary code in sparse GP 2014-01-22 15:24:05 +00:00			`class SparseGP(GP):`
refactoring files added 2013-06-05 14:11:49 +01:00			`"""`
removed a lot of unnecessary code in sparse GP 2014-01-22 15:24:05 +00:00			`A general purpose Sparse GP model`
refactoring files added 2013-06-05 14:11:49 +01:00
changed gradient interface to gp and sparse GP 2014-01-28 14:40:07 +00:00			`This model allows (approximate) inference using variational DTC or FITC`
			`(Gaussian likelihoods) as well as non-conjugate sparse methods based on`
			`these.`
mean functions now working for svgp. with tests 2015-03-26 16:20:17 +00:00
[sparse gp] doc changes for missing data 2015-03-23 08:48:06 +00:00			`This is not for missing data, as the implementation for missing data involves`
			`some inefficient optimization routine decisions.`
			`See missing data SparseGP implementation in py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'.`
changed gradient interface to gp and sparse GP 2014-01-28 14:40:07 +00:00
refactoring files added 2013-06-05 14:11:49 +01:00			`:param X: inputs`
lots of bugfixes after refactoring 2013-06-05 16:14:43 +01:00			`:type X: np.ndarray (num_data x input_dim)`
refactoring files added 2013-06-05 14:11:49 +01:00			`:param likelihood: a likelihood instance, containing the observed data`
			`:type likelihood: GPy.likelihood.(Gaussian \| EP \| Laplace)`
Fixed docstring warnings - could still be mistakes 2013-09-20 13:38:20 +01:00			`:param kernel: the kernel (covariance function). See link kernels`
refactoring files added 2013-06-05 14:11:49 +01:00			`:type kernel: a GPy.kern.kern instance`
			`:param X_variance: The uncertainty in the measurements of X (Gaussian variance)`
lots of bugfixes after refactoring 2013-06-05 16:14:43 +01:00			`:type X_variance: np.ndarray (num_data x input_dim) \| None`
removed a lot of unnecessary code in sparse GP 2014-01-22 15:24:05 +00:00			`:param Z: inducing inputs`
			`:type Z: np.ndarray (num_inducing x input_dim)`
Fixed docstring warnings - could still be mistakes 2013-09-20 13:38:20 +01:00			`:param num_inducing: Number of inducing points (optional, default 10. Ignored if Z is not None)`
refactoring files added 2013-06-05 14:11:49 +01:00			`:type num_inducing: int`
Fixed docstring warnings - could still be mistakes 2013-09-20 13:38:20 +01:00
refactoring files added 2013-06-05 14:11:49 +01:00			`"""`

[classification] sparse gp classification and dtc update 2015-09-11 15:08:30 +01:00			`def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, X_variance=None, inference_method=None,`
[VarDTC] reverted SparseGP to previous state, updated BGPLVM accordingly 2014-11-03 11:16:34 +00:00			`name='sparse gp', Y_metadata=None, normalizer=False):`
[sparsegp] check for missing data 2015-09-30 08:22:32 +01:00
removed a lot of unnecessary code in sparse GP 2014-01-22 15:24:05 +00:00			`#pick a sensible inference method`
			`if inference_method is None:`
			`if isinstance(likelihood, likelihoods.Gaussian):`
[chaching] changing all chacher limits to 3 2016-03-07 11:37:22 +00:00			`inference_method = var_dtc.VarDTC(limit=3)`
changes to DTC 2014-02-11 20:05:36 +00:00			`else:`
general bugfixing 2014-02-13 08:53:14 +00:00			`#inference_method = ??`
Exception fixes for Python 3 compat 2015-02-26 13:33:39 +00:00			`raise NotImplementedError("what to do what to do?")`
Python 3 fixes 2015-09-07 14:07:09 +01:00			`print(("defaulting to ", inference_method, "for latent function inference"))`
removed a lot of unnecessary code in sparse GP 2014-01-22 15:24:05 +00:00
an afternoon's work on the laplace approximation 2014-02-05 16:23:35 +00:00			`self.Z = Param('inducing inputs', Z)`
refactoring files added 2013-06-05 14:11:49 +01:00			`self.num_inducing = Z.shape[0]`
sorting ouyt the variational posterior objects 2014-02-24 15:44:11 +00:00
mean functions now working for svgp. with tests 2015-03-26 16:20:17 +00:00			`GP.__init__(self, X, Y, kernel, likelihood, mean_function, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)`
[vardtc] missing data handling and stochastic update in d 2014-10-16 12:52:17 +01:00
logging 2014-07-02 11:15:25 -07:00			`logger.info("Adding Z as parameter")`
[link\|unlink_parameter] renaming add_parameter to link_parameter 2014-09-08 08:57:28 +01:00			`self.link_parameter(self.Z, index=0)`
[vardtc] missing data handling and stochastic update in d 2014-10-16 12:52:17 +01:00			`self.posterior = None`
[core] updating system, security branching 2015-09-02 09:06:17 +01:00
[pred_var] added predictive variable as property now 2015-09-10 15:50:49 +01:00			`@property`
			`def _predictive_variable(self):`
			`return self.Z`
general bugfixing 2014-02-13 08:53:14 +00:00
linear without caching, derivatives done 2014-02-21 09:14:31 +00:00			`def has_uncertain_inputs(self):`
kernel tests in working order (not all implemented though 2014-02-24 17:44:12 +00:00			`return isinstance(self.X, VariationalPosterior)`
mean functions now working for svgp. with tests 2015-03-26 16:20:17 +00:00
add trigger update to set_{X,Y,Z} 2015-03-13 09:47:36 +00:00			`def set_Z(self, Z, trigger_update=True):`
			`if trigger_update: self.update_model(False)`
add set_Z function 2015-03-09 17:26:18 +00:00			`self.unlink_parameter(self.Z)`
			`self.Z = Param('inducing inputs',Z)`
			`self.link_parameter(self.Z, index=0)`
add trigger update to set_{X,Y,Z} 2015-03-13 09:47:36 +00:00			`if trigger_update: self.update_model(True)`
refactoring files added 2013-06-05 14:11:49 +01:00
[VarDTC] reverted SparseGP to previous state, updated BGPLVM accordingly 2014-11-03 11:16:34 +00:00			`def parameters_changed(self):`
Add mean function functionality to dtc inference method 2017-11-13 21:15:38 +00:00			`self.posterior, self._log_marginal_likelihood, self.grad_dict = \`
			`self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood,`
fix: #590 Y_normalized was not used for running optimization 2018-01-10 14:16:36 +01:00			`self.Y_normalized, Y_metadata=self.Y_metadata,`
Add mean function functionality to dtc inference method 2017-11-13 21:15:38 +00:00			`mean_function=self.mean_function)`
[classification] sparse gp classification and dtc update 2015-09-11 15:08:30 +01:00			`self._update_gradients()`
[missing data] general implementation for subsetting data 2014-10-08 12:03:51 +01:00
[classification] sparse gp classification and dtc update 2015-09-11 15:08:30 +01:00			`def _update_gradients(self):`
[VarDTC] reverted SparseGP to previous state, updated BGPLVM accordingly 2014-11-03 11:16:34 +00:00			`self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])`
Add mean function functionality to dtc inference method 2017-11-13 21:15:38 +00:00			`if self.mean_function is not None:`
			`self.mean_function.update_gradients(self.grad_dict['dL_dm'], self.X)`
[missing data] general implementation for subsetting data 2014-10-08 12:03:51 +01:00
[VarDTC] reverted SparseGP to previous state, updated BGPLVM accordingly 2014-11-03 11:16:34 +00:00			`if isinstance(self.X, VariationalPosterior):`
messing with kernels 2014-02-25 17:15:38 +00:00			`#gradients wrt kernel`
[VarDTC] reverted SparseGP to previous state, updated BGPLVM accordingly 2014-11-03 11:16:34 +00:00			`dL_dKmm = self.grad_dict['dL_dKmm']`
			`self.kern.update_gradients_full(dL_dKmm, self.Z, None)`
			`kerngrad = self.kern.gradient.copy()`
			`self.kern.update_gradients_expectations(variational_posterior=self.X,`
			`Z=self.Z,`
			`dL_dpsi0=self.grad_dict['dL_dpsi0'],`
			`dL_dpsi1=self.grad_dict['dL_dpsi1'],`
			`dL_dpsi2=self.grad_dict['dL_dpsi2'])`
			`self.kern.gradient += kerngrad`
messing with kernels 2014-02-25 17:15:38 +00:00
			`#gradients wrt Z`
[VarDTC] reverted SparseGP to previous state, updated BGPLVM accordingly 2014-11-03 11:16:34 +00:00			`self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)`
			`self.Z.gradient += self.kern.gradients_Z_expectations(`
			`self.grad_dict['dL_dpsi0'],`
			`self.grad_dict['dL_dpsi1'],`
			`self.grad_dict['dL_dpsi2'],`
			`Z=self.Z,`
			`variational_posterior=self.X)`
linear without caching, derivatives done 2014-02-21 09:14:31 +00:00			`else:`
messing with kernels 2014-02-25 17:15:38 +00:00			`#gradients wrt kernel`
[VarDTC] reverted SparseGP to previous state, updated BGPLVM accordingly 2014-11-03 11:16:34 +00:00			`self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)`
			`kerngrad = self.kern.gradient.copy()`
			`self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)`
			`kerngrad += self.kern.gradient`
			`self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)`
			`self.kern.gradient += kerngrad`
messing with kernels 2014-02-25 17:15:38 +00:00			`#gradients wrt Z`
[VarDTC] reverted SparseGP to previous state, updated BGPLVM accordingly 2014-11-03 11:16:34 +00:00			`self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)`
			`self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)`
Fixed MRD inducing point gradients 2015-09-24 13:44:28 +01:00			`self._Zgrad = self.Z.gradient.copy()`
Sparse GP serialization 2018-05-16 08:53:55 +01:00
			`def to_dict(self, save_data=True):`
			`input_dict = super(SparseGP, self).to_dict(save_data)`
			`input_dict["class"] = "GPy.core.SparseGP"`
			`input_dict["Z"] = self.Z.tolist()`
			`return input_dict`

			`@staticmethod`
			`def _from_dict(input_dict, data=None):`
			`import GPy`
			`if (input_dict['X'] is None) or (input_dict['Y'] is None):`
			`assert(data is not None)`
			`input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])`
			`elif data is not None:`
			`print("WARNING: The model has been saved with X,Y! The original values are being overriden!")`
			`input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])`
			`else:`
			`input_dict["X"], input_dict["Y"] = np.array(input_dict['X']), np.array(input_dict['Y'])`

			`input_dict["Z"] = np.array(input_dict['Z'])`
			`input_dict["kernel"] = GPy.kern.Kern.from_dict(input_dict["kernel"])`
			`input_dict["likelihood"] = GPy.likelihoods.likelihood.Likelihood.from_dict(input_dict["likelihood"])`
			`mean_function = input_dict.get("mean_function")`
			`if mean_function is not None:`
			`input_dict["mean_function"] = GPy.core.mapping.Mapping.from_dict(mean_function)`
			`else:`
			`input_dict["mean_function"] = mean_function`
			`input_dict["inference_method"] = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(input_dict["inference_method"])`

			`#FIXME: Assumes the Y_metadata is serializable. We should create a Metadata class`
			`Y_metadata = input_dict.get("Y_metadata")`
			`input_dict["Y_metadata"] = Y_metadata`

			`normalizer = input_dict.get("normalizer")`
			`if normalizer is not None:`
			`input_dict["normalizer"] = GPy.util.normalizer._Norm.from_dict(normalizer)`
			`else:`
			`input_dict["normalizer"] = normalizer`
			`return SparseGP(**input_dict)`