merge the current devel into psi2

2026-06-02 14:45:15 +02:00 · 2014-08-11 18:01:23 +01:00 · 2014-08-11 18:01:23 +01:00 · 785c580032
commit 785c580032
parent 9f1bd3ef25 3651374617
49 changed files with 1839 additions and 581 deletions
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@ -10,6 +10,7 @@ from ..util import linalg
 from ..core.parameterization.variational import NormalPosterior, NormalPrior, VariationalPosterior
 from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
 from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
+import logging

 class BayesianGPLVM(SparseGP):
    """
@ -27,8 +28,10 @@ class BayesianGPLVM(SparseGP):
                 Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', mpi_comm=None, **kwargs):
        self.mpi_comm = mpi_comm
        self.__IN_OPTIMIZATION__ = False
+        self.logger = logging.getLogger(self.__class__.__name__)
        if X == None:
            from ..util.initialization import initialize_latent
+            self.logger.info("initializing latent space X with method {}".format(init))
            X, fracs = initialize_latent(init, input_dim, Y)
        else:
            fracs = np.ones(input_dim)
@ -36,31 +39,35 @@ class BayesianGPLVM(SparseGP):
        self.init = init

        if X_variance is None:
+            self.logger.info("initializing latent space variance ~ uniform(0,.1)")
            X_variance = np.random.uniform(0,.1,X.shape)

-
        if Z is None:
+            self.logger.info("initializing inducing inputs")
            Z = np.random.permutation(X.copy())[:num_inducing]
        assert Z.shape[1] == X.shape[1]

        if kernel is None:
+            self.logger.info("initializing kernel RBF")
            kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim)

        if likelihood is None:
            likelihood = Gaussian()

-
        self.variational_prior = NormalPrior()
        X = NormalPosterior(X, X_variance)

        if inference_method is None:
-            if np.any(np.isnan(Y)):
+            inan = np.isnan(Y)
+            if np.any(inan):
                from ..inference.latent_function_inference.var_dtc import VarDTCMissingData
-                inference_method = VarDTCMissingData()
+                self.logger.debug("creating inference_method with var_dtc missing data")
+                inference_method = VarDTCMissingData(inan=inan)
            elif mpi_comm is not None:
                inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
            else:
                from ..inference.latent_function_inference.var_dtc import VarDTC
+                self.logger.debug("creating inference_method var_dtc")
                inference_method = VarDTC()
        if isinstance(inference_method,VarDTC_minibatch):
            inference_method.mpi_comm = mpi_comm
@ -69,6 +76,7 @@ class BayesianGPLVM(SparseGP):
            kernel.psicomp.GPU_direct = True

        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
+        self.logger.info("Adding X as parameter")
        self.add_parameter(self.X, index=0)

        if mpi_comm != None:
@ -98,13 +106,29 @@ class BayesianGPLVM(SparseGP):

        self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])

+        # This is testing code -------------------------
+#         i = np.random.randint(self.X.shape[0])
+#         X_ = self.X.mean
+#         which = np.sqrt(((X_ - X_[i:i+1])**2).sum(1)).argsort()>(max(0, self.X.shape[0]-51))
+#         _, _, grad_dict = self.inference_method.inference(self.kern, self.X[which], self.Z, self.likelihood, self.Y[which], self.Y_metadata)
+#         grad = self.kern.gradients_qX_expectations(variational_posterior=self.X[which], Z=self.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
+#
+#         self.X.mean.gradient[:] = 0
+#         self.X.variance.gradient[:] = 0
+#         self.X.mean.gradient[which] = grad[0]
+#         self.X.variance.gradient[which] = grad[1]
+
+        # update for the KL divergence
+#         self.variational_prior.update_gradients_KL(self.X, which)
+        # -----------------------------------------------
+
        # update for the KL divergence
        self.variational_prior.update_gradients_KL(self.X)

    def plot_latent(self, labels=None, which_indices=None,
                resolution=50, ax=None, marker='o', s=40,
                fignum=None, plot_inducing=True, legend=True,
-                plot_limits=None, 
+                plot_limits=None,
                aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
        import sys
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
@ -122,36 +146,41 @@ class BayesianGPLVM(SparseGP):
        Notes:
        This will only work with a univariate Gaussian likelihood (for now)
        """
-        assert not self.likelihood.is_heteroscedastic
        N_test = Y.shape[0]
        input_dim = self.Z.shape[1]
+
        means = np.zeros((N_test, input_dim))
        covars = np.zeros((N_test, input_dim))

-        dpsi0 = -0.5 * self.input_dim * self.likelihood.precision
-        dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
-        V = self.likelihood.precision * Y
+        dpsi0 = -0.5 * self.input_dim / self.likelihood.variance
+        dpsi2 = self.grad_dict['dL_dpsi2'][0][None, :, :] # TODO: this may change if we ignore het. likelihoods
+        V = Y/self.likelihood.variance

        #compute CPsi1V
-        if self.Cpsi1V is None:
-            psi1V = np.dot(self.psi1.T, self.likelihood.V)
-            tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
-            tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1)
-            self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1)
+        #if self.Cpsi1V is None:
+        #    psi1V = np.dot(self.psi1.T, self.likelihood.V)
+        #    tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
+        #    tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1)
+        #    self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1)

-        dpsi1 = np.dot(self.Cpsi1V, V.T)
+        dpsi1 = np.dot(self.posterior.woodbury_vector, V.T)

-        start = np.zeros(self.input_dim * 2)
+        #start = np.zeros(self.input_dim * 2)
+
+
+        from scipy.optimize import minimize

        for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]):
-            args = (self.kern, self.Z, dpsi0, dpsi1_n.T, dpsi2)
-            xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False)
-
+            args = (input_dim, self.kern.copy(), self.Z, dpsi0, dpsi1_n.T, dpsi2)
+            res = minimize(latent_cost_and_grad, jac=True, x0=np.hstack((means[n], covars[n])), args=args, method='BFGS')
+            xopt = res.x
            mu, log_S = xopt.reshape(2, 1, -1)
            means[n] = mu[0].copy()
            covars[n] = np.exp(log_S[0]).copy()

-        return means, covars
+        X = NormalPosterior(means, covars)
+
+        return X

    def dmu_dX(self, Xnew):
        """
@ -181,7 +210,6 @@ class BayesianGPLVM(SparseGP):
        from ..plotting.matplot_dep import dim_reduction_plots

        return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
-
    def __getstate__(self):
        dc = super(BayesianGPLVM, self).__getstate__()
        dc['mpi_comm'] = None
@ -227,57 +255,27 @@ class BayesianGPLVM(SparseGP):
                    raise Exception("Unrecognizable flag for synchronization!")
        self.__IN_OPTIMIZATION__ = False

-def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
+
+def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
    """
    objective function for fitting the latent variables for test points
    (negative log-likelihood: should be minimised!)
    """
-    mu, log_S = mu_S.reshape(2, 1, -1)
+    mu = mu_S[:input_dim][None]
+    log_S = mu_S[input_dim:][None]
    S = np.exp(log_S)

-    psi0 = kern.psi0(Z, mu, S)
-    psi1 = kern.psi1(Z, mu, S)
-    psi2 = kern.psi2(Z, mu, S)
+    X = NormalPosterior(mu, S)

-    lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
+    psi0 = kern.psi0(Z, X)
+    psi1 = kern.psi1(Z, X)
+    psi2 = kern.psi2(Z, X)

-    mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
-    mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
-    mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
+    lik = dL_dpsi0 * psi0.sum() + np.einsum('ij,kj->...', dL_dpsi1, psi1) + np.einsum('ijk,lkj->...', dL_dpsi2, psi2) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)

-    dmu = mu0 + mu1 + mu2 - mu
+    dLdmu, dLdS = kern.gradients_qX_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, X)
+    dmu = dLdmu - mu
    # dS = S0 + S1 + S2 -0.5 + .5/S
-    dlnS = S * (S0 + S1 + S2 - 0.5) + .5
+    dlnS = S * (dLdS - 0.5) + .5
+
    return -lik, -np.hstack((dmu.flatten(), dlnS.flatten()))
-
-def latent_cost(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
-    """
-    objective function for fitting the latent variables (negative log-likelihood: should be minimised!)
-    This is the same as latent_cost_and_grad but only for the objective
-    """
-    mu, log_S = mu_S.reshape(2, 1, -1)
-    S = np.exp(log_S)
-
-    psi0 = kern.psi0(Z, mu, S)
-    psi1 = kern.psi1(Z, mu, S)
-    psi2 = kern.psi2(Z, mu, S)
-
-    lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
-    return -float(lik)
-
-def latent_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
-    """
-    This is the same as latent_cost_and_grad but only for the grad
-    """
-    mu, log_S = mu_S.reshape(2, 1, -1)
-    S = np.exp(log_S)
-
-    mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
-    mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
-    mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
-
-    dmu = mu0 + mu1 + mu2 - mu
-    # dS = S0 + S1 + S2 -0.5 + .5/S
-    dlnS = S * (S0 + S1 + S2 - 0.5) + .5
-
-    return -np.hstack((dmu.flatten(), dlnS.flatten()))
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@ -2,10 +2,8 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
-import itertools
-import pylab
+import itertools, logging

-from ..core import Model
 from ..kern import Kern
 from ..core.parameterization.variational import NormalPosterior, NormalPrior
 from ..core.parameterization import Param, Parameterized
@ -61,15 +59,18 @@ class MRD(SparseGP):
                 inference_method=None, likelihoods=None, name='mrd', Ynames=None):
        super(GP, self).__init__(name)

+        self.logger = logging.getLogger(self.__class__.__name__)
        self.input_dim = input_dim
        self.num_inducing = num_inducing

        if isinstance(Ylist, dict):
            Ynames, Ylist = zip(*Ylist.items())

+        self.logger.debug("creating observable arrays")
        self.Ylist = [ObsAr(Y) for Y in Ylist]

        if Ynames is None:
+            self.logger.debug("creating Ynames")
            Ynames = ['Y{}'.format(i) for i in range(len(Ylist))]
        self.names = Ynames
        assert len(self.names) == len(self.Ylist), "one name per dataset, or None if Ylist is a dict"
@ -81,13 +82,15 @@ class MRD(SparseGP):
                inan = np.isnan(y)
                if np.any(inan):
                    if not warned:
-                        print "WARING: NaN values detected, make sure initx method can cope with NaN values or provide starting latent space X"
+                        self.logger.warn("WARNING: NaN values detected, make sure initx method can cope with NaN values or provide starting latent space X")
                        warned = True
                    self.inference_method.append(VarDTCMissingData(limit=1, inan=inan))
                else:
                    self.inference_method.append(VarDTC(limit=1))
+                self.logger.debug("created inference method <{}>".format(hex(id(self.inference_method[-1]))))
        else:
            if not isinstance(inference_method, InferenceMethodList):
+                self.logger.debug("making inference_method an InferenceMethodList")
                inference_method = InferenceMethodList(inference_method)
            self.inference_method = inference_method

@ -101,18 +104,19 @@ class MRD(SparseGP):
        self.num_inducing = self.Z.shape[0] # ensure M==N if M>N

        # sort out the kernels
+        self.logger.info("building kernels")
        if kernel is None:
            from ..kern import RBF
-            self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
+            kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
        elif isinstance(kernel, Kern):
-            self.kernels = []
+            kernels = []
            for i in range(len(Ylist)):
                k = kernel.copy()
-                self.kernels.append(k)
+                kernels.append(k)
        else:
            assert len(kernel) == len(Ylist), "need one kernel per output"
            assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
-            self.kernels = kernel
+            kernels = kernel

        if X_variance is None:
            X_variance = np.random.uniform(0.1, 0.2, X.shape)
@ -121,17 +125,17 @@ class MRD(SparseGP):
        self.X = NormalPosterior(X, X_variance)

        if likelihoods is None:
-            self.likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
-        else: self.likelihoods = likelihoods
+            likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
+        else: likelihoods = likelihoods

+        self.logger.info("adding X and Z")
        self.add_parameters(self.X, self.Z)

        self.bgplvms = []
        self.num_data = Ylist[0].shape[0]

-        for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, self.kernels, self.likelihoods, self.Ylist):
+        for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist):
            assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
-
            p = Parameterized(name=n)
            p.add_parameter(k)
            p.kern = k
@ -141,6 +145,7 @@ class MRD(SparseGP):
            self.bgplvms.append(p)

        self.posterior = None
+        self.logger.info("init done")
        self._in_init_ = False

    def parameters_changed(self):
@ -148,8 +153,9 @@ class MRD(SparseGP):
        self.posteriors = []
        self.Z.gradient[:] = 0.
        self.X.gradient[:] = 0.
-
-        for y, k, l, i in itertools.izip(self.Ylist, self.kernels, self.likelihoods, self.inference_method):
+        for y, b, i in itertools.izip(self.Ylist, self.bgplvms, self.inference_method):
+            self.logger.info('working on im <{}>'.format(hex(id(i))))
+            k, l = b.kern, b.likelihood
            posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)

            self.posteriors.append(posterior)
@ -177,11 +183,11 @@ class MRD(SparseGP):
            self.X.mean.gradient += dL_dmean
            self.X.variance.gradient += dL_dS

-        # update for the KL divergence
        self.posterior = self.posteriors[0]
-        self.kern = self.kernels[0]
-        self.likelihood = self.likelihoods[0]
+        self.kern = self.bgplvms[0].kern
+        self.likelihood = self.bgplvms[0].likelihood

+        # update for the KL divergence
        self.variational_prior.update_gradients_KL(self.X)
        self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)

@ -219,8 +225,9 @@ class MRD(SparseGP):
        return Z

    def _handle_plotting(self, fignum, axes, plotf, sharex=False, sharey=False):
+        import matplotlib.pyplot as plt
        if axes is None:
-            fig = pylab.figure(num=fignum)
+            fig = plt.figure(num=fignum)
        sharex_ax = None
        sharey_ax = None
        plots = []
@ -242,8 +249,8 @@ class MRD(SparseGP):
                raise ValueError("Need one axes per latent dimension input_dim")
            plots.append(plotf(i, g, ax))
            if sharey_ax is not None:
-                pylab.setp(ax.get_yticklabels(), visible=False)
-        pylab.draw()
+                plt.setp(ax.get_yticklabels(), visible=False)
+        plt.draw()
        if axes is None:
            try:
                fig.tight_layout()
@ -257,8 +264,8 @@ class MRD(SparseGP):
        This predicts the output mean and variance for the dataset given in Ylist[Yindex]
        """
        self.posterior = self.posteriors[Yindex]
-        self.kern = self.kernels[Yindex]
-        self.likelihood = self.likelihoods[Yindex]
+        self.kern = self.bgplvms[0].kern
+        self.likelihood = self.bgplvms[0].likelihood
        return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern)

    #===============================================================================
@ -300,11 +307,12 @@ class MRD(SparseGP):
        """
        import sys
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from matplotlib import pyplot as plt
        from ..plotting.matplot_dep import dim_reduction_plots
        if "Yindex" not in predict_kwargs:
            predict_kwargs['Yindex'] = 0
        if ax is None:
-            fig = pylab.figure(num=fignum)
+            fig = plt.figure(num=fignum)
            ax = fig.add_subplot(111)
        else:
            fig = ax.figure
@ -321,10 +329,7 @@ class MRD(SparseGP):
        return plot

    def __getstate__(self):
-        # TODO:
-        import copy
-        state = copy.copy(self.__dict__)
-        del state['kernels']
+        state = super(MRD, self).__getstate__()
        del state['kern']
        del state['likelihood']
        return state
@ -332,7 +337,6 @@ class MRD(SparseGP):
    def __setstate__(self, state):
        # TODO:
        super(MRD, self).__setstate__(state)
-        self.kernels = [p.kern for p in self.bgplvms]
-        self.kern = self.kernels[0]
-        self.likelihood = self.likelihoods[0]
+        self.kern = self.bgplvms[0].kern
+        self.likelihood = self.bgplvms[0].likelihood
        self.parameters_changed()
--- a/GPy/models/ss_gplvm.py
+++ b/GPy/models/ss_gplvm.py
@ -64,7 +64,7 @@ class SSGPLVM(SparseGP):
        if inference_method is None:
            inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)

-        self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b
+        self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=True) # the prior probability of the latent binary variable b
        
        X = SpikeAndSlabPosterior(X, X_variance, gamma)