Merge branch 'devel' of github.com:SheffieldML/GPy into devel

2026-07-02 16:01:03 +02:00 · 2014-11-03 16:04:59 +00:00 · 2014-11-03 16:04:59 +00:00 · 105a6c5377
commit 105a6c5377
parent 1840b7e6b8 63c2321469
19 changed files with 138 additions and 464 deletions
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@ -415,7 +415,6 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
 def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
    from GPy import kern
    from GPy.models import MRD
-    from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData

    D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
    _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
@ -429,12 +428,8 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
        inanlist.append(inan)
        Y[inan] = _np.nan

-    imlist = []
-    for inan in inanlist:
-        imlist.append(VarDTCMissingData(limit=1, inan=inan))
-
    m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing,
-            kernel=k, inference_method=imlist,
+            kernel=k, inference_method=None,
            initx="random", initz='permute', **kw)

    if optimize:
@ -494,7 +489,7 @@ def olivetti_faces(optimize=True, verbose=True, plot=True):

 def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=True):
    import GPy
-    import pods 
+    import pods

    data = pods.datasets.osu_run1()
    # optimize
--- a/GPy/inference/latent_function_inference/var_dtc.py
+++ b/GPy/inference/latent_function_inference/var_dtc.py
@ -167,7 +167,7 @@ class VarDTC(LatentFunctionInference):
            woodbury_vector = Cpsi1Vf # == Cpsi1V
        else:
            print 'foobar'
-            stop
+            import ipdb; ipdb.set_trace()
            psi1V = np.dot(Y.T*beta, psi1).T
            tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
            tmp, _ = dpotrs(LB, tmp, lower=1)
--- a/GPy/likelihoods/ordinal.py
+++ b/GPy/likelihoods/ordinal.py
@ -1,47 +0,0 @@
-# Copyright (c) 2014 The GPy authors (see AUTHORS.txt)
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import sympy as sym
-from GPy.util.symbolic import gammaln, normcdfln, normcdf, IndMatrix, create_matrix
-import numpy as np
-import link_functions
-from symbolic import Symbolic
-from scipy import stats
-
-class Ordinal(Symbolic):
-    """
-    Ordinal
-
-    .. math::
-        p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y!\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i}
-
-    .. Note::
-        Y takes non zero integer values..
-        link function should have a positive domain, e.g. log (default).
-
-    .. See also::
-        symbolic.py, for the parent class
-    """
-    def __init__(self, categories=3, gp_link=None):
-        if gp_link is None:
-            gp_link = link_functions.Identity()
-
-        dispersion = sym.Symbol('width', positive=True, real=True)
-        y_0 = sym.Symbol('y_0', nonnegative=True, integer=True)
-        f_0 = sym.Symbol('f_0', positive=True, real=True) 
-        log_pdf = create_matrix('log_pdf', 1, categories)
-        log_pdf[0] = normcdfln(-f_0)
-        if categories>2:
-            w = create_matrix('w', 1, categories)
-            log_pdf[categories-1] = normcdfln(w.sum() + f_0)
-            for i in range(1, categories-1):
-                log_pdf[i] = sym.log(normcdf(w[0, 0:i-1].sum() + f_0) - normcdf(w[0, 0:i].sum()-f_0) )
-        else:
-            log_pdf[1] = normcdfln(f_0)
-        log_pdf.index_var = y_0
-        super(Ordinal, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Ordinal')
-
-        # TODO: Check this.
-        self.log_concave = True
-
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@ -13,11 +13,11 @@ from ..inference.latent_function_inference import InferenceMethodList
 from ..likelihoods import Gaussian
 from ..util.initialization import initialize_latent
 from ..core.sparse_gp import SparseGP, GP
-from GPy.models.bayesian_gplvm import BayesianGPLVM
 from GPy.core.parameterization.variational import VariationalPosterior
-from GPy.core.sparse_gp_mpi import SparseGP_MPI
+from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
+from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch

-class MRD(BayesianGPLVM):
+class MRD(BayesianGPLVMMiniBatch):
    """
    !WARNING: This is bleeding edge code and still in development.
    Functionality may change fundamentally during development!
@ -92,7 +92,8 @@ class MRD(BayesianGPLVM):
        else:
            fracs = [X.var(0)]*len(Ylist)

-        self.Z = Param('inducing inputs', self._init_Z(initz, X))
+        Z = self._init_Z(initz, X)
+        self.Z = Param('inducing inputs', Z)
        self.num_inducing = self.Z.shape[0] # ensure M==N if M>N

        # sort out the kernels
@ -104,6 +105,7 @@ class MRD(BayesianGPLVM):
            kernels = []
            for i in range(len(Ylist)):
                k = kernel.copy()
+                print k is kernel, k.observers, k.constraints
                kernels.append(k)
        else:
            assert len(kernel) == len(Ylist), "need one kernel per output"
@ -114,7 +116,7 @@ class MRD(BayesianGPLVM):
            X_variance = np.random.uniform(0.1, 0.2, X.shape)

        self.variational_prior = NormalPrior()
-        self.X = NormalPosterior(X, X_variance)
+        #self.X = NormalPosterior(X, X_variance)

        if likelihoods is None:
            likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
@ -123,48 +125,33 @@ class MRD(BayesianGPLVM):
        self.logger.info("adding X and Z")
        super(MRD, self).__init__(Y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing,
                 Z=self.Z, kernel=None, inference_method=self.inference_method, likelihood=Gaussian(),
-                 name='bayesian gplvm', mpi_comm=None, normalizer=None,
+                 name='manifold relevance determination', normalizer=None,
                 missing_data=False, stochastic=False, batchsize=1)

-        import GPy
        self._log_marginal_likelihood = 0

-        print "------------"
-        print self.size
-        print self.constraints[GPy.constraints.Logexp()][-10:]
-        print "------------"
        self.unlink_parameter(self.likelihood)
-        print self.size
-        print self.constraints[GPy.constraints.Logexp()][-10:]
-        print "------------"
        self.unlink_parameter(self.kern)
-        print self.size
-        print self.constraints[GPy.constraints.Logexp()][-10:]
-        print "------------"
-
-        print
-        print '================='
+        del self.kern
+        del self.likelihood

        self.num_data = Ylist[0].shape[0]
        if isinstance(batchsize, int):
            batchsize = itertools.repeat(batchsize)

-        print self.size
-        print self.constraints[GPy.constraints.Logexp()][-10:]
+        self.bgplvms = []

        for i, n, k, l, Y, im, bs in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist, self.inference_method, batchsize):
            assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
            md = np.isnan(Y).any()
-            spgp = SparseGP(self.X, Y, self.Z, k, l, im, n, None, normalizer, md, stochastic, bs)
+            spgp = SparseGPMiniBatch(self.X, Y, Z, k, l, im, n, None, normalizer, md, stochastic, bs)
            spgp.unlink_parameter(spgp.Z)
+            del spgp.Z
+            del spgp.X
            spgp.Z = self.Z
+            spgp.X = self.X
            self.link_parameter(spgp, i+2)
-
-        print self.constraints[GPy.constraints.Logexp()][-10:]
-        self.link_parameter(self.Z, 2)
-        print self.size
-        print self.constraints[GPy.constraints.Logexp()][-10:]
-        print "==========="
+            self.bgplvms.append(spgp)

        self.posterior = None
        self.logger.info("init done")
@ -173,7 +160,9 @@ class MRD(BayesianGPLVM):
        self._log_marginal_likelihood = 0
        self.Z.gradient[:] = 0.
        self.X.gradient[:] = 0.
-        for b, i in itertools.izip(self.parameters[3:], self.inference_method):
+        for b, i in itertools.izip(self.bgplvms, self.inference_method):
+            self._log_marginal_likelihood += b._log_marginal_likelihood
+
            self.logger.info('working on im <{}>'.format(hex(id(i))))
            self.Z.gradient[:] += b.full_values['Zgrad']
            grad_dict = b.grad_dict
@ -195,6 +184,7 @@ class MRD(BayesianGPLVM):
            # update for the KL divergence
            self.variational_prior.update_gradients_KL(self.X)
            self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
+            pass

    def log_likelihood(self):
        return self._log_marginal_likelihood
@ -268,7 +258,7 @@ class MRD(BayesianGPLVM):
        Prediction for data set Yindex[default=0].
        This predicts the output mean and variance for the dataset given in Ylist[Yindex]
        """
-        b = self.parameters[Yindex+2]
+        b = self.bgplvms[Yindex]
        self.posterior = b.posterior
        self.kern = b.kern
        self.likelihood = b.likelihood
@ -317,16 +307,20 @@ class MRD(BayesianGPLVM):
        from ..plotting.matplot_dep import dim_reduction_plots
        if "Yindex" not in predict_kwargs:
            predict_kwargs['Yindex'] = 0
+
+        Yindex = predict_kwargs['Yindex']
        if ax is None:
            fig = plt.figure(num=fignum)
            ax = fig.add_subplot(111)
        else:
            fig = ax.figure
+        self.kern = self.bgplvms[Yindex].kern
+        self.likelihood = self.bgplvms[Yindex].likelihood
        plot = dim_reduction_plots.plot_latent(self, labels, which_indices,
                                        resolution, ax, marker, s,
                                        fignum, plot_inducing, legend,
                                        plot_limits, aspect, updates, predict_kwargs, imshow_kwargs)
-        ax.set_title(self.bgplvms[predict_kwargs['Yindex']].name)
+        ax.set_title(self.bgplvms[Yindex].name)
        try:
            fig.tight_layout()
        except:
@ -336,8 +330,10 @@ class MRD(BayesianGPLVM):

    def __getstate__(self):
        state = super(MRD, self).__getstate__()
-        del state['kern']
-        del state['likelihood']
+        if 'kern' in state:  # `in` instead of deprecated dict.has_key(); the key may be absent
+            del state['kern']
+        if 'likelihood' in state:
+            del state['likelihood']
        return state

    def __setstate__(self, state):
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@ -447,6 +447,7 @@ class GradientTests(np.testing.TestCase):
        m = GPy.models.GPHeteroscedasticRegression(X, Y, kern)
        self.assertTrue(m.checkgrad())

+
    def test_gp_kronecker_gaussian(self):
        N1, N2 = 30, 20
        X1 = np.random.randn(N1, 1)
--- a/GPy/testing/observable_tests.py
+++ b/GPy/testing/observable_tests.py
@ -130,7 +130,6 @@ class Test(unittest.TestCase):
        self.assertEqual(self._first, self._trigger, 'priority should be second')
        self.assertEqual(self._second, self._trigger_priority, 'priority should be second')

-
 if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()
--- a/GPy/testing/parameterized_tests.py
+++ b/GPy/testing/parameterized_tests.py
@ -221,6 +221,31 @@ class ParameterizedTest(unittest.TestCase):
        np.testing.assert_equal(t.x.constraints[Logistic(0,1)], c[Logistic(0,1)])
        np.testing.assert_equal(t.x.constraints['fixed'], c['fixed'])

+    def test_parameter_modify_in_init(self):
+        # Constraints are toggled inside __init__; afterwards p1 must be fixed and keep its value.
+        class TestLikelihood(Parameterized):
+            def __init__(self, param1 = 2., param2 = 3.):
+                super(TestLikelihood, self).__init__("TestLike")
+                self.p1 = Param('param1', param1)
+                self.p2 = Param('param2', param2)
+                self.link_parameter(self.p1)
+                self.link_parameter(self.p2)
+
+                self.p1.fix()
+                self.p1.unfix()
+                self.p2.constrain_negative()
+                self.p1.fix()
+                self.p2.constrain_positive()
+                self.p2.fix()
+                self.p2.constrain_positive()
+
+        m = TestLikelihood()
+        print m
+        val = m.p1.values.copy()
+        self.assertTrue(m.p1.is_fixed)
+        self.assertEqual(m.constraints[GPy.constraints.Logexp()].tolist(), [1])  # was assert_(x, [1]): [1] was only the message
+        m.randomize()
+        self.assertEqual(m.p1, val)

    def test_printing(self):
        print self.test1
--- a/GPy/testing/pickle_tests.py
+++ b/GPy/testing/pickle_tests.py
@ -141,6 +141,7 @@ class Test(ListDictTestCase):
        pcopy.optimize('bfgs')
        par.optimize('bfgs')
        np.testing.assert_allclose(pcopy.param_array, par.param_array, atol=1e-6)
+        par.randomize()
        with tempfile.TemporaryFile('w+b') as f:
            par.pickle(f)
            f.seek(0)
--- a/GPy/util/parallel.py
+++ b/GPy/util/parallel.py
@ -4,11 +4,10 @@ The module of tools for parallelization (MPI)

 try:
    from mpi4py import MPI
+    def get_id_within_node(comm=MPI.COMM_WORLD):
+        # Count gathered names before index comm.rank that equal this process's processor name.
+        my_rank, host = comm.rank, MPI.Get_processor_name()
+        hosts_before = comm.allgather(host)[:my_rank]
+        return sum(1 for other in hosts_before if other == host)
 except:
    pass
-
-def get_id_within_node(comm=MPI.COMM_WORLD):
-    rank = comm.rank
-    nodename =  MPI.Get_processor_name()
-    nodelist = comm.allgather(nodename)
-    return len([i for i in nodelist[:rank] if i==nodename])