Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-05 17:52:39 +02:00)

Merge branch 'devel' of github.com:SheffieldML/GPy into devel

Commit 08ecf75e89: 9 changed files with 57 additions and 52 deletions

```diff
@@ -36,8 +36,9 @@ class FITC(SparseGP):
         For a Gaussian likelihood, no iteration is required:
         this function does nothing
         """
+        self.likelihood.restart()
         self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
-        self._set_params(self._get_params()) # update the GP
+        self._set_params(self._get_params())
 
     def _compute_kernel_matrices(self):
         # kernel computations, using BGPLVM notation
```

```diff
@@ -67,6 +67,7 @@ class GP(GPBase):
         For a Gaussian likelihood, no iteration is required:
         this function does nothing
         """
+        self.likelihood.restart()
         self.likelihood.fit_full(self.kern.K(self.X))
         self._set_params(self._get_params()) # update the GP
 
```

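Both hunks above add `self.likelihood.restart()` ahead of the EP refit, so each call to `update_likelihood_approximation` starts from clean site parameters instead of warm-starting from a stale state. A minimal runnable sketch of that pattern; `ToyEP` and its `tau_tilde`/`v_tilde` fields are illustrative stand-ins, not GPy's internals:

```python
import numpy as np

class ToyEP(object):
    def __init__(self, num_data):
        self.num_data = num_data
        self.restart()

    def restart(self):
        # reset the site parameters to the uninformative state
        self.tau_tilde = np.zeros(self.num_data)
        self.v_tilde = np.zeros(self.num_data)

    def fit_full(self, K):
        # placeholder for the EP sweeps over the sites; starting from
        # zeroed sites makes repeated calls reproducible
        self.tau_tilde = self.tau_tilde + 1.0 / np.diag(K)

def update_likelihood_approximation(likelihood, K):
    # the flow the hunks converge on: restart, then refit
    likelihood.restart()
    likelihood.fit_full(K)

K = np.eye(5)
ep = ToyEP(5)
update_likelihood_approximation(ep, K)
update_likelihood_approximation(ep, K)  # same result, thanks to restart()
assert np.allclose(ep.tau_tilde, 1.0)
```
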
```diff
@@ -449,7 +449,7 @@ class Model(Parameterised):
         :type optimzer: string TODO: valid strings?
 
         """
-        assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods"
+        assert isinstance(self.likelihood, likelihoods.EP), "pseudo_EM is only available for EP likelihoods"
         ll_change = epsilon + 1.
         iteration = 0
         last_ll = -np.inf
@@ -461,16 +461,13 @@ class Model(Parameterised):
         while not stop:
             last_approximation = self.likelihood.copy()
             last_params = self._get_params()
 
-            self.likelihood.restart()
             self.update_likelihood_approximation()
-
             new_ll = self.log_likelihood()
             ll_change = new_ll - last_ll
-
             if ll_change < 0:
                 self.likelihood = last_approximation # restore previous likelihood approximation
-                self._set_params(last_params) # restore Model parameters
+                self._set_params(last_params) # restore model parameters
                 print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
                 stop = True
             else:
@@ -481,4 +478,4 @@ class Model(Parameterised):
             iteration += 1
         if stop:
             print "%s iterations." % iteration
-
+        self.update_likelihood_approximation()
```

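The `Model` hunks rename the EPEM wording to `pseudo_EM` and make the loop checkpoint both the likelihood approximation and the parameters, rolling them back whenever the log-likelihood decreases. A self-contained sketch of that loop under those assumptions; `get_params`/`set_params` and `MockModel` are illustrative names, not GPy's exact interface:

```python
import numpy as np

def pseudo_EM(model, epsilon=1e-6, max_iters=100):
    last_ll = -np.inf
    iteration = 0
    stop = False
    while not stop and iteration < max_iters:
        last_approximation = model.likelihood.copy()  # checkpoint
        last_params = model.get_params()

        model.update_likelihood_approximation()       # E-like step
        new_ll = model.log_likelihood()
        ll_change = new_ll - last_ll

        if ll_change < 0:
            model.likelihood = last_approximation     # restore approximation
            model.set_params(last_params)             # restore parameters
            stop = True
        else:
            model.optimize()                          # M-like step
            last_ll = model.log_likelihood()
            if abs(ll_change) < epsilon:
                stop = True
        iteration += 1
    return iteration

class MockModel(object):
    # trivial stand-in whose likelihood improves and then plateaus
    def __init__(self):
        self._ll = 0.0
        self.likelihood = np.zeros(1)
    def get_params(self):
        return self._ll
    def set_params(self, p):
        self._ll = p
    def update_likelihood_approximation(self):
        pass
    def log_likelihood(self):
        return self._ll
    def optimize(self):
        self._ll = min(self._ll + 1.0, 3.0)

print("%d iterations" % pseudo_EM(MockModel()))
```
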
```diff
@@ -173,7 +173,7 @@ class SparseGP(GPBase):
         this function does nothing
         """
         if not isinstance(self.likelihood, Gaussian): # Updates not needed for Gaussian likelihood
-            self.likelihood.restart() # TODO check consistency with pseudo_EP
+            self.likelihood.restart()
             if self.has_uncertain_inputs:
                 Lmi = chol_inv(self.Lm)
                 Kmmi = tdot(Lmi.T)
```

```diff
@@ -26,32 +26,40 @@ def crescent_data(seed=default_seed): # FIXME
 
     m = GPy.models.GPClassification(data['X'], Y)
     m.ensure_default_constraints()
-    m.update_likelihood_approximation()
-    m.optimize()
+    #m.update_likelihood_approximation()
+    #m.optimize()
+    m.pseudo_EM()
     print(m)
     m.plot()
     return m
 
-def oil():
+def oil(num_inducing=50):
     """
     Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
     """
     data = GPy.util.datasets.oil()
-    Y = data['Y'][:, 0:1]
+    X = data['X'][:600,:]
+    X_test = data['X'][600:,:]
+    Y = data['Y'][:600, 0:1]
     Y[Y.flatten()==-1] = 0
+    Y_test = data['Y'][600:, 0:1]
 
     # Create GP model
-    m = GPy.models.GPClassification(data['X'], Y)
+    m = GPy.models.SparseGPClassification(X, Y,num_inducing=num_inducing)
 
     # Contrain all parameters to be positive
     m.constrain_positive('')
+    m.tie_params('.*len')
+    m['.*len'] = 10.
     m.update_likelihood_approximation()
 
     # Optimize
     m.optimize()
 
     print(m)
 
+    #Test
+    probs = m.predict(X_test)[0]
+    GPy.util.classification.conf_matrix(probs,Y_test)
     return m
 
 def toy_linear_1d_classification(seed=default_seed):
@@ -70,20 +78,20 @@ def toy_linear_1d_classification(seed=default_seed):
     m.ensure_default_constraints()
 
-    # Optimize
-    m.update_likelihood_approximation()
+    #m.update_likelihood_approximation()
+    # Parameters optimization:
-    m.optimize()
+    #m.optimize()
+    m.pseudo_EM()
 
     # Plot
-    pb.subplot(211)
-    m.plot_f()
-    pb.subplot(212)
-    m.plot()
+    fig, axes = pb.subplots(2,1)
+    m.plot_f(ax=axes[0])
+    m.plot(ax=axes[1])
     print(m)
 
     return m
 
-def sparse_toy_linear_1d_classification(seed=default_seed):
+def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
     """
     Sparse 1D classification example
     :param seed : seed value for data generation (default is 4).
@@ -95,25 +103,25 @@ def sparse_toy_linear_1d_classification(seed=default_seed):
     Y[Y.flatten() == -1] = 0
 
     # Model definition
-    m = GPy.models.SparseGPClassification(data['X'], Y)
-    m['.*len']= 2.
+    m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing)
+    m['.*len']= 4.
 
     m.ensure_default_constraints()
-    # Optimize
-    m.update_likelihood_approximation()
+    #m.update_likelihood_approximation()
+    # Parameters optimization:
-    m.optimize()
+    #m.optimize()
+    m.pseudo_EM()
 
     # Plot
-    pb.subplot(211)
-    m.plot_f()
-    pb.subplot(212)
-    m.plot()
+    fig, axes = pb.subplots(2,1)
+    m.plot_f(ax=axes[0])
+    m.plot(ax=axes[1])
     print(m)
 
     return m
 
-def sparse_crescent_data(inducing=10, seed=default_seed):
+def sparse_crescent_data(num_inducing=10, seed=default_seed):
     """
     Run a Gaussian process classification with DTC approxiamtion on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
 
@@ -128,16 +136,17 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
     Y = data['Y']
     Y[Y.flatten()==-1]=0
 
-    m = GPy.models.SparseGPClassification(data['X'], Y)
+    m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing)
     m.ensure_default_constraints()
     m['.*len'] = 10.
-    m.update_likelihood_approximation()
-    m.optimize()
+    #m.update_likelihood_approximation()
+    #m.optimize()
+    m.pseudo_EM()
     print(m)
     m.plot()
     return m
 
-def FITC_crescent_data(inducing=10, seed=default_seed):
+def FITC_crescent_data(num_inducing=10, seed=default_seed):
     """
     Run a Gaussian process classification with FITC approximation on the crescent data. The demonstration uses EP to approximate the likelihood.
 
@@ -145,7 +154,7 @@ def FITC_crescent_data(inducing=10, seed=default_seed):
     :param seed : seed value for data generation.
     :type seed: int
     :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
-    :type inducing: int
+    :type num_inducing: int
     """
 
     data = GPy.util.datasets.crescent_data(seed=seed)
@@ -157,12 +166,12 @@ def FITC_crescent_data(inducing=10, seed=default_seed):
     Y = data['Y']
     Y[Y.flatten()==-1]=0
 
-    m = GPy.models.FITCClassification(data['X'], Y)
+    m = GPy.models.FITCClassification(data['X'], Y,num_inducing=num_inducing)
     m.ensure_default_constraints()
     m['.*len'] = 3.
-    m.update_likelihood_approximation()
-    m.optimize()
+    #m.update_likelihood_approximation()
+    #m.optimize()
+    m.pseudo_EM()
     print(m)
     m.plot()
     return m
```

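The reworked `oil()` demo now holds out rows 600 onward and scores the predictions with `GPy.util.classification.conf_matrix`. For reference, the same evaluation pattern in plain numpy; `conf_matrix` is reimplemented here illustratively, and the probabilities stand in for `m.predict(X_test)[0]`:

```python
import numpy as np

def conf_matrix(probs, labels, threshold=0.5):
    # threshold class probabilities into 0/1 labels, then count outcomes
    pred = (probs.flatten() >= threshold).astype(int)
    true = labels.flatten().astype(int)
    tp = np.sum((pred == 1) & (true == 1))
    tn = np.sum((pred == 0) & (true == 0))
    fp = np.sum((pred == 1) & (true == 0))
    fn = np.sum((pred == 0) & (true == 1))
    return np.array([[tn, fp], [fn, tp]])

rng = np.random.RandomState(0)
probs = rng.rand(100, 1)                      # stand-in for model outputs
labels = (probs.flatten() > 0.4).astype(int)  # synthetic ground truth
print(conf_matrix(probs, labels))
```
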
```diff
@@ -138,6 +138,9 @@ def BGPLVM_oil(optimize=True, N=200, Q=10, num_inducing=15, max_f_eval=4e3, plot
 
     # optimize
     if optimize:
+        m.constrain_fixed('noise')
+        m.optimize('scg', messages=1, max_f_eval=100, gtol=.05)
+        m.constrain_positive('noise')
         m.optimize('scg', messages=1, max_f_eval=max_f_eval, gtol=.05)
 
     if plot:
```

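The `BGPLVM_oil` hunk stages the optimization: the noise term is held fixed for a short SCG warm-up, then freed for the full run, which tends to keep the noise variance from absorbing the signal early on. A toy numpy illustration of that staging idea; the objective, mask, and step sizes are invented for the example:

```python
import numpy as np

def objective(theta):
    # toy loss over (signal, noise)-like parameters
    return (theta[0] - 2.0) ** 2 + 10.0 * (theta[1] - 0.5) ** 2

def grad(theta):
    return np.array([2.0 * (theta[0] - 2.0), 20.0 * (theta[1] - 0.5)])

def optimize(theta, free_mask, steps=200, lr=0.05):
    theta = theta.copy()
    for _ in range(steps):
        theta -= lr * grad(theta) * free_mask  # frozen entries take no step
    return theta

theta = np.array([0.0, 5.0])
theta = optimize(theta, free_mask=np.array([1.0, 0.0]))  # noise held fixed
theta = optimize(theta, free_mask=np.array([1.0, 1.0]))  # noise freed
print(theta, objective(theta))
```
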
```diff
@@ -26,7 +26,7 @@ class FITCClassification(FITC):
 
     """
 
-    def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10):
+    def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10):
         if kernel is None:
             kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
 
@@ -38,7 +38,7 @@ class FITCClassification(FITC):
             raise Warning, 'likelihood.data and Y are different.'
 
         if Z is None:
-            i = np.random.permutation(X.shape[0])[:M]
+            i = np.random.permutation(X.shape[0])[:num_inducing]
             Z = X[i].copy()
         else:
             assert Z.shape[1]==X.shape[1]
```

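Both `FITCClassification` hunks replace the `M` keyword with `num_inducing`; when `Z` is not supplied, the inducing inputs default to a random subset of the training inputs, exactly as in the second hunk. That selection as a standalone snippet; `choose_inducing` is a hypothetical helper name:

```python
import numpy as np

def choose_inducing(X, num_inducing=10, seed=None):
    # pick num_inducing rows of X at random, without replacement
    rng = np.random.RandomState(seed)
    i = rng.permutation(X.shape[0])[:num_inducing]
    return X[i].copy()

X = np.random.randn(100, 2)
Z = choose_inducing(X, num_inducing=10, seed=0)
assert Z.shape == (10, 2)
```
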
```diff
@@ -78,7 +78,7 @@ class MRD(Model):
         self.NQ = self.num_data * self.input_dim
         self.MQ = self.num_inducing * self.input_dim
 
-        model.__init__(self) # @UndefinedVariable
+        super(MRD, self).__init__()
         self._set_params(self._get_params())
 
     @property
```

```diff
@@ -175,7 +175,6 @@ class GradientTests(unittest.TestCase):
         m.ensure_default_constraints()
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
-        # self.assertTrue(m.EPEM)
 
     def test_sparse_EP_DTC_probit(self):
         N = 20
@@ -194,17 +193,12 @@ class GradientTests(unittest.TestCase):
         N = 20
         X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
-        k = GPy.kern.rbf(1) + GPy.kern.white(1)
-        Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None]
-
-        distribution = GPy.likelihoods.likelihood_functions.Binomial()
-        likelihood = GPy.likelihoods.EP(Y, distribution)
-        #likelihood = GPy.inference.likelihoods.Binomial(Y)
-        m = GPy.models.generalized_FITC(X,likelihood,k,inducing=4)
-        m.constrain_positive('(var|len)')
-        m.approximate_likelihood()
+        Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
+        m = GPy.models.FITCClassification(X, Y=Y)
+        m.ensure_default_constraints()
+        m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
 
 
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
     unittest.main()
```

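The updated test drives the model through `m.checkgrad()`. The generic technique behind such a check is a central finite-difference comparison against the analytic gradient; the following is a sketch of that idea, not GPy's implementation:

```python
import numpy as np

def check_grad(f, df, x, eps=1e-6):
    # compare analytic gradient df(x) against central differences of f
    x = np.asarray(x, dtype=float)
    num = np.zeros_like(x)
    for j in range(x.size):
        e = np.zeros_like(x)
        e[j] = eps
        num[j] = (f(x + e) - f(x - e)) / (2.0 * eps)
    ana = df(x)
    # worst relative disagreement across coordinates
    return np.max(np.abs(num - ana) / np.maximum(1e-12, np.abs(num) + np.abs(ana)))

f = lambda x: np.sum(np.sin(x))
df = lambda x: np.cos(x)
print(check_grad(f, df, np.linspace(0.0, 1.0, 5)))  # tiny when the gradient is right
```
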