From fa035ac39ee332938d7615396bde98d8ec1c15d2 Mon Sep 17 00:00:00 2001
From: Ricardo
Date: Wed, 5 Jun 2013 18:01:05 +0100
Subject: [PATCH 1/4] examples corrected

---
 GPy/examples/classification.py | 71 +++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 31 deletions(-)

diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index bff8dcd1..648ddb5a 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -26,32 +26,40 @@ def crescent_data(seed=default_seed):
     # FIXME
     m = GPy.models.GPClassification(data['X'], Y)
     m.ensure_default_constraints()
-    m.update_likelihood_approximation()
-    m.optimize()
+    #m.update_likelihood_approximation()
+    #m.optimize()
+    m.pseudo_EM()
     print(m)
     m.plot()
     return m
 
-def oil():
+def oil(num_inducing=50):
     """
     Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
     """
     data = GPy.util.datasets.oil()
-    Y = data['Y'][:, 0:1]
+    X = data['X'][:600, :]
+    X_test = data['X'][600:, :]
+    Y = data['Y'][:600, 0:1]
     Y[Y.flatten()==-1] = 0
+    Y_test = data['Y'][600:, 0:1]
 
     # Create GP model
-    m = GPy.models.GPClassification(data['X'], Y)
+    m = GPy.models.SparseGPClassification(X, Y, num_inducing=num_inducing)
 
     # Contrain all parameters to be positive
     m.constrain_positive('')
     m.tie_params('.*len')
+    m['.*len'] = 10.
     m.update_likelihood_approximation()
 
     # Optimize
     m.optimize()
-    print(m)
+
+    # Test
+    probs = m.predict(X_test)[0]
+    GPy.util.classification.conf_matrix(probs, Y_test)
     return m
 
 def toy_linear_1d_classification(seed=default_seed):
@@ -70,20 +78,20 @@ def toy_linear_1d_classification(seed=default_seed):
     m.ensure_default_constraints()
 
     # Optimize
-    m.update_likelihood_approximation()
+    #m.update_likelihood_approximation()
     # Parameters optimization:
-    m.optimize()
+    #m.optimize()
+    m.pseudo_EM()
 
     # Plot
-    pb.subplot(211)
-    m.plot_f()
-    pb.subplot(212)
-    m.plot()
+    fig, axes = pb.subplots(2, 1)
+    m.plot_f(ax=axes[0])
+    m.plot(ax=axes[1])
     print(m)
     return m
 
-def sparse_toy_linear_1d_classification(seed=default_seed):
+def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed):
     """
     Sparse 1D classification example
     :param seed : seed value for data generation (default is 4).
     :type seed: int
@@ -95,25 +103,25 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed):
     Y[Y.flatten() == -1] = 0
 
     # Model definition
-    m = GPy.models.SparseGPClassification(data['X'], Y)
-    m['.*len']= 2.
+    m = GPy.models.SparseGPClassification(data['X'], Y, num_inducing=num_inducing)
+    m['.*len'] = 4.
     m.ensure_default_constraints()
 
     # Optimize
-    m.update_likelihood_approximation()
+    #m.update_likelihood_approximation()
     # Parameters optimization:
-    m.optimize()
+    #m.optimize()
+    m.pseudo_EM()
 
     # Plot
-    pb.subplot(211)
-    m.plot_f()
-    pb.subplot(212)
-    m.plot()
+    fig, axes = pb.subplots(2, 1)
+    m.plot_f(ax=axes[0])
+    m.plot(ax=axes[1])
     print(m)
     return m
 
-def sparse_crescent_data(inducing=10, seed=default_seed):
+def sparse_crescent_data(num_inducing=10, seed=default_seed):
     """
     Run a Gaussian process classification with DTC approxiamtion on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
@@ -128,16 +136,17 @@ def sparse_crescent_data(num_inducing=10, seed=default_seed):
     Y = data['Y']
     Y[Y.flatten()==-1]=0
 
-    m = GPy.models.SparseGPClassification(data['X'], Y)
+    m = GPy.models.SparseGPClassification(data['X'], Y, num_inducing=num_inducing)
     m.ensure_default_constraints()
     m['.*len'] = 10.
-    m.update_likelihood_approximation()
-    m.optimize()
+    #m.update_likelihood_approximation()
+    #m.optimize()
+    m.pseudo_EM()
     print(m)
     m.plot()
     return m
 
-def FITC_crescent_data(inducing=10, seed=default_seed):
+def FITC_crescent_data(num_inducing=10, seed=default_seed):
     """
     Run a Gaussian process classification with FITC approximation on the crescent data. The demonstration uses EP to approximate the likelihood.
@@ -145,7 +154,7 @@ def FITC_crescent_data(num_inducing=10, seed=default_seed):
     :param seed : seed value for data generation.
     :type seed: int
     :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
-    :type inducing: int
+    :type num_inducing: int
     """
     data = GPy.util.datasets.crescent_data(seed=seed)
@@ -157,12 +166,12 @@ def FITC_crescent_data(num_inducing=10, seed=default_seed):
     Y = data['Y']
     Y[Y.flatten()==-1]=0
 
-    m = GPy.models.FITCClassification(data['X'], Y)
+    m = GPy.models.FITCClassification(data['X'], Y, num_inducing=num_inducing)
     m.ensure_default_constraints()
     m['.*len'] = 3.
-    m.update_likelihood_approximation()
-    m.optimize()
+    #m.update_likelihood_approximation()
+    #m.optimize()
+    m.pseudo_EM()
     print(m)
     m.plot()
     return m
-
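For orientation, the examples in this patch now drive the models through pseudo_EM() instead of a single EP fit followed by optimize(). A minimal sketch of the reworked crescent example, assuming the 0.4-era GPy API used throughout this series (every name below is taken from the diff above; the seed value is arbitrary):

    import GPy

    data = GPy.util.datasets.crescent_data(seed=10)  # any fixed seed works here
    Y = data['Y']
    Y[Y.flatten() == -1] = 0  # EP classification expects labels in {0, 1}

    m = GPy.models.GPClassification(data['X'], Y)
    m.ensure_default_constraints()
    m.pseudo_EM()  # alternates EP approximation updates with hyperparameter optimization
    print(m)
    m.plot()
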
From ab6a87a4d54b888092c8628f5270f28306c089f3 Mon Sep 17 00:00:00 2001
From: Ricardo
Date: Wed, 5 Jun 2013 18:01:28 +0100
Subject: [PATCH 2/4] pseudo_EM modified

---
 GPy/core/model.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index 582d7313..3634aee5 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -449,7 +449,7 @@ class Model(Parameterised):
         :type optimzer: string
         TODO: valid strings?
         """
-        assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods"
+        assert isinstance(self.likelihood, likelihoods.EP), "pseudo_EM is only available for EP likelihoods"
         ll_change = epsilon + 1.
         iteration = 0
         last_ll = -np.inf
@@ -461,16 +461,13 @@ class Model(Parameterised):
         while not stop:
             last_approximation = self.likelihood.copy()
             last_params = self._get_params()
-
-            self.likelihood.restart()
             self.update_likelihood_approximation()
-
             new_ll = self.log_likelihood()
             ll_change = new_ll - last_ll
             if ll_change < 0:
                 self.likelihood = last_approximation # restore previous likelihood approximation
-                self._set_params(last_params) # restore Model parameters
+                self._set_params(last_params) # restore model parameters
                 print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
                 stop = True
             else:
@@ -481,4 +478,4 @@ class Model(Parameterised):
             iteration += 1
         if stop:
             print "%s iterations." % iteration
-
+        self.update_likelihood_approximation()
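The net effect of this patch is easier to see as straight-line control flow: the EP refit is now warm-started from the previous site parameters (the likelihood.restart() call is gone), and a final approximation update is appended after the loop. The sketch below reconstructs pseudo_EM after the change; the body of the else branch is elided in the hunks above, so its two lines here, and the epsilon convergence test, are assumptions:

    def pseudo_EM_sketch(model, epsilon=0.1, **kwargs):
        last_ll = float('-inf')
        stop = False
        while not stop:
            last_approximation = model.likelihood.copy()
            last_params = model._get_params()
            # E-like step: refit EP, warm-started from the previous approximation.
            model.update_likelihood_approximation()
            ll_change = model.log_likelihood() - last_ll
            if ll_change < 0:
                # Roll back an EP update that decreased the log-likelihood.
                model.likelihood = last_approximation
                model._set_params(last_params)
                stop = True
            else:
                model.optimize(**kwargs)    # M-like step: hyperparameters (assumed; elided in the hunk)
                last_ll = model.log_likelihood()
                stop = ll_change < epsilon  # assumed convergence test
        # Final refresh so the EP posterior matches the final hyperparameters,
        # which is the line this patch appends after the loop.
        model.update_likelihood_approximation()
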
From 0c47ecac6fc9e56e612cdbb063087423da69b594 Mon Sep 17 00:00:00 2001
From: Max Zwiessele
Date: Wed, 5 Jun 2013 18:01:45 +0100
Subject: [PATCH 3/4] MRD fix

---
 GPy/examples/dimensionality_reduction.py | 3 +++
 GPy/models/mrd.py                        | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 8b2b7a78..dcb0d400 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -138,6 +138,9 @@ def BGPLVM_oil(optimize=True, N=200, Q=10, num_inducing=15, max_f_eval=4e3, plot
     # optimize
     if optimize:
+        m.constrain_fixed('noise')
+        m.optimize('scg', messages=1, max_f_eval=100, gtol=.05)
+        m.constrain_positive('noise')
         m.optimize('scg', messages=1, max_f_eval=max_f_eval, gtol=.05)
 
     if plot:
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index 8ebff315..a31b044f 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -78,7 +78,7 @@ class MRD(Model):
         self.NQ = self.num_data * self.input_dim
         self.MQ = self.num_inducing * self.input_dim
 
-        model.__init__(self) # @UndefinedVariable
+        super(MRD, self).__init__()
         self._set_params(self._get_params())
 
     @property
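Two separate fixes here. In mrd.py, model.__init__(self) referenced a lowercase name that is undefined in that scope (hence the IDE's @UndefinedVariable marker), so super(MRD, self).__init__() now invokes Model.__init__ as intended. In the BGPLVM example, optimization runs in two stages; the usual motivation is to let the latent space take shape before the noise variance can absorb the signal. Written out as a generic pattern (a sketch; 'noise' is the regular expression that GPy's constraint methods match against parameter names in this code base):

    def optimize_in_two_stages(m, max_f_eval=4e3):
        # Stage 1: hold the noise variance fixed for a short run.
        m.constrain_fixed('noise')
        m.optimize('scg', messages=1, max_f_eval=100, gtol=.05)
        # Stage 2: release the noise variance and run to convergence.
        m.constrain_positive('noise')
        m.optimize('scg', messages=1, max_f_eval=max_f_eval, gtol=.05)
        return m
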
From 616e8c9026941672b268e21a9b8c5194a07a1374 Mon Sep 17 00:00:00 2001
From: Ricardo
Date: Wed, 5 Jun 2013 18:01:53 +0100
Subject: [PATCH 4/4] Minor changes

---
 GPy/core/fitc.py                  |  3 ++-
 GPy/core/gp.py                    |  1 +
 GPy/core/sparse_gp.py             |  2 +-
 GPy/models/fitc_classification.py |  4 ++--
 GPy/testing/unit_tests.py         | 14 ++++----------
 5 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/GPy/core/fitc.py b/GPy/core/fitc.py
index 604db5e8..515abc29 100644
--- a/GPy/core/fitc.py
+++ b/GPy/core/fitc.py
@@ -36,8 +36,9 @@ class FITC(SparseGP):
         For a Gaussian likelihood, no iteration is required:
         this function does nothing
         """
+        self.likelihood.restart()
         self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
-        self._set_params(self._get_params()) # update the GP
+        self._set_params(self._get_params())
 
     def _compute_kernel_matrices(self):
         # kernel computations, using BGPLVM notation
diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 246b8cc9..115c8d0f 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -67,6 +67,7 @@ class GP(GPBase):
         For a Gaussian likelihood, no iteration is required:
         this function does nothing
         """
+        self.likelihood.restart()
         self.likelihood.fit_full(self.kern.K(self.X))
         self._set_params(self._get_params()) # update the GP
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 2cfc8ae4..c1aed5d5 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -173,7 +173,7 @@ class SparseGP(GPBase):
         this function does nothing
         """
         if not isinstance(self.likelihood, Gaussian): # Updates not needed for Gaussian likelihood
-            self.likelihood.restart() # TODO check consistency with pseudo_EP
+            self.likelihood.restart()
             if self.has_uncertain_inputs:
                 Lmi = chol_inv(self.Lm)
                 Kmmi = tdot(Lmi.T)
diff --git a/GPy/models/fitc_classification.py b/GPy/models/fitc_classification.py
index 4ff441c6..e2c48b69 100644
--- a/GPy/models/fitc_classification.py
+++ b/GPy/models/fitc_classification.py
@@ -26,7 +26,7 @@ class FITCClassification(FITC):
 
     """
 
-    def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10):
+    def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10):
         if kernel is None:
             kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
@@ -38,7 +38,7 @@ class FITCClassification(FITC):
             raise Warning, 'likelihood.data and Y are different.'
 
         if Z is None:
-            i = np.random.permutation(X.shape[0])[:M]
+            i = np.random.permutation(X.shape[0])[:num_inducing]
             Z = X[i].copy()
         else:
             assert Z.shape[1]==X.shape[1]
diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py
index 7ee9ef40..494ebf19 100644
--- a/GPy/testing/unit_tests.py
+++ b/GPy/testing/unit_tests.py
@@ -175,7 +175,6 @@ class GradientTests(unittest.TestCase):
         m.ensure_default_constraints()
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
-        # self.assertTrue(m.EPEM)
 
     def test_sparse_EP_DTC_probit(self):
         N = 20
@@ -194,17 +193,12 @@ class GradientTests(unittest.TestCase):
         N = 20
         X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
         k = GPy.kern.rbf(1) + GPy.kern.white(1)
-        Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None]
-
-        distribution = GPy.likelihoods.likelihood_functions.Binomial()
-        likelihood = GPy.likelihoods.EP(Y, distribution)
-        #likelihood = GPy.inference.likelihoods.Binomial(Y)
-        m = GPy.models.generalized_FITC(X,likelihood,k,inducing=4)
-        m.constrain_positive('(var|len)')
-        m.approximate_likelihood()
+        Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
+        m = GPy.models.FITCClassification(X, Y=Y)
+        m.ensure_default_constraints()
+        m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
 
-
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
     unittest.main()
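To close the loop on the M to num_inducing rename, a quick usage sketch mirroring the rewritten FITC gradient test (0.4-era API; num_inducing=4 echoes the inducing=4 of the deleted test, everything else is taken from the diff):

    import numpy as np
    import GPy

    N = 20
    X = np.hstack([np.random.rand(N // 2) + 1, np.random.rand(N // 2) - 1])[:, None]
    Y = np.hstack([np.ones(N // 2), np.zeros(N // 2)])[:, None]  # binary labels in {0, 1}

    m = GPy.models.FITCClassification(X, Y=Y, num_inducing=4)
    m.ensure_default_constraints()
    m.update_likelihood_approximation()
    print(m.checkgrad())  # analytic and numeric gradients should agree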