Merge branch 'devel' of github.com:SheffieldML/GPy into devel

Nicolas 2013-06-05 18:06:11 +01:00
commit 08ecf75e89
9 changed files with 57 additions and 52 deletions

View file

@@ -36,8 +36,9 @@ class FITC(SparseGP):
         For a Gaussian likelihood, no iteration is required:
         this function does nothing
         """
+        self.likelihood.restart()
         self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
-        self._set_params(self._get_params()) # update the GP
+        self._set_params(self._get_params())
 
     def _compute_kernel_matrices(self):
         # kernel computations, using BGPLVM notation

View file

@@ -67,6 +67,7 @@ class GP(GPBase):
         For a Gaussian likelihood, no iteration is required:
         this function does nothing
         """
+        self.likelihood.restart()
         self.likelihood.fit_full(self.kern.K(self.X))
         self._set_params(self._get_params()) # update the GP

View file

@@ -449,7 +449,7 @@ class Model(Parameterised):
        :type optimzer: string TODO: valid strings?
        """
-        assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods"
+        assert isinstance(self.likelihood, likelihoods.EP), "pseudo_EM is only available for EP likelihoods"
         ll_change = epsilon + 1.
         iteration = 0
         last_ll = -np.inf
@@ -461,16 +461,13 @@ class Model(Parameterised):
         while not stop:
             last_approximation = self.likelihood.copy()
             last_params = self._get_params()
-            self.likelihood.restart()
             self.update_likelihood_approximation()
             new_ll = self.log_likelihood()
             ll_change = new_ll - last_ll
             if ll_change < 0:
                 self.likelihood = last_approximation # restore previous likelihood approximation
-                self._set_params(last_params) # restore Model parameters
+                self._set_params(last_params) # restore model parameters
                 print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
                 stop = True
             else:
@@ -481,4 +478,4 @@ class Model(Parameterised):
             iteration += 1
         if stop:
             print "%s iterations." % iteration
-        self.update_likelihood_approximation()
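For orientation, the loop these hunks edit alternates an E-like step (refit the EP approximation) with an M-like step (optimize hyperparameters), rolling back whenever the log-likelihood decreases. A condensed sketch under that reading; the optimize call and convergence test are assumptions, not lines copied from the file:

    import numpy as np

    def pseudo_EM(model, epsilon=1e-3):
        # Alternate EP refits and hyperparameter optimization until the
        # marginal likelihood stops improving.
        last_ll, stop, iteration = -np.inf, False, 0
        while not stop:
            last_approximation = model.likelihood.copy()
            last_params = model._get_params()
            model.update_likelihood_approximation()   # now restarts internally
            new_ll = model.log_likelihood()
            if new_ll - last_ll < 0:
                # Reject the step: restore the previous approximation and parameters.
                model.likelihood = last_approximation
                model._set_params(last_params)
                stop = True
            else:
                model.optimize()                      # assumed M-like step
                stop = (new_ll - last_ll) < epsilon   # assumed convergence test
                last_ll = new_ll
            iteration += 1
        print("%s iterations." % iteration)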

View file

@@ -173,7 +173,7 @@ class SparseGP(GPBase):
         this function does nothing
         """
         if not isinstance(self.likelihood, Gaussian): # Updates not needed for Gaussian likelihood
-            self.likelihood.restart() # TODO check consistency with pseudo_EP
+            self.likelihood.restart()
             if self.has_uncertain_inputs:
                 Lmi = chol_inv(self.Lm)
                 Kmmi = tdot(Lmi.T)

View file

@@ -26,32 +26,40 @@ def crescent_data(seed=default_seed): # FIXME
     m = GPy.models.GPClassification(data['X'], Y)
     m.ensure_default_constraints()
-    m.update_likelihood_approximation()
-    m.optimize()
+    #m.update_likelihood_approximation()
+    #m.optimize()
+    m.pseudo_EM()
     print(m)
     m.plot()
     return m
 
-def oil():
+def oil(num_inducing=50):
     """
     Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
     """
     data = GPy.util.datasets.oil()
-    Y = data['Y'][:, 0:1]
+    X = data['X'][:600,:]
+    X_test = data['X'][600:,:]
+    Y = data['Y'][:600, 0:1]
     Y[Y.flatten()==-1] = 0
+    Y_test = data['Y'][600:, 0:1]
 
     # Create GP model
-    m = GPy.models.GPClassification(data['X'], Y)
+    m = GPy.models.SparseGPClassification(X, Y,num_inducing=num_inducing)
 
     # Contrain all parameters to be positive
     m.constrain_positive('')
     m.tie_params('.*len')
+    m['.*len'] = 10.
     m.update_likelihood_approximation()
 
     # Optimize
     m.optimize()
 
     print(m)
+
+    #Test
+    probs = m.predict(X_test)[0]
+    GPy.util.classification.conf_matrix(probs,Y_test)
     return m
 
 def toy_linear_1d_classification(seed=default_seed):
@@ -70,20 +78,20 @@ def toy_linear_1d_classification(seed=default_seed):
     m.ensure_default_constraints()
 
     # Optimize
-    m.update_likelihood_approximation()
+    #m.update_likelihood_approximation()
     # Parameters optimization:
-    m.optimize()
+    #m.optimize()
+    m.pseudo_EM()
 
     # Plot
-    pb.subplot(211)
-    m.plot_f()
-    pb.subplot(212)
-    m.plot()
+    fig, axes = pb.subplots(2,1)
+    m.plot_f(ax=axes[0])
+    m.plot(ax=axes[1])
     print(m)
     return m
 
-def sparse_toy_linear_1d_classification(seed=default_seed):
+def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
     """
     Sparse 1D classification example
     :param seed : seed value for data generation (default is 4).
@@ -95,25 +103,25 @@ def sparse_toy_linear_1d_classification(seed=default_seed):
     Y[Y.flatten() == -1] = 0
 
     # Model definition
-    m = GPy.models.SparseGPClassification(data['X'], Y)
-    m['.*len']= 2.
+    m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing)
+    m['.*len']= 4.
     m.ensure_default_constraints()
 
     # Optimize
-    m.update_likelihood_approximation()
+    #m.update_likelihood_approximation()
     # Parameters optimization:
-    m.optimize()
+    #m.optimize()
+    m.pseudo_EM()
 
     # Plot
-    pb.subplot(211)
-    m.plot_f()
-    pb.subplot(212)
-    m.plot()
+    fig, axes = pb.subplots(2,1)
+    m.plot_f(ax=axes[0])
+    m.plot(ax=axes[1])
     print(m)
     return m
 
-def sparse_crescent_data(inducing=10, seed=default_seed):
+def sparse_crescent_data(num_inducing=10, seed=default_seed):
     """
     Run a Gaussian process classification with DTC approxiamtion on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
@@ -128,16 +136,17 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
     Y = data['Y']
     Y[Y.flatten()==-1]=0
 
-    m = GPy.models.SparseGPClassification(data['X'], Y)
+    m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing)
     m.ensure_default_constraints()
     m['.*len'] = 10.
-    m.update_likelihood_approximation()
-    m.optimize()
+    #m.update_likelihood_approximation()
+    #m.optimize()
+    m.pseudo_EM()
     print(m)
     m.plot()
     return m
 
-def FITC_crescent_data(inducing=10, seed=default_seed):
+def FITC_crescent_data(num_inducing=10, seed=default_seed):
     """
     Run a Gaussian process classification with FITC approximation on the crescent data. The demonstration uses EP to approximate the likelihood.
@@ -145,7 +154,7 @@ def FITC_crescent_data(inducing=10, seed=default_seed):
     :param seed : seed value for data generation.
     :type seed: int
     :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
-    :type inducing: int
+    :type num_inducing: int
     """
     data = GPy.util.datasets.crescent_data(seed=seed)
@@ -157,12 +166,12 @@ def FITC_crescent_data(inducing=10, seed=default_seed):
     Y = data['Y']
     Y[Y.flatten()==-1]=0
 
-    m = GPy.models.FITCClassification(data['X'], Y)
+    m = GPy.models.FITCClassification(data['X'], Y,num_inducing=num_inducing)
     m.ensure_default_constraints()
     m['.*len'] = 3.
-    m.update_likelihood_approximation()
-    m.optimize()
+    #m.update_likelihood_approximation()
+    #m.optimize()
+    m.pseudo_EM()
     print(m)
     m.plot()
     return m
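Two patterns recur through these demo edits: in most demos the two-step update_likelihood_approximation() plus optimize() recipe is replaced by a single pseudo_EM() call, and plotting moves from pb.subplot(211)/pb.subplot(212), which relies on matplotlib's implicit current axes, to pb.subplots(2,1) with an explicit Axes handed to each plot call. The latter is plain matplotlib; a self-contained sketch of the pattern, independent of GPy:

    import numpy as np
    import matplotlib.pyplot as pb

    x = np.linspace(0., 1., 50)
    # subplots returns the figure and an array of Axes objects; passing an
    # Axes to each drawing call avoids implicit current-axes state.
    fig, axes = pb.subplots(2, 1)
    axes[0].plot(x, np.sin(2 * np.pi * x))   # analogue of m.plot_f(ax=axes[0])
    axes[1].plot(x, np.cos(2 * np.pi * x))   # analogue of m.plot(ax=axes[1])
    fig.savefig('classification_demo.png')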

View file

@@ -138,6 +138,9 @@ def BGPLVM_oil(optimize=True, N=200, Q=10, num_inducing=15, max_f_eval=4e3, plot
     # optimize
     if optimize:
+        m.constrain_fixed('noise')
+        m.optimize('scg', messages=1, max_f_eval=100, gtol=.05)
+        m.constrain_positive('noise')
         m.optimize('scg', messages=1, max_f_eval=max_f_eval, gtol=.05)
 
     if plot:
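The three added lines implement a common two-stage schedule for GPLVM-style models: hold the noise variance fixed during a short initial run so it cannot immediately absorb all the signal, then free it for the full optimization. A hedged sketch of the same idea, using only the calls visible in the diff:

    def two_stage_optimize(m, max_f_eval=4e3):
        # Stage 1: brief optimization with the noise variance clamped.
        m.constrain_fixed('noise')
        m.optimize('scg', messages=1, max_f_eval=100, gtol=.05)
        # Stage 2: release the noise term and run the full optimization.
        m.constrain_positive('noise')
        m.optimize('scg', messages=1, max_f_eval=max_f_eval, gtol=.05)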

View file

@@ -26,7 +26,7 @@ class FITCClassification(FITC):
     """
-    def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10):
+    def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10):
         if kernel is None:
             kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
 
@@ -38,7 +38,7 @@ class FITCClassification(FITC):
             raise Warning, 'likelihood.data and Y are different.'
         if Z is None:
-            i = np.random.permutation(X.shape[0])[:M]
+            i = np.random.permutation(X.shape[0])[:num_inducing]
             Z = X[i].copy()
         else:
             assert Z.shape[1]==X.shape[1]
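The renamed branch initializes Z by drawing num_inducing distinct rows of X in random order. A standalone sketch of that selection with dummy data (nothing here is GPy-specific):

    import numpy as np

    X = np.random.rand(100, 3)        # dummy inputs
    num_inducing = 10
    # Permute the row indices and keep the first num_inducing of them,
    # so Z is a random subset of the inputs without repetition.
    i = np.random.permutation(X.shape[0])[:num_inducing]
    Z = X[i].copy()
    print(Z.shape)                    # (10, 3)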

View file

@@ -78,7 +78,7 @@ class MRD(Model):
         self.NQ = self.num_data * self.input_dim
         self.MQ = self.num_inducing * self.input_dim
 
-        model.__init__(self) # @UndefinedVariable
+        super(MRD, self).__init__()
         self._set_params(self._get_params())
 
     @property
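The MRD change swaps a call through the undefined name model (flagged by the old # @UndefinedVariable comment) for Python's cooperative super(), which resolves the parent initializer through the MRO. A minimal illustration with hypothetical stand-in classes:

    class Model(object):
        def __init__(self):
            self.initialized = True

    class MRD(Model):
        def __init__(self):
            # Python-2-compatible spelling; resolves Model.__init__ via the MRO.
            super(MRD, self).__init__()

    m = MRD()
    print(m.initialized)   # True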

View file

@@ -175,7 +175,6 @@ class GradientTests(unittest.TestCase):
         m.ensure_default_constraints()
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
-        # self.assertTrue(m.EPEM)
 
     def test_sparse_EP_DTC_probit(self):
         N = 20
@@ -194,17 +193,12 @@ class GradientTests(unittest.TestCase):
         N = 20
         X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
         k = GPy.kern.rbf(1) + GPy.kern.white(1)
-        Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None]
-        distribution = GPy.likelihoods.likelihood_functions.Binomial()
-        likelihood = GPy.likelihoods.EP(Y, distribution)
-        #likelihood = GPy.inference.likelihoods.Binomial(Y)
-        m = GPy.models.generalized_FITC(X,likelihood,k,inducing=4)
-        m.constrain_positive('(var|len)')
-        m.approximate_likelihood()
+        Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
+        m = GPy.models.FITCClassification(X, Y=Y)
+        m.ensure_default_constraints()
+        m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
 
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
     unittest.main()