2012-11-29 16:39:20 +00:00
|
|
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
|
|
|
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
|
|
|
|
|
|
|
|
|
2012-11-29 16:27:46 +00:00
|
|
|
"""
|
2013-02-01 16:21:26 +00:00
|
|
|
Gaussian Processes classification
|
2012-11-29 16:27:46 +00:00
|
|
|
"""
|
|
|
|
|
import pylab as pb
|
|
|
|
|
import GPy
|
|
|
|
|
|
2013-05-17 17:17:30 +01:00
|
|
|
# Seed used as the default ``seed`` argument of the example functions below,
# which forward it to the GPy data generators.
default_seed = 10000
|
2012-11-29 16:27:46 +00:00
|
|
|
|
2014-01-24 09:41:07 +00:00
|
|
|
def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
    """
    Sparse GP classification demo on the three phase oil data.

    Builds a ``GPy.models.SparseGPClassification`` model on the first label
    column of the training set, optionally optimizes it, prints it, and then
    passes the test-set predictions to ``GPy.util.classification.conf_matrix``.

    :param num_inducing: number of inducing points given to the sparse model.
    :type num_inducing: int
    :param max_iters: value forwarded to ``optimize(max_iters=...)``.
    :type max_iters: int
    :param kernel: kernel forwarded unchanged to the model constructor.
    :param optimize: whether to run the optimizer before testing.
    :type optimize: bool
    :param plot: accepted for parity with the other examples; not used here.
    :type plot: bool
    :returns: the model instance.
    """
    oil_data = GPy.util.datasets.oil()
    train_inputs, test_inputs = oil_data['X'], oil_data['Xtest']

    # Only the first label column is used; entries equal to -1 are set to 0.
    train_labels = oil_data['Y'][:, 0:1]
    test_labels = oil_data['Ytest'][:, 0:1]
    for labels in (train_labels, test_labels):
        labels[labels.flatten() == -1] = 0

    model = GPy.models.SparseGPClassification(
        train_inputs, train_labels, kernel=kernel, num_inducing=num_inducing)

    # Tie the parameters matching '.*len' (presumably the kernel lengthscales
    # -- confirm against the model's parameter names) and start them at 10.
    length_pattern = '.*len'
    model.tie_params(length_pattern)
    model[length_pattern] = 10.
    model.update_likelihood_approximation()

    if optimize:
        model.optimize(max_iters=max_iters)
    print(model)

    # Evaluate on the held-out test set.
    predicted_probs = model.predict(test_inputs)[0]
    GPy.util.classification.conf_matrix(predicted_probs, test_labels)
    return model
|
|
|
|
|
|
2014-01-24 09:41:07 +00:00
|
|
|
def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
    """
    Simple 1D classification example using EP approximation

    :param seed: seed value for data generation (defaults to the module-level ``default_seed``).
    :type seed: int
    :param optimize: if True, fit the model by calling ``pseudo_EM``.
    :type optimize: bool
    :param plot: if True, draw ``plot_f`` and ``plot`` on a 2x1 grid of axes.
    :type plot: bool
    :returns: the ``GPy.models.GPClassification`` model.
    """
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    # Keep the first label column and set entries equal to -1 to 0.
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

    # Model definition
    m = GPy.models.GPClassification(data['X'], Y)

    # Optimize
    if optimize:
        m.pseudo_EM()

    # Plot
    if plot:
        _, axes = pb.subplots(2, 1)
        m.plot_f(ax=axes[0])
        m.plot(ax=axes[1])

    # print(m) with a single argument behaves the same on Python 2 and 3,
    # unlike the print statement it replaces.
    print(m)
    return m
|
|
|
|
|
|
|
|
|
|
def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=True):
    """
    Simple 1D classification example using Laplace approximation

    If the optimizer raises, the error is reported and the model is returned
    immediately in whatever state it is in; plotting and the final model
    print are skipped in that case.

    :param seed: seed value for data generation (defaults to the module-level ``default_seed``).
    :type seed: int
    :param optimize: if True, run ``m.optimize('scg', messages=1)``.
    :type optimize: bool
    :param plot: if True, draw ``plot_f`` and ``plot`` on a 2x1 grid of axes.
    :type plot: bool
    :returns: the ``GPy.core.GP`` model.
    """
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    # Keep the first label column and set entries equal to -1 to 0.
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

    likelihood = GPy.likelihoods.Bernoulli()
    laplace_inf = GPy.inference.latent_function_inference.Laplace()
    kernel = GPy.kern.RBF(1)

    # Model definition
    m = GPy.core.GP(data['X'], Y, kernel=kernel, likelihood=likelihood, inference_method=laplace_inf)

    # Optimize
    if optimize:
        try:
            m.optimize('scg', messages=1)
        except Exception as e:
            # Best effort: still hand the model back, but do not hide why
            # the optimization stopped.
            print("Optimization failed: %s" % (e,))
            return m

    # Plot
    if plot:
        _, axes = pb.subplots(2, 1)
        m.plot_f(ax=axes[0])
        m.plot(ax=axes[1])

    # print(m) with a single argument behaves the same on Python 2 and 3,
    # unlike the print statement it replaces.
    print(m)
    return m
|
2013-03-11 14:05:56 +00:00
|
|
|
|
2014-01-24 09:41:07 +00:00
|
|
|
def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, optimize=True, plot=True):
    """
    Sparse 1D classification example

    :param num_inducing: number of inducing points given to the sparse model.
    :type num_inducing: int
    :param seed: seed value for data generation (defaults to the module-level ``default_seed``).
    :type seed: int
    :param optimize: if True, fit the model by calling ``pseudo_EM``.
    :type optimize: bool
    :param plot: if True, draw ``plot_f`` and ``plot`` on a 2x1 grid of axes.
    :type plot: bool
    :returns: the ``GPy.models.SparseGPClassification`` model.
    """
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    # Keep the first label column and set entries equal to -1 to 0.
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

    # Model definition
    m = GPy.models.SparseGPClassification(data['X'], Y, num_inducing=num_inducing)
    # Initial value for every parameter whose name matches '.*len'.
    m['.*len'] = 4.

    # Optimize
    if optimize:
        m.pseudo_EM()

    # Plot
    if plot:
        _, axes = pb.subplots(2, 1)
        m.plot_f(ax=axes[0])
        m.plot(ax=axes[1])

    # print(m) with a single argument behaves the same on Python 2 and 3,
    # unlike the print statement it replaces.
    print(m)
    return m
|
|
|
|
|
|
2014-01-24 09:41:07 +00:00
|
|
|
def toy_heaviside(seed=default_seed, optimize=True, plot=True):
    """
    Simple 1D classification example using a Heaviside gp transformation

    :param seed: seed value for data generation (defaults to the module-level ``default_seed``).
    :type seed: int
    :param optimize: if True, call ``update_likelihood_approximation`` and then ``optimize`` on the model.
    :type optimize: bool
    :param plot: if True, draw ``plot_f`` and ``plot`` on a 2x1 grid of axes.
    :type plot: bool
    :returns: the ``GPy.models.GPClassification`` model.
    """
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    # Keep the first label column and set entries equal to -1 to 0.
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

    # Model definition: Bernoulli noise model built on a Heaviside
    # transformation, wrapped in an EP likelihood over the labels.
    noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
    likelihood = GPy.likelihoods.EP(Y, noise_model)
    m = GPy.models.GPClassification(data['X'], likelihood=likelihood)

    # Optimize
    if optimize:
        m.update_likelihood_approximation()
        # Parameters optimization:
        m.optimize()

    # Plot
    if plot:
        _, axes = pb.subplots(2, 1)
        m.plot_f(ax=axes[0])
        m.plot(ax=axes[1])

    # print(m) with a single argument behaves the same on Python 2 and 3,
    # unlike the print statement it replaces.
    print(m)
    return m
|
|
|
|
|
|
2014-01-24 09:41:07 +00:00
|
|
|
def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None, optimize=True, plot=True):
    """
    Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.

    :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
    :type model_type: str
    :param num_inducing: number of inducing variables (only used for 'FITC' or 'DTC').
    :type num_inducing: int
    :param seed: seed value for data generation (defaults to the module-level ``default_seed``).
    :type seed: int
    :param kernel: kernel to use in the model
    :type kernel: a GPy kernel
    :param optimize: if True, fit the model by calling ``pseudo_EM``.
    :type optimize: bool
    :param plot: if True, call ``plot`` on the model.
    :type plot: bool
    :returns: the constructed model.
    :raises ValueError: if ``model_type`` is not one of 'Full', 'DTC' or 'FITC'.
    """
    data = GPy.util.datasets.crescent_data(seed=seed)
    # Set label entries equal to -1 to 0.
    Y = data['Y']
    Y[Y.flatten() == -1] = 0

    if model_type == 'Full':
        m = GPy.models.GPClassification(data['X'], Y, kernel=kernel)
    elif model_type == 'DTC':
        m = GPy.models.SparseGPClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
        m['.*len'] = 10.
    elif model_type == 'FITC':
        m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
        m['.*len'] = 3.
    else:
        # Without this branch ``m`` would be unbound and the function would
        # fail further down with an unrelated-looking UnboundLocalError.
        raise ValueError(
            "Unknown model_type %r; expected 'Full', 'DTC' or 'FITC'." % (model_type,))

    if optimize:
        m.pseudo_EM()

    if plot:
        m.plot()

    # print(m) with a single argument behaves the same on Python 2 and 3,
    # unlike the print statement it replaces.
    print(m)
    return m
|