GPy/GPy/examples/classification.py

196 lines
5.2 KiB
Python
Raw Normal View History

# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Gaussian Processes classification examples.
"""
import pylab as pb
import numpy as np
import GPy
# Default RNG seed shared by all demos below so runs are reproducible.
default_seed = 10000
def crescent_data(seed=default_seed): # FIXME
    """
    Run a Gaussian process classification on the crescent data. The
    demonstration calls the basic GP classification model and uses EP to
    approximate the likelihood.

    :param seed: seed value for data generation.
    :type seed: int
    """
    data = GPy.util.datasets.crescent_data(seed=seed)
    Y = data['Y']
    # Recode class labels from {-1, 1} to {0, 1}, as expected by the
    # binomial likelihood.
    Y[Y.flatten() == -1] = 0

    # Kernel object over all input dimensions
    kernel = GPy.kern.rbf(data['X'].shape[1])

    # Likelihood object: binomial observations approximated with EP
    distribution = GPy.likelihoods.likelihood_functions.binomial()
    likelihood = GPy.likelihoods.EP(Y, distribution)

    m = GPy.models.GP(data['X'], likelihood, kernel)
    m.ensure_default_constraints()
    m.update_likelihood_approximation()
    print(m)

    # optimize hyperparameters
    m.optimize()
    print(m)

    # plot
    m.plot()
    return m
def oil():
    """
    Run a Gaussian process classification on the oil data. The demonstration
    calls the basic GP classification model and uses EP to approximate the
    likelihood.
    """
    data = GPy.util.datasets.oil()
    # Use only the first output column as the binary target.
    Y = data['Y'][:, 0:1]
    # Recode class labels from {-1, 1} to {0, 1}.
    Y[Y.flatten() == -1] = 0

    # Kernel object over the 12 input dimensions
    kernel = GPy.kern.rbf(12)

    # Create GP classification model.
    # NOTE(review): GP_classification builds its own EP/binomial likelihood
    # from Y; the previously hand-built `distribution`/`likelihood` objects
    # were never passed in, so that dead code has been removed.
    m = GPy.models.GP_classification(data['X'], Y, kernel=kernel)

    # Constrain all parameters to be positive
    m.constrain_positive('')
    m.tie_params('lengthscale')
    m.update_likelihood_approximation()

    # Optimize
    m.optimize()
    print(m)
    return m
def toy_linear_1d_classification(seed=default_seed):
    """
    Simple 1D classification example.

    :param seed: seed value for data generation (default is the module-level
        ``default_seed``).
    :type seed: int
    """
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
    # Recode class labels from {-1, 1} to {0, 1}.
    Y[Y.flatten() == -1] = 0

    # Kernel object
    kernel = GPy.kern.rbf(1)

    # Likelihood object: binomial with a probit link, approximated with EP.
    # NOTE(review): a stray `Y[1] = 1` debug line that mutated the labels
    # *after* the EP likelihood was built from Y has been removed.
    link = GPy.likelihoods.link_functions.probit
    distribution = GPy.likelihoods.likelihood_functions.binomial(link)
    likelihood = GPy.likelihoods.EP(Y, distribution)

    # Model definition
    m = GPy.models.GP_classification(data['X'], Y, likelihood=likelihood, kernel=kernel)
    m.ensure_default_constraints()

    # EP approximation of the likelihood, then hyperparameter optimization
    m.update_likelihood_approximation()
    m.optimize()
    # m.pseudo_EM() #FIXME

    # Plot latent function (top) and predictive probabilities (bottom)
    pb.subplot(211)
    m.plot_f()
    pb.subplot(212)
    m.plot()
    print(m)
    return m
def sparse_toy_linear_1d_classification(seed=default_seed):
    """
    Sparse 1D classification example.

    :param seed: seed value for data generation (default is the module-level
        ``default_seed``).
    :type seed: int
    """
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
    # Recode class labels from {-1, 1} to {0, 1}.
    Y[Y.flatten() == -1] = 0

    # Kernel object
    kernel = GPy.kern.rbf(1) + GPy.kern.white(1)

    # Likelihood object
    distribution = GPy.likelihoods.likelihood_functions.binomial()
    likelihood = GPy.likelihoods.EP(Y, distribution)

    # Ten inducing inputs drawn uniformly over the input range.
    # NOTE(review): this draw is not seeded, so inducing locations (and hence
    # results) differ between runs even for a fixed `seed`.
    Z = np.random.uniform(data['X'].min(), data['X'].max(), (10, 1))

    # Model definition
    m = GPy.models.sparse_GP(data['X'], likelihood=likelihood, kernel=kernel, Z=Z, normalize_X=False)
    m.set('len', 2.)
    m.ensure_default_constraints()

    # EP approximation of the likelihood, then hyperparameter optimization
    m.update_likelihood_approximation()
    m.optimize()
    # m.EPEM() #FIXME

    # Plot latent function (top) and predictive probabilities (bottom)
    pb.subplot(211)
    m.plot_f()
    pb.subplot(212)
    m.plot()
    print(m)
    return m
def sparse_crescent_data(inducing=10, seed=default_seed):
    """
    Run a sparse Gaussian process classification on the crescent data, using
    EP to approximate the likelihood.

    :param inducing: number of inducing variables.
    :type inducing: int
    :param seed: seed value for data generation.
    :type seed: int
    """
    data = GPy.util.datasets.crescent_data(seed=seed)
    Y = data['Y']
    # Recode class labels from {-1, 1} to {0, 1}.
    Y[Y.flatten() == -1] = 0

    # Kernel object over all input dimensions
    kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])

    # Likelihood object
    distribution = GPy.likelihoods.likelihood_functions.binomial()
    likelihood = GPy.likelihoods.EP(Y, distribution)

    # Pick inducing inputs at random from the training data.
    # NOTE(review): sampling with replacement, so duplicate inducing points
    # are possible.
    sample = np.random.randint(0, data['X'].shape[0], inducing)
    Z = data['X'][sample, :]

    # create sparse GP EP model
    m = GPy.models.sparse_GP(data['X'], likelihood=likelihood, kernel=kernel, Z=Z)
    m.ensure_default_constraints()
    m.set('len', 10.)
    m.update_likelihood_approximation()

    # optimize
    m.optimize()
    print(m)

    # plot
    m.plot()
    return m