GPy/GPy/examples/classification.py

196 lines
5.2 KiB
Python
Raw Normal View History

# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Gaussian Processes classification examples.
"""
import pylab as pb
import numpy as np
import GPy
# Default RNG seed shared by all demos below so runs are reproducible.
default_seed = 10000
def crescent_data(seed=default_seed): # FIXME
    """
    Run a Gaussian process classification on the crescent data. The
    demonstration calls the basic GP classification model and uses EP to
    approximate the likelihood.

    :param seed: seed value for data generation.
    :type seed: int
    """
    data = GPy.util.datasets.crescent_data(seed=seed)
    Y = data['Y']
    # Recode class labels from {-1, 1} to {0, 1}, as expected by the
    # binomial likelihood.
    Y[Y.flatten() == -1] = 0

    # Kernel object over all input dimensions
    kernel = GPy.kern.rbf(data['X'].shape[1])

    # Likelihood object: binomial observations approximated with EP
    distribution = GPy.likelihoods.likelihood_functions.binomial()
    likelihood = GPy.likelihoods.EP(Y, distribution)

    m = GPy.models.GP(data['X'], likelihood, kernel)
    m.ensure_default_constraints()
    m.update_likelihood_approximation()
    print(m)

    # optimize hyperparameters
    m.optimize()
    print(m)

    # plot
    m.plot()
    return m
def oil():
    """
    Run a Gaussian process classification on the oil data. The demonstration
    calls the basic GP classification model and uses EP to approximate the
    likelihood.
    """
    data = GPy.util.datasets.oil()
    # Use only the first output column as the binary target.
    Y = data['Y'][:, 0:1]
    # Recode class labels from {-1, 1} to {0, 1}.
    Y[Y.flatten() == -1] = 0

    # Kernel object over the 12 input dimensions
    kernel = GPy.kern.rbf(12)

    # Create GP classification model.
    # NOTE(review): GP_classification builds its own EP/binomial likelihood
    # from Y; the previously hand-built `distribution`/`likelihood` objects
    # were never passed in, so that dead code has been removed.
    m = GPy.models.GP_classification(data['X'], Y, kernel=kernel)

    # Constrain all parameters to be positive
    m.constrain_positive('')
    m.tie_params('lengthscale')
    m.update_likelihood_approximation()

    # Optimize
    m.optimize()
    print(m)
    return m
def toy_linear_1d_classification(seed=default_seed):
    """
    Simple 1D classification example.

    :param seed: seed value for data generation (default is the module-level
        ``default_seed``).
    :type seed: int
    """
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
    # Recode class labels from {-1, 1} to {0, 1}.
    Y[Y.flatten() == -1] = 0

    # Kernel object
    kernel = GPy.kern.rbf(1)

    # Likelihood object: binomial with a probit link, approximated with EP.
    # NOTE(review): a stray `Y[1] = 1` debug line that mutated the labels
    # *after* the EP likelihood was built from Y has been removed.
    link = GPy.likelihoods.link_functions.probit
    distribution = GPy.likelihoods.likelihood_functions.binomial(link)
    likelihood = GPy.likelihoods.EP(Y, distribution)

    # Model definition
    m = GPy.models.GP_classification(data['X'], Y, likelihood=likelihood, kernel=kernel)
    m.ensure_default_constraints()

    # EP approximation of the likelihood, then hyperparameter optimization
    m.update_likelihood_approximation()
    m.optimize()
    # m.pseudo_EM() #FIXME

    # Plot latent function (top) and predictive probabilities (bottom)
    pb.subplot(211)
    m.plot_f()
    pb.subplot(212)
    m.plot()
    print(m)
    return m
def sparse_toy_linear_1d_classification(seed=default_seed):
    """
    Sparse 1D classification example.

    :param seed: seed value for data generation (default is the module-level
        ``default_seed``).
    :type seed: int
    """
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    Y = data['Y'][:, 0:1]
    # Recode class labels from {-1, 1} to {0, 1}.
    Y[Y.flatten() == -1] = 0

    # Kernel object
    kernel = GPy.kern.rbf(1) + GPy.kern.white(1)

    # Likelihood object
    distribution = GPy.likelihoods.likelihood_functions.binomial()
    likelihood = GPy.likelihoods.EP(Y, distribution)

    # Ten inducing inputs drawn uniformly over the input range.
    # NOTE(review): this draw is not seeded, so inducing locations (and hence
    # results) differ between runs even for a fixed `seed`.
    Z = np.random.uniform(data['X'].min(), data['X'].max(), (10, 1))

    # Model definition
    m = GPy.models.sparse_GP(data['X'], likelihood=likelihood, kernel=kernel, Z=Z, normalize_X=False)
    m.set('len', 2.)
    m.ensure_default_constraints()

    # EP approximation of the likelihood, then hyperparameter optimization
    m.update_likelihood_approximation()
    m.optimize()
    # m.EPEM() #FIXME

    # Plot latent function (top) and predictive probabilities (bottom)
    pb.subplot(211)
    m.plot_f()
    pb.subplot(212)
    m.plot()
    print(m)
    return m
def sparse_crescent_data(inducing=10, seed=default_seed):
    """
    Run a sparse Gaussian process classification on the crescent data, using
    EP to approximate the likelihood.

    :param inducing: number of inducing variables.
    :type inducing: int
    :param seed: seed value for data generation.
    :type seed: int
    """
    data = GPy.util.datasets.crescent_data(seed=seed)
    Y = data['Y']
    # Recode class labels from {-1, 1} to {0, 1}.
    Y[Y.flatten() == -1] = 0

    # Kernel object over all input dimensions
    kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])

    # Likelihood object
    distribution = GPy.likelihoods.likelihood_functions.binomial()
    likelihood = GPy.likelihoods.EP(Y, distribution)

    # Pick inducing inputs at random from the training data.
    # NOTE(review): sampling with replacement, so duplicate inducing points
    # are possible.
    sample = np.random.randint(0, data['X'].shape[0], inducing)
    Z = data['X'][sample, :]

    # create sparse GP EP model
    m = GPy.models.sparse_GP(data['X'], likelihood=likelihood, kernel=kernel, Z=Z)
    m.ensure_default_constraints()
    m.set('len', 10.)
    m.update_likelihood_approximation()

    # optimize
    m.optimize()
    print(m)

    # plot
    m.plot()
    return m