GPy/GPy/examples/classification.py

# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)


"""
Gaussian Processes classification
"""
import pylab as pb
import GPy

# Default value of the ``seed`` argument of the examples below, which pass it
# on to the GPy dataset generators.
default_seed = 10000

def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
    """
    Sparse Gaussian process classification demo on the three phase oil data.

    Only the first label column is used, with the -1 class recoded as 0. A
    ``GPy.models.SparseGPClassification`` model is built (EP is used to
    approximate the likelihood), optionally optimized, printed, and finally
    scored on the test split with ``GPy.util.classification.conf_matrix``.

    :param num_inducing: number of inducing variables given to the sparse model.
    :type num_inducing: int
    :param max_iters: iteration limit forwarded to ``m.optimize``.
    :type max_iters: int
    :param kernel: kernel to use in the model (forwarded unchanged).
    :type kernel: a GPy kernel
    :param optimize: whether to run the optimizer.
    :type optimize: bool
    :param plot: not used by this example; present in the signature only.
    :type plot: bool
    :returns: the classification model.
    """
    oil_set = GPy.util.datasets.oil()
    train_inputs = oil_set['X']
    test_inputs = oil_set['Xtest']
    train_labels = oil_set['Y'][:, 0:1]
    test_labels = oil_set['Ytest'][:, 0:1]

    # Recode the -1 class as 0 in both splits
    train_labels[train_labels.flatten() == -1] = 0
    test_labels[test_labels.flatten() == -1] = 0

    # Build the sparse classification model
    m = GPy.models.SparseGPClassification(
        train_inputs, train_labels, kernel=kernel, num_inducing=num_inducing)

    # Tie the parameters selected by '.*len' (presumably the lengthscales)
    # and start them at 10 before refreshing the likelihood approximation
    m.tie_params('.*len')
    m['.*len'] = 10.
    m.update_likelihood_approximation()

    if optimize:
        m.optimize(max_iters=max_iters)
    print(m)

    # Confusion matrix of the predictions on the test split
    predictions = m.predict(test_inputs)
    GPy.util.classification.conf_matrix(predictions[0], test_labels)
    return m

def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
    """
    Simple 1D classification example using EP approximation

    :param seed: seed value for data generation (default is ``default_seed``).
    :type seed: int
    :param optimize: whether to fit the model with ``m.pseudo_EM()``.
    :type optimize: bool
    :param plot: whether to draw ``m.plot_f`` and ``m.plot`` on a 2x1 grid of axes.
    :type plot: bool
    :returns: the classification model.
    """

    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    # Keep the first label column and recode the -1 class as 0
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

    # Model definition
    m = GPy.models.GPClassification(data['X'], Y)

    # Optimize
    if optimize:
        m.pseudo_EM()

    # Plot
    if plot:
        _, axes = pb.subplots(2, 1)
        m.plot_f(ax=axes[0])
        m.plot(ax=axes[1])

    # Parenthesised form is valid in both Python 2 and Python 3
    print(m)
    return m

def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=True):
    """
    Simple 1D classification example using Laplace approximation

    :param seed: seed value for data generation (default is ``default_seed``).
    :type seed: int
    :param optimize: whether to optimize the model with the 'scg' optimizer.
    :type optimize: bool
    :param plot: whether to draw ``m.plot_f`` and ``m.plot`` on a 2x1 grid of axes.
    :type plot: bool
    :returns: the GP model. If the optimizer raises, the failure is reported
        and the model is returned immediately, without plotting or printing it.
    """

    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    # Keep the first label column and recode the -1 class as 0
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

    likelihood = GPy.likelihoods.Bernoulli()
    laplace_inf = GPy.inference.latent_function_inference.Laplace()
    kernel = GPy.kern.RBF(1)

    # Model definition
    m = GPy.core.GP(data['X'], Y, kernel=kernel, likelihood=likelihood, inference_method=laplace_inf)

    # Optimize
    if optimize:
        try:
            m.optimize('scg', messages=1)
        except Exception as e:
            # Best effort: hand back the model as it stands, but say why the
            # optimization stopped instead of hiding the error.
            print("Optimization failed: %s" % (e,))
            return m

    # Plot
    if plot:
        _, axes = pb.subplots(2, 1)
        m.plot_f(ax=axes[0])
        m.plot(ax=axes[1])

    # Parenthesised form is valid in both Python 2 and Python 3
    print(m)
    return m

def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, optimize=True, plot=True):
    """
    Sparse 1D classification example

    :param num_inducing: number of inducing variables given to the sparse model.
    :type num_inducing: int
    :param seed: seed value for data generation (default is ``default_seed``).
    :type seed: int
    :param optimize: whether to fit the model with ``m.pseudo_EM()``.
    :type optimize: bool
    :param plot: whether to draw ``m.plot_f`` and ``m.plot`` on a 2x1 grid of axes.
    :type plot: bool
    :returns: the classification model.
    """

    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    # Keep the first label column and recode the -1 class as 0
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

    # Model definition
    m = GPy.models.SparseGPClassification(data['X'], Y, num_inducing=num_inducing)
    # Starting value for the parameters selected by '.*len'
    # (presumably the kernel lengthscale)
    m['.*len'] = 4.

    # Optimize
    if optimize:
        m.pseudo_EM()

    # Plot
    if plot:
        _, axes = pb.subplots(2, 1)
        m.plot_f(ax=axes[0])
        m.plot(ax=axes[1])

    # Parenthesised form is valid in both Python 2 and Python 3
    print(m)
    return m

def toy_heaviside(seed=default_seed, optimize=True, plot=True):
    """
    Simple 1D classification example using a Heaviside gp transformation

    :param seed: seed value for data generation (default is ``default_seed``).
    :type seed: int
    :param optimize: whether to update the likelihood approximation and then
        run ``m.optimize()``.
    :type optimize: bool
    :param plot: whether to draw ``m.plot_f`` and ``m.plot`` on a 2x1 grid of axes.
    :type plot: bool
    :returns: the classification model.
    """

    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
    # Keep the first label column and recode the -1 class as 0
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

    # Model definition
    # NOTE(review): the Laplace example in this file builds its likelihood with
    # ``GPy.likelihoods.Bernoulli()`` and an explicit inference method, whereas
    # this one uses ``GPy.likelihoods.bernoulli`` / ``GPy.likelihoods.EP`` --
    # confirm these names still exist in the GPy version being targeted.
    noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
    likelihood = GPy.likelihoods.EP(Y, noise_model)
    m = GPy.models.GPClassification(data['X'], likelihood=likelihood)

    # Optimize
    if optimize:
        m.update_likelihood_approximation()
        # Parameters optimization:
        m.optimize()

    # Plot
    if plot:
        _, axes = pb.subplots(2, 1)
        m.plot_f(ax=axes[0])
        m.plot(ax=axes[1])

    # Parenthesised form is valid in both Python 2 and Python 3
    print(m)
    return m

def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None, optimize=True, plot=True):
    """
    Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.

    :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
    :type model_type: str
    :param num_inducing: number of inducing variables (only used for 'FITC' or 'DTC').
    :type num_inducing: int
    :param seed: seed value for data generation (default is ``default_seed``).
    :type seed: int
    :param kernel: kernel to use in the model
    :type kernel: a GPy kernel
    :param optimize: whether to fit the model with ``m.pseudo_EM()``.
    :type optimize: bool
    :param plot: whether to call ``m.plot()``.
    :type plot: bool
    :returns: the classification model.
    :raises ValueError: if ``model_type`` is not one of 'Full', 'DTC' or 'FITC'.
    """
    data = GPy.util.datasets.crescent_data(seed=seed)
    # Recode the -1 class as 0
    Y = data['Y']
    Y[Y.flatten() == -1] = 0

    if model_type == 'Full':
        m = GPy.models.GPClassification(data['X'], Y, kernel=kernel)

    elif model_type == 'DTC':
        m = GPy.models.SparseGPClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
        m['.*len'] = 10.

    elif model_type == 'FITC':
        m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
        m['.*len'] = 3.

    else:
        # Without this branch ``m`` stays unbound and the function fails later
        # with an unrelated-looking UnboundLocalError.
        raise ValueError("Unknown model_type %r; expected 'Full', 'DTC' or 'FITC'." % (model_type,))

    if optimize:
        m.pseudo_EM()

    if plot:
        m.plot()

    # Parenthesised form is valid in both Python 2 and Python 3
    print(m)
    return m
added copyright notice and license at the top 2012-11-29 16:39:20 +00:00			`# Copyright (c) 2012, GPy authors (see AUTHORS.txt).`
			`# Licensed under the BSD 3-clause license (see LICENSE.txt)`


examples 2012-11-29 16:27:46 +00:00			`"""`
Classification examples corrected (2/3) 2013-02-01 16:21:26 +00:00			`Gaussian Processes classification`
examples 2012-11-29 16:27:46 +00:00			`"""`
			`import pylab as pb`
			`import GPy`

SCG printing prettyfied 2013-05-17 17:17:30 +01:00			`default_seed = 10000`
examples 2012-11-29 16:27:46 +00:00
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):`
Classification examples corrected (2/3) 2013-02-01 16:21:26 +00:00			`"""`
Minor fixes to classification to allow kernel choice, change of oil example to use full test set and full training set. 2013-08-19 07:37:09 +02:00			`Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.`
Fixed docstring warnings - could still be mistakes 2013-09-20 13:38:20 +01:00
Classification examples corrected (2/3) 2013-02-01 16:21:26 +00:00			`"""`
examples 2012-11-29 16:27:46 +00:00			`data = GPy.util.datasets.oil()`
Minor fixes to classification to allow kernel choice, change of oil example to use full test set and full training set. 2013-08-19 07:37:09 +02:00			`X = data['X']`
			`Xtest = data['Xtest']`
			`Y = data['Y'][:, 0:1]`
			`Ytest = data['Ytest'][:, 0:1]`
Examples changed to use new link_functions 2013-06-04 16:32:12 +01:00			`Y[Y.flatten()==-1] = 0`
Minor fixes to classification to allow kernel choice, change of oil example to use full test set and full training set. 2013-08-19 07:37:09 +02:00			`Ytest[Ytest.flatten()==-1] = 0`
Examples changed to use new link_functions 2013-06-04 16:32:12 +01:00
Classification examples corrected (2/3) 2013-02-01 16:21:26 +00:00			`# Create GP model`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, num_inducing=num_inducing)`
Classification examples corrected (2/3) 2013-02-01 16:21:26 +00:00
			`# Contrain all parameters to be positive`
examples corrected 2013-06-04 18:54:29 +01:00			`m.tie_params('.*len')`
examples corrected 2013-06-05 18:01:05 +01:00			`m['.*len'] = 10.`
Classification examples corrected (2/3) 2013-02-01 16:21:26 +00:00			`m.update_likelihood_approximation()`
examples 2012-11-29 16:27:46 +00:00
Classification examples corrected (2/3) 2013-02-01 16:21:26 +00:00			`# Optimize`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`if optimize:`
			`m.optimize(max_iters=max_iters)`
examples 2012-11-29 16:27:46 +00:00			`print(m)`
examples corrected 2013-06-05 18:01:05 +01:00
			`#Test`
Minor fixes to classification to allow kernel choice, change of oil example to use full test set and full training set. 2013-08-19 07:37:09 +02:00			`probs = m.predict(Xtest)[0]`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`GPy.util.classification.conf_matrix(probs, Ytest)`
examples 2012-11-29 16:27:46 +00:00			`return m`

hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):`
Classification examples corrected (2/3) 2013-02-01 16:21:26 +00:00			`"""`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`Simple 1D classification example using EP approximation`
Fixed docstring warnings - could still be mistakes 2013-09-20 13:38:20 +01:00
			`:param seed: seed value for data generation (default is 4).`
examples 2012-11-29 16:27:46 +00:00			`:type seed: int`
Fixed docstring warnings - could still be mistakes 2013-09-20 13:38:20 +01:00
examples 2012-11-29 16:27:46 +00:00			`"""`
Classification examples corrected (2/3) 2013-02-01 16:21:26 +00:00
examples 2012-11-29 16:27:46 +00:00			`data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)`
Probit likelihood modified for plotting. 2013-02-07 11:36:45 +00:00			`Y = data['Y'][:, 0:1]`
Link functions defined 2013-06-04 16:23:04 +01:00			`Y[Y.flatten() == -1] = 0`
examples 2012-11-29 16:27:46 +00:00
Classification examples corrected (2/3) 2013-02-01 16:21:26 +00:00			`# Model definition`
Fixed naming to standardized PEP8 2013-06-05 14:39:32 +01:00			`m = GPy.models.GPClassification(data['X'], Y)`
examples 2012-11-29 16:27:46 +00:00
Classification examples corrected (2/3) 2013-02-01 16:21:26 +00:00			`# Optimize`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`if optimize:`
			`#m.update_likelihood_approximation()`
			`# Parameters optimization:`
			`#m.optimize()`
			`#m.update_likelihood_approximation()`
			`m.pseudo_EM()`
Classification examples corrected (2/3) 2013-02-01 16:21:26 +00:00
			`# Plot`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`if plot:`
			`fig, axes = pb.subplots(2, 1)`
			`m.plot_f(ax=axes[0])`
			`m.plot(ax=axes[1])`

			`print m`
			`return m`

			`def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=True):`
			`"""`
			`Simple 1D classification example using Laplace approximation`

			`:param seed: seed value for data generation (default is 4).`
			`:type seed: int`

			`"""`

			`data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)`
			`Y = data['Y'][:, 0:1]`
			`Y[Y.flatten() == -1] = 0`

Fixed bernoulli likelihood divide by 0 and log of 0 2014-02-12 16:48:57 +00:00			`likelihood = GPy.likelihoods.Bernoulli()`
			`laplace_inf = GPy.inference.latent_function_inference.Laplace()`
fixes to EP 2014-03-14 11:47:23 +00:00			`kernel = GPy.kern.RBF(1)`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00
			`# Model definition`
Fixed bernoulli likelihood divide by 0 and log of 0 2014-02-12 16:48:57 +00:00			`m = GPy.core.GP(data['X'], Y, kernel=kernel, likelihood=likelihood, inference_method=laplace_inf)`
Classification examples corrected (2/3) 2013-02-01 16:21:26 +00:00
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`# Optimize`
			`if optimize:`
			`#m.update_likelihood_approximation()`
			`# Parameters optimization:`
Fixed bernoulli likelihood divide by 0 and log of 0 2014-02-12 16:48:57 +00:00			`try:`
			`m.optimize('scg', messages=1)`
			`except Exception as e:`
			`return m`

hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`#m.pseudo_EM()`

			`# Plot`
			`if plot:`
			`fig, axes = pb.subplots(2, 1)`
			`m.plot_f(ax=axes[0])`
			`m.plot(ax=axes[1])`

			`print m`
examples 2012-11-29 16:27:46 +00:00			`return m`
examples directory organized. 2013-03-11 14:05:56 +00:00
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, optimize=True, plot=True):`
examples directory organized. 2013-03-11 14:05:56 +00:00			`"""`
convenient but not important changes 2013-05-15 18:12:10 +01:00			`Sparse 1D classification example`
Fixed docstring warnings - could still be mistakes 2013-09-20 13:38:20 +01:00
			`:param seed: seed value for data generation (default is 4).`
examples directory organized. 2013-03-11 14:05:56 +00:00			`:type seed: int`
Fixed docstring warnings - could still be mistakes 2013-09-20 13:38:20 +01:00
examples directory organized. 2013-03-11 14:05:56 +00:00			`"""`

			`data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)`
			`Y = data['Y'][:, 0:1]`
Examples changed to use new link_functions 2013-06-04 16:32:12 +01:00			`Y[Y.flatten() == -1] = 0`
examples directory organized. 2013-03-11 14:05:56 +00:00
			`# Model definition`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`m = GPy.models.SparseGPClassification(data['X'], Y, num_inducing=num_inducing)`
			`m['.*len'] = 4.`
examples directory organized. 2013-03-11 14:05:56 +00:00
			`# Optimize`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`if optimize:`
			`#m.update_likelihood_approximation()`
			`# Parameters optimization:`
			`#m.optimize()`
			`m.pseudo_EM()`
examples directory organized. 2013-03-11 14:05:56 +00:00
			`# Plot`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`if plot:`
			`fig, axes = pb.subplots(2, 1)`
			`m.plot_f(ax=axes[0])`
			`m.plot(ax=axes[1])`
examples directory organized. 2013-03-11 14:05:56 +00:00
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`print m`
examples directory organized. 2013-03-11 14:05:56 +00:00			`return m`

hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`def toy_heaviside(seed=default_seed, optimize=True, plot=True):`
Heaviside transformation fixed 2013-09-16 16:55:12 +01:00			`"""`
			`Simple 1D classification example using a heavy side gp transformation`
Fixed docstring warnings - could still be mistakes 2013-09-20 13:38:20 +01:00
			`:param seed: seed value for data generation (default is 4).`
Heaviside transformation fixed 2013-09-16 16:55:12 +01:00			`:type seed: int`
Fixed docstring warnings - could still be mistakes 2013-09-20 13:38:20 +01:00
Heaviside transformation fixed 2013-09-16 16:55:12 +01:00			`"""`

			`data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)`
			`Y = data['Y'][:, 0:1]`
			`Y[Y.flatten() == -1] = 0`

			`# Model definition`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside())`
			`likelihood = GPy.likelihoods.EP(Y, noise_model)`
Heaviside transformation fixed 2013-09-16 16:55:12 +01:00			`m = GPy.models.GPClassification(data['X'], likelihood=likelihood)`

			`# Optimize`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`if optimize:`
			`m.update_likelihood_approximation()`
			`# Parameters optimization:`
			`m.optimize()`
			`#m.pseudo_EM()`
Heaviside transformation fixed 2013-09-16 16:55:12 +01:00
			`# Plot`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`if plot:`
			`fig, axes = pb.subplots(2, 1)`
			`m.plot_f(ax=axes[0])`
			`m.plot(ax=axes[1])`
Heaviside transformation fixed 2013-09-16 16:55:12 +01:00
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`print m`
Heaviside transformation fixed 2013-09-16 16:55:12 +01:00			`return m`

hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None, optimize=True, plot=True):`
crescent data example is better organized 2013-09-20 11:40:00 +01:00			`"""`
			`Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.`

			`:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].`
			`:param inducing: number of inducing variables (only used for 'FITC' or 'DTC').`
			`:type inducing: int`
			`:param seed: seed value for data generation.`
			`:type seed: int`
			`:param kernel: kernel to use in the model`
			`:type kernel: a GPy kernel`
			`"""`
			`data = GPy.util.datasets.crescent_data(seed=seed)`
			`Y = data['Y']`
			`Y[Y.flatten()==-1] = 0`

			`if model_type == 'Full':`
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`m = GPy.models.GPClassification(data['X'], Y, kernel=kernel)`
crescent data example is better organized 2013-09-20 11:40:00 +01:00
			`elif model_type == 'DTC':`
			`m = GPy.models.SparseGPClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)`
			`m['.*len'] = 10.`

			`elif model_type == 'FITC':`
			`m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)`
			`m['.*len'] = 3.`

hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`if optimize:`
			`m.pseudo_EM()`

			`if plot:`
			`m.plot()`
crescent data example is better organized 2013-09-20 11:40:00 +01:00
hard-merging in the examples and testing dirs from master. This is probably a dumb way to do it, but I don;t know better. 2014-01-24 09:41:07 +00:00			`print m`
crescent data example is better organized 2013-09-20 11:40:00 +01:00			`return m`