2012-11-29 16:39:20 +00:00
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
2012-11-29 16:27:46 +00:00
"""
2013-02-01 16:21:26 +00:00
Gaussian Processes classification
2012-11-29 16:27:46 +00:00
"""
import pylab as pb
import numpy as np
import GPy
2013-05-17 17:17:30 +01:00
# Default value of the ``seed`` argument of the demos in this file; each demo
# forwards its seed to the GPy dataset generator it calls.
default_seed = 10000
2013-08-19 07:37:09 +02:00
def crescent_data(seed=default_seed, kernel=None):
    """
    Run a Gaussian process classification on the crescent data.

    The demonstration calls the basic GP classification model and uses EP to
    approximate the likelihood.

    :param seed: seed value for data generation.
    :type seed: int
    :param kernel: kernel forwarded to ``GPy.models.GPClassification``
        (``None`` is passed through unchanged).
    :returns: the model after ``pseudo_EM`` has run.
    """
    data = GPy.util.datasets.crescent_data(seed=seed)

    # Recode the -1 labels as 0; the label array is modified in place.
    Y = data['Y']
    Y[Y.flatten() == -1] = 0

    # The kernel argument has to reach the model, otherwise a caller-supplied
    # kernel is silently ignored.
    m = GPy.models.GPClassification(data['X'], Y, kernel=kernel)
    m.pseudo_EM()

    print(m)
    m.plot()
    return m
2013-08-19 07:37:09 +02:00
def oil(num_inducing=50, max_iters=100, kernel=None):
    """
    Run a Gaussian process classification on the three phase oil data.

    The demonstration builds a sparse GP classification model and uses EP to
    approximate the likelihood.

    :param num_inducing: forwarded to ``SparseGPClassification`` as its
        ``num_inducing`` argument.
    :type num_inducing: int
    :param max_iters: forwarded to ``m.optimize`` as ``max_iters``.
    :type max_iters: int
    :param kernel: kernel forwarded to ``SparseGPClassification``
        (``None`` is passed through unchanged).
    :returns: the optimized model.
    """
    data = GPy.util.datasets.oil()
    X = data['X']
    Xtest = data['Xtest']

    # Keep only the first label column and recode -1 as 0 (in place).
    Y = data['Y'][:, 0:1]
    Ytest = data['Ytest'][:, 0:1]
    Y[Y.flatten() == -1] = 0
    Ytest[Ytest.flatten() == -1] = 0

    # Create GP model
    m = GPy.models.SparseGPClassification(
        X, Y, kernel=kernel, num_inducing=num_inducing)

    # Tie the parameters whose names match '.*len' and start them at 10.
    m.tie_params('.*len')
    m['.*len'] = 10.
    m.update_likelihood_approximation()

    # Optimize
    m.optimize(max_iters=max_iters)
    print(m)

    # Test: first output of predict on the test inputs vs. the test labels.
    probs = m.predict(Xtest)[0]
    GPy.util.classification.conf_matrix(probs, Ytest)
    return m
2013-02-01 16:21:26 +00:00
def toy_linear_1d_classification(seed=default_seed):
    """
    Simple 1D classification example.

    :param seed: seed value for data generation (defaults to the module-level
        ``default_seed``).
    :type seed: int
    :returns: the model after ``pseudo_EM`` has run.
    """
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)

    # Keep only the first label column and recode -1 as 0 (in place).
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

    # Model definition
    m = GPy.models.GPClassification(data['X'], Y)

    # Optimize
    m.pseudo_EM()

    # Plot: plot_f on the top axes, plot on the bottom axes.
    _, axes = pb.subplots(2, 1)
    m.plot_f(ax=axes[0])
    m.plot(ax=axes[1])
    print(m)

    return m
2013-03-11 14:05:56 +00:00
2013-06-05 18:01:05 +01:00
def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed):
    """
    Sparse 1D classification example.

    :param num_inducing: forwarded to ``SparseGPClassification`` as its
        ``num_inducing`` argument.
    :type num_inducing: int
    :param seed: seed value for data generation (defaults to the module-level
        ``default_seed``).
    :type seed: int
    :returns: the model after ``pseudo_EM`` has run.
    """
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)

    # Keep only the first label column and recode -1 as 0 (in place).
    Y = data['Y'][:, 0:1]
    Y[Y.flatten() == -1] = 0

    # Model definition; parameters matching '.*len' start at 4.
    m = GPy.models.SparseGPClassification(
        data['X'], Y, num_inducing=num_inducing)
    m['.*len'] = 4.

    # Optimize
    m.pseudo_EM()

    # Plot: plot_f on the top axes, plot on the bottom axes.
    _, axes = pb.subplots(2, 1)
    m.plot_f(ax=axes[0])
    m.plot(ax=axes[1])
    print(m)
    return m
2013-08-19 08:51:58 +02:00
def sparse_crescent_data(num_inducing=10, seed=default_seed, kernel=None):
    """
    Run a Gaussian process classification with DTC approximation on the
    crescent data.

    The demonstration builds a sparse GP classification model and uses EP to
    approximate the likelihood.

    :param num_inducing: forwarded to ``SparseGPClassification`` as its
        ``num_inducing`` argument.
    :type num_inducing: int
    :param seed: seed value for data generation.
    :type seed: int
    :param kernel: kernel forwarded to ``SparseGPClassification``
        (``None`` is passed through unchanged).
    :returns: the model after ``pseudo_EM`` has run.
    """
    data = GPy.util.datasets.crescent_data(seed=seed)

    # Recode the -1 labels as 0; the label array is modified in place.
    Y = data['Y']
    Y[Y.flatten() == -1] = 0

    m = GPy.models.SparseGPClassification(
        data['X'], Y, kernel=kernel, num_inducing=num_inducing)
    # Parameters matching '.*len' start at 10.
    m['.*len'] = 10.
    m.pseudo_EM()

    print(m)
    m.plot()
    return m
2013-06-05 14:11:28 +01:00
2013-06-05 18:01:05 +01:00
def FITC_crescent_data(num_inducing=10, seed=default_seed):
    """
    Run a Gaussian process classification with FITC approximation on the
    crescent data.

    The demonstration uses EP to approximate the likelihood.

    :param num_inducing: forwarded to ``FITCClassification`` as its
        ``num_inducing`` argument.
    :type num_inducing: int
    :param seed: seed value for data generation.
    :type seed: int
    :returns: the model after ``pseudo_EM`` has run.
    """
    data = GPy.util.datasets.crescent_data(seed=seed)

    # Recode the -1 labels as 0; the label array is modified in place.
    Y = data['Y']
    Y[Y.flatten() == -1] = 0

    m = GPy.models.FITCClassification(
        data['X'], Y, num_inducing=num_inducing)
    # Bound the parameters matching '.*len' to [1, 1e3], then start them at 3.
    m.constrain_bounded('.*len', 1., 1e3)
    m['.*len'] = 3.
    m.pseudo_EM()

    print(m)
    m.plot()
    return m