Merge branch 'master' of github.com:SheffieldML/GPy

This commit is contained in:
James Hensman 2013-03-11 14:28:40 +00:00
commit d430fe986f
13 changed files with 208 additions and 379 deletions

View file

@ -1,37 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import GPy
np.random.seed(123344)
N = 10
M = 3
Q = 2
D = 4
#generate GPLVM-like data
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
m.constrain_positive('(rbf|bias|noise|white|S)')
# m.constrain_fixed('S', 1)
# pb.figure()
# m.plot()
# pb.title('PCA initialisation')
# pb.figure()
# m.optimize(messages = 1)
# m.plot()
# pb.title('After optimisation')
m.ensure_default_constraints()
m.randomize()
m.checkgrad(verbose = 1)

View file

@ -1,9 +1,8 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
# Please don't delete this without explaining to Neil the right way of doing this. I want to be able to run:
# GPy.examples.regression.toy_rbf_1D() from ipython having imported GPy, and this seems to be the way to do it!
import classification import classification
import regression import regression
import unsupervised import dimensionality_reduction
import non_gaussian
import tutorials import tutorials

View file

@ -107,3 +107,80 @@ def toy_linear_1d_classification(seed=default_seed):
print(m) print(m)
return m return m
def sparse_toy_linear_1d_classification(seed=default_seed):
"""
Simple 1D classification example
:param seed : seed value for data generation (default is 4).
:type seed: int
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y == -1] = 0
# Kernel object
kernel = GPy.kern.rbf(1)
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y,distribution)
Z = np.random.uniform(data['X'].min(),data['X'].max(),(10,1))
# Model definition
m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
m.ensure_default_constraints()
# Optimize
m.update_likelihood_approximation()
# Parameters optimization:
m.optimize()
#m.EPEM() #FIXME
# Plot
pb.subplot(211)
m.plot_f()
pb.subplot(212)
m.plot()
print(m)
return m
def sparse_crescent_data(inducing=10, seed=default_seed):
"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
:param seed : seed value for data generation.
:type seed: int
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int
"""
data = GPy.util.datasets.crescent_data(seed=seed)
# Kernel object
kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'],distribution)
sample = np.random.randint(0,data['X'].shape[0],inducing)
Z = data['X'][sample,:]
#Z = (np.random.random_sample(2*inducing)*(data['X'].max()-data['X'].min())+data['X'].min()).reshape(inducing,-1)
# create sparse GP EP model
m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
m.ensure_default_constraints()
m.set('len',10.)
m.update_likelihood_approximation()
# optimize
m.optimize()
print(m)
# plot
m.plot()
return m

View file

@ -0,0 +1,56 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import GPy
default_seed = np.random.seed(123344)
def BGPLVM(seed = default_seed):
N = 10
M = 3
Q = 2
D = 4
#generate GPLVM-like data
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
m.constrain_positive('(rbf|bias|noise|white|S)')
# m.constrain_fixed('S', 1)
# pb.figure()
# m.plot()
# pb.title('PCA initialisation')
# pb.figure()
# m.optimize(messages = 1)
# m.plot()
# pb.title('After optimisation')
m.ensure_default_constraints()
m.randomize()
m.checkgrad(verbose = 1)
return m
def GPLVM_oil_100():
data = GPy.util.datasets.oil_100()
# create simple GP model
m = GPy.models.GPLVM(data['X'], 2)
# optimize
m.ensure_default_constraints()
m.optimize()
# plot
print(m)
return m

View file

@ -11,7 +11,7 @@ import GPy
default_seed=10000 default_seed=10000
def toy_1d(seed=default_seed): def toy_poisson_1d(seed=default_seed):
""" """
Simple 1D classification example Simple 1D classification example
:param seed : seed value for data generation (default is 4). :param seed : seed value for data generation (default is 4).

View file

@ -1,57 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import cPickle as pickle
import numpy as np
import pylab as pb
import GPy
import pylab as plt
np.random.seed(3)
def plot_oil(X, theta, labels, label):
plt.figure()
X = X[:,np.argsort(theta)[:2]]
flow_type = (X[labels[:,0]==1])
plt.plot(flow_type[:,0], flow_type[:,1], 'rx')
flow_type = (X[labels[:,1]==1])
plt.plot(flow_type[:,0], flow_type[:,1], 'gx')
flow_type = (X[labels[:,2]==1])
plt.plot(flow_type[:,0], flow_type[:,1], 'bx')
plt.title(label)
data = pickle.load(open('../../../GPy_assembla/datasets/oil_flow_3classes.pickle', 'r'))
Y = data['DataTrn']
N, D = Y.shape
selected = np.random.permutation(N)[:350]
labels = data['DataTrnLbls'][selected]
Y = Y[selected]
N, D = Y.shape
Y -= Y.mean(axis=0)
# Y /= Y.std(axis=0)
Q = 5
k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 20)
m.constrain_positive('(rbf|bias|S|linear|white|noise)')
# m.unconstrain('noise')
# m.constrain_fixed('noise_precision', 50.0)
# m.unconstrain('white')
# m.constrain_bounded('white', 1e-6, 10.0)
# plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
#m.optimize(messages = True)
# m.optimize('tnc', messages = True)
# plot_oil(m.X, m.kern.parts[0].lengthscale, labels, 'B-GPLVM')
# # pb.figure()
# m.plot()
# pb.title('PCA initialisation')
# pb.figure()
# m.optimize(messages = 1)
# m.plot()
# pb.title('After optimisation')
# m = GPy.models.GPLVM(Y, Q)
# m.constrain_positive('(white|rbf|bias|noise)')
# m.optimize()
# plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')

View file

@ -108,9 +108,6 @@ def coregionalisation_toy2():
pb.plot(X2[:,0],Y2[:,0],'gx',mew=2) pb.plot(X2[:,0],Y2[:,0],'gx',mew=2)
return m return m
def coregionalisation_toy(): def coregionalisation_toy():
""" """
A simple demonstration of coregionalisation on two sinusoidal functions A simple demonstration of coregionalisation on two sinusoidal functions
@ -211,7 +208,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
xlim = ax.get_xlim() xlim = ax.get_xlim()
ylim = ax.get_ylim() ylim = ax.get_ylim()
# Now run a few optimizations # Now run a few optimizations
models = [] models = []
optim_point_x = np.empty(2) optim_point_x = np.empty(2)
@ -219,18 +216,18 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
np.random.seed(seed=seed) np.random.seed(seed=seed)
for i in range(0, model_restarts): for i in range(0, model_restarts):
kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + GPy.kern.white(1,variance=np.random.exponential(1.)) kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + GPy.kern.white(1,variance=np.random.exponential(1.))
m = GPy.models.GP_regression(data['X'],data['Y'], kernel=kern) m = GPy.models.GP_regression(data['X'],data['Y'], kernel=kern)
optim_point_x[0] = m.get('rbf_lengthscale') optim_point_x[0] = m.get('rbf_lengthscale')
optim_point_y[0] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance')); optim_point_y[0] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance'));
# optimize # optimize
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize(xtol=1e-6,ftol=1e-6) m.optimize(xtol=1e-6,ftol=1e-6)
optim_point_x[1] = m.get('rbf_lengthscale') optim_point_x[1] = m.get('rbf_lengthscale')
optim_point_y[1] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance')); optim_point_y[1] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance'));
pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1]-optim_point_x[0], optim_point_y[1]-optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k') pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1]-optim_point_x[0], optim_point_y[1]-optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
models.append(m) models.append(m)
@ -264,7 +261,7 @@ def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf)
total_var = (np.dot(np.dot(data['Y'].T,GPy.util.linalg.pdinv(K)[0]), data['Y'])/data['Y'].shape[0])[0,0] total_var = (np.dot(np.dot(data['Y'].T,GPy.util.linalg.pdinv(K)[0]), data['Y'])/data['Y'].shape[0])[0,0]
noise_var *= total_var noise_var *= total_var
signal_var *= total_var signal_var *= total_var
kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale) + GPy.kern.white(1, variance=noise_var) kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale) + GPy.kern.white(1, variance=noise_var)
model = GPy.models.GP_regression(data['X'], data['Y'], kernel=kernel) model = GPy.models.GP_regression(data['X'], data['Y'], kernel=kernel)
@ -273,3 +270,70 @@ def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf)
lls.append(length_scale_lls) lls.append(length_scale_lls)
return np.array(lls) return np.array(lls)
def sparse_GP_regression_1D(N = 400, M = 5):
"""Run a 1D example of a sparse GP regression."""
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(N,1))
Y = np.sin(X)+np.random.randn(N,1)*0.05
# construct kernel
rbf = GPy.kern.rbf(1)
noise = GPy.kern.white(1)
kernel = rbf + noise
# create simple GP model
m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
m.constrain_positive('(variance|lengthscale|precision)')
m.checkgrad(verbose=1)
m.optimize('tnc', messages = 1)
m.plot()
return m
def sparse_GP_regression_2D(N = 400, M = 50):
"""Run a 2D example of a sparse GP regression."""
X = np.random.uniform(-3.,3.,(N,2))
Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(N,1)*0.05
# construct kernel
rbf = GPy.kern.rbf(2)
noise = GPy.kern.white(2)
kernel = rbf + noise
# create simple GP model
m = GPy.models.sparse_GP_regression(X,Y,kernel, M = M)
# contrain all parameters to be positive (but not inducing inputs)
m.constrain_positive('(variance|lengthscale|precision)')
m.set('len',2.)
m.checkgrad()
# optimize and plot
pb.figure()
m.optimize('tnc', messages = 1)
m.plot()
print(m)
return m
def uncertain_inputs_sparse_regression():
"""Run a 1D example of a sparse GP regression with uncertain inputs."""
# sample inputs and outputs
S = np.ones((20,1))
X = np.random.uniform(-3.,3.,(20,1))
Y = np.sin(X)+np.random.randn(20,1)*0.05
likelihood = GPy.likelihoods.Gaussian(Y)
Z = np.random.uniform(-3.,3.,(7,1))
k = GPy.kern.rbf(1) + GPy.kern.white(1)
# create simple GP model
m = GPy.models.sparse_GP(X, likelihood, kernel=k, Z=Z, X_uncertainty=S)
# contrain all parameters to be positive
m.constrain_positive('(variance|prec)')
# optimize and plot
m.optimize('tnc', max_f_eval = 1000, messages=1)
m.plot()
print(m)
return m

View file

@ -1,30 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
import GPy
np.random.seed(1)
print "sparse GPLVM with RBF kernel"
N = 100
M = 8
Q = 1
D = 2
#generate GPLVM-like data
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q, 1.0, 2.0) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
m = GPy.models.sparse_GPLVM(Y, Q, M=M)
m.constrain_positive('(rbf|bias|noise|white)')
pb.figure()
m.plot()
pb.title('PCA initialisation')
pb.figure()
m.optimize(messages = 1)
m.plot()
pb.title('After optimisation')

View file

@ -1,64 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
"""
Sparse Gaussian Processes regression with an RBF kernel
"""
import pylab as pb
import numpy as np
import GPy
np.random.seed(2)
pb.ion()
N = 400
M = 5
######################################
## 1 dimensional example
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(N,1))
Y = np.sin(X)+np.random.randn(N,1)*0.05
# construct kernel
rbf = GPy.kern.rbf(1)
noise = GPy.kern.white(1)
kernel = rbf + noise
# create simple GP model
m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
m.constrain_positive('(variance|lengthscale|precision)')
m.checkgrad(verbose=1)
m.optimize('tnc', messages = 1)
m.plot()
######################################
## 2 dimensional example
# # sample inputs and outputs
# X = np.random.uniform(-3.,3.,(N,2))
# Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(N,1)*0.05
# # construct kernel
# rbf = GPy.kern.rbf(2)
# noise = GPy.kern.white(2)
# kernel = rbf + noise
# # create simple GP model
# m2 = GPy.models.sparse_GP_regression(X,Y,kernel, M = 50)
# create simple GP model
# # contrain all parameters to be positive (but not inducing inputs)
# m2.constrain_positive('(variance|lengthscale|precision)')
# #check gradient FIXME unit test please
# m2.checkgrad()
# # optimize and plot
# pb.figure()
# m2.optimize('tnc', messages = 1)
# m2.plot()
# print(m2)

View file

@ -1,95 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
"""
Sparse Gaussian Processes regression with an RBF kernel
"""
import pylab as pb
import numpy as np
import GPy
np.random.seed(2)
N = 500
M = 5
default_seed=10000
def crescent_data(inducing=10, seed=default_seed):
"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
:param seed : seed value for data generation.
:type seed: int
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int
"""
data = GPy.util.datasets.crescent_data(seed=seed)
# Kernel object
kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(data['Y'],distribution)
sample = np.random.randint(0,data['X'].shape[0],inducing)
Z = data['X'][sample,:]
#Z = (np.random.random_sample(2*inducing)*(data['X'].max()-data['X'].min())+data['X'].min()).reshape(inducing,-1)
# create sparse GP EP model
m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
m.ensure_default_constraints()
m.update_likelihood_approximation()
print(m)
# optimize
m.optimize()
print(m)
# plot
m.plot()
return m
def toy_linear_1d_classification(seed=default_seed):
"""
Simple 1D classification example
:param seed : seed value for data generation (default is 4).
:type seed: int
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y == -1] = 0
# Kernel object
kernel = GPy.kern.rbf(1)
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y,distribution)
Z = np.random.uniform(data['X'].min(),data['X'].max(),(10,1))
# Model definition
m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
m.ensure_default_constraints()
# Optimize
m.update_likelihood_approximation()
# Parameters optimization:
m.optimize()
#m.EPEM() #FIXME
# Plot
pb.subplot(211)
m.plot_f()
pb.subplot(212)
m.plot()
print(m)
return m

View file

@ -1,27 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import pylab as pb
import numpy as np
import GPy
pb.ion()
pb.close('all')
# sample inputs and outputs
S = np.ones((20,1))
X = np.random.uniform(-3.,3.,(20,1))
Y = np.sin(X)+np.random.randn(20,1)*0.05
k = GPy.kern.rbf(1) + GPy.kern.white(1)
# create simple GP model
m = GPy.models.sparse_GP_regression(X,Y,X_uncertainty=S,kernel=k)
# contrain all parameters to be positive
m.constrain_positive('(variance|prec)')
# optimize and plot
m.optimize('tnc', max_f_eval = 1000, messages=1)
m.plot()
print(m)

View file

@ -1,32 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
"""
Sparse Gaussian Processes regression with an RBF kernel,
using the uncollapsed sparse GP (where the distribution of the
inducing points is explicitley represented)
"""
import pylab as pb
import numpy as np
import GPy
np.random.seed(2)
pb.ion()
N = 500
M = 20
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(N,1))
Y = np.sin(X)+np.random.randn(N,1)*0.05
kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
# create simple GP model
m = GPy.models.uncollapsed_sparse_GP(X, Y, kernel=kernel, M=M)#, X_uncertainty=np.zeros_like(X)+0.01)
# contrain all parameters to be positive
m.ensure_default_constraints()
m.checkgrad()
# optimize and plot
m.plot()

View file

@ -1,25 +0,0 @@
"""
Usupervised learning with Gaussian Processes.
"""
import pylab as pb
import numpy as np
import GPy
######################################
## Oil data subsampled to 100 points.
def oil_100():
data = GPy.util.datasets.oil_100()
# create simple GP model
m = GPy.models.GPLVM(data['X'], 2)
# optimize
m.ensure_default_constraints()
m.optimize()
# plot
print(m)
return m