Merge branch 'devel' of https://github.com/SheffieldML/GPy into devel

Neil Lawrence 2015-05-08 11:44:26 +01:00
commit c05540dc31
170 changed files with 30768 additions and 2183 deletions
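
The excerpts below all apply one pattern: Python 2-only syntax is replaced with forms that also work under Python 3. The print statement becomes the print() function, implicit relative imports become explicit "from . import ...", and xrange becomes range. A minimal sketch of the same migration on a hypothetical module (not part of this commit); the __future__ import is optional on Python 3 but keeps the converted syntax valid on Python 2.6+ as well:

    # hypothetical_module.py -- illustration only, not part of this commit
    from __future__ import print_function  # makes print() available on Python 2.6+

    from . import helpers  # explicit relative import; a bare "import helpers" fails on Python 3

    def demo(n=3):
        for i in range(n):         # range replaces xrange, which Python 3 removed
            print("iteration", i)  # print is a function, not a statement
        return n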

GPy/examples/__init__.py

@@ -1,7 +1,7 @@
 # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-import classification
-import regression
-import dimensionality_reduction
-import non_gaussian
+from . import classification
+from . import regression
+from . import dimensionality_reduction
+from . import non_gaussian
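
Why this change is needed: Python 3 removed implicit relative imports (PEP 328), so inside a package a bare "import classification" no longer finds the sibling module. A sketch of the failure mode, using a hypothetical package layout:

    # pkg/__init__.py -- hypothetical package, illustration only
    # Python 2 resolved "import sibling" to pkg/sibling.py implicitly.
    # Python 3 raises ImportError there; the sibling must be named explicitly:
    from . import sibling            # works on Python 3 (and Python 2.5+)
    from .sibling import some_func   # or import names from it directly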

GPy/examples/classification.py

@@ -15,7 +15,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
     """
     try:import pods
-    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+    except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
     data = pods.datasets.oil()
     X = data['X']
     Xtest = data['Xtest']
@@ -52,7 +52,7 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
     """
     try:import pods
-    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+    except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
     data = pods.datasets.toy_linear_1d_classification(seed=seed)
     Y = data['Y'][:, 0:1]
     Y[Y.flatten() == -1] = 0
@@ -75,7 +75,7 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
         m.plot_f(ax=axes[0])
         m.plot(ax=axes[1])
-    print m
+    print(m)
     return m

 def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=True):
@@ -88,7 +88,7 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
     """
     try:import pods
-    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+    except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
     data = pods.datasets.toy_linear_1d_classification(seed=seed)
     Y = data['Y'][:, 0:1]
     Y[Y.flatten() == -1] = 0
@@ -114,7 +114,7 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
         m.plot_f(ax=axes[0])
         m.plot(ax=axes[1])
-    print m
+    print(m)
     return m

 def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, optimize=True, plot=True):
@@ -127,7 +127,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
     """
     try:import pods
-    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+    except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
     data = pods.datasets.toy_linear_1d_classification(seed=seed)
     Y = data['Y'][:, 0:1]
     Y[Y.flatten() == -1] = 0
@@ -147,7 +147,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
         m.plot_f(ax=axes[0])
         m.plot(ax=axes[1])
-    print m
+    print(m)
     return m

 def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
@@ -160,7 +160,7 @@ def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
     """
     try:import pods
-    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+    except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
     data = pods.datasets.toy_linear_1d_classification(seed=seed)
     Y = data['Y'][:, 0:1]
     Y[Y.flatten() == -1] = 0
@@ -177,7 +177,7 @@ def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
     # Parameters optimization:
     for _ in range(5):
         m.optimize(max_iters=int(max_iters/5))
-        print m
+        print(m)

     # Plot
     if plot:
@@ -186,7 +186,7 @@ def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
         m.plot_f(ax=axes[0])
         m.plot(ax=axes[1])
-    print m
+    print(m)
     return m

 def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None, optimize=True, plot=True):
@@ -202,7 +202,7 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
     :type kernel: a GPy kernel
     """
     try:import pods
-    except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+    except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
     data = pods.datasets.crescent_data(seed=seed)
     Y = data['Y']
     Y[Y.flatten()==-1] = 0
@@ -224,5 +224,5 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
     if plot:
         m.plot()
-    print m
+    print(m)
     return m
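
Each function in this file is a self-contained demo that fits a model and returns it. A usage sketch, assuming the module lives at GPy.examples.classification as the package imports above suggest:

    from GPy.examples import classification  # path assumed from the package imports above

    # fit the 1-D toy classification demo without plotting and inspect the model
    m = classification.toy_linear_1d_classification(plot=False)
    print(m)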

GPy/examples/dimensionality_reduction.py

@@ -335,7 +335,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
     m.likelihood.variance = .1

     if optimize:
-        print "Optimizing model:"
+        print("Optimizing model:")
         m.optimize('bfgs', messages=verbose, max_iters=max_iters,
                    gtol=.05)
     if plot:
@@ -360,7 +360,7 @@ def ssgplvm_simulation(optimize=True, verbose=1,
     m.likelihood.variance = .1

     if optimize:
-        print "Optimizing model:"
+        print("Optimizing model:")
         m.optimize('scg', messages=verbose, max_iters=max_iters,
                    gtol=.05)
     if plot:
@@ -390,7 +390,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
     m.Yreal = Y

     if optimize:
-        print "Optimizing model:"
+        print("Optimizing model:")
         m.optimize('bfgs', messages=verbose, max_iters=max_iters,
                    gtol=.05)
     if plot:
@@ -414,7 +414,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
     m['.*noise'] = [Y.var() / 40. for Y in Ylist]

     if optimize:
-        print "Optimizing Model:"
+        print("Optimizing Model:")
         m.optimize(messages=verbose, max_iters=8e3)
     if plot:
         m.X.plot("MRD Latent Space 1D")
@@ -442,7 +442,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
                 initx="random", initz='permute', **kw)
     if optimize:
-        print "Optimizing Model:"
+        print("Optimizing Model:")
         m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1)
     if plot:
         m.X.plot("MRD Latent Space 1D")
@@ -607,7 +607,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
     try:
         if optimize: m.optimize('bfgs', messages=verbose, max_iters=5e3, bfgs_factor=10)
     except KeyboardInterrupt:
-        print "Keyboard interrupt, continuing to plot and return"
+        print("Keyboard interrupt, continuing to plot and return")

     if plot:
         fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
@@ -658,7 +658,7 @@ def ssgplvm_simulation_linear():
     def sample_X(Q, pi):
         x = np.empty(Q)
         dies = np.random.rand(Q)
-        for q in xrange(Q):
+        for q in range(Q):
             if dies[q] < pi:
                 x[q] = np.random.randn()
             else:
@@ -668,7 +668,7 @@ def ssgplvm_simulation_linear():
     Y = np.empty((N, D))
     X = np.empty((N, Q))

     # Generate data from random sampled weight matrices
-    for n in xrange(N):
+    for n in range(N):
         X[n] = sample_X(Q, pi)
         w = np.random.randn(D, Q)
         Y[n] = np.dot(w, X[n])
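
The xrange-to-range swap above preserves the original memory behavior: Python 3's range is a lazy sequence, just as Python 2's xrange was, so these loops never materialize a list. A quick sketch, assuming Python 3:

    r = range(10**9)      # lazy: no billion-element list is built
    print(r[12345])       # 12345; indexing works without iterating
    print(sum(range(5)))  # 10; iteration behaves exactly as xrange did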

GPy/examples/non_gaussian.py

@@ -37,7 +37,7 @@ def student_t_approx(optimize=True, plot=True):
     #Add student t random noise to datapoints
     deg_free = 1
-    print "Real noise: ", real_std
+    print("Real noise: ", real_std)
     initial_var_guess = 0.5
     edited_real_sd = initial_var_guess
@@ -73,7 +73,7 @@ def student_t_approx(optimize=True, plot=True):
     m4['.*t_scale2'].constrain_bounded(1e-6, 10.)
     m4['.*white'].constrain_fixed(1e-5)
     m4.randomize()
-    print m4
+    print(m4)
     debug=True
     if debug:
         m4.optimize(messages=1)
@@ -81,18 +81,18 @@ def student_t_approx(optimize=True, plot=True):
         pb.plot(m4.X, m4.inference_method.f_hat)
         pb.plot(m4.X, m4.Y, 'rx')
         m4.plot()
-        print m4
+        print(m4)
         return m4

     if optimize:
         optimizer='scg'
-        print "Clean Gaussian"
+        print("Clean Gaussian")
         m1.optimize(optimizer, messages=1)
-        print "Corrupt Gaussian"
+        print("Corrupt Gaussian")
         m2.optimize(optimizer, messages=1)
-        print "Clean student t"
+        print("Clean student t")
         m3.optimize(optimizer, messages=1)
-        print "Corrupt student t"
+        print("Corrupt student t")
         m4.optimize(optimizer, messages=1)

     if plot:
@@ -151,7 +151,7 @@ def boston_example(optimize=True, plot=True):
     for n, (train, test) in enumerate(kf):
         X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
-        print "Fold {}".format(n)
+        print("Fold {}".format(n))

         noise = 1e-1 #np.exp(-2)
         rbf_len = 0.5
@@ -163,21 +163,21 @@ def boston_example(optimize=True, plot=True):
         score_folds[0, n] = rmse(Y_test, np.mean(Y_train))

         #Gaussian GP
-        print "Gauss GP"
+        print("Gauss GP")
         mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
         mgp.constrain_fixed('.*white', 1e-5)
         mgp['.*len'] = rbf_len
         mgp['.*noise'] = noise
-        print mgp
+        print(mgp)
         if optimize:
             mgp.optimize(optimizer=optimizer, messages=messages)
         Y_test_pred = mgp.predict(X_test)
         score_folds[1, n] = rmse(Y_test, Y_test_pred[0])
         pred_density[1, n] = np.mean(mgp.log_predictive_density(X_test, Y_test))
-        print mgp
-        print pred_density
+        print(mgp)
+        print(pred_density)

-        print "Gaussian Laplace GP"
+        print("Gaussian Laplace GP")
         N, D = Y_train.shape
         g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
         g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
@@ -186,18 +186,18 @@ def boston_example(optimize=True, plot=True):
         mg.constrain_fixed('.*white', 1e-5)
         mg['rbf_len'] = rbf_len
         mg['noise'] = noise
-        print mg
+        print(mg)
         if optimize:
             mg.optimize(optimizer=optimizer, messages=messages)
         Y_test_pred = mg.predict(X_test)
         score_folds[2, n] = rmse(Y_test, Y_test_pred[0])
         pred_density[2, n] = np.mean(mg.log_predictive_density(X_test, Y_test))
-        print pred_density
-        print mg
+        print(pred_density)
+        print(mg)

         for stu_num, df in enumerate(degrees_freedoms):
             #Student T
-            print "Student-T GP {}df".format(df)
+            print("Student-T GP {}df".format(df))
             t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise)
             stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
             mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
@@ -205,14 +205,14 @@ def boston_example(optimize=True, plot=True):
             mstu_t.constrain_bounded('.*t_scale2', 0.0001, 1000)
             mstu_t['rbf_len'] = rbf_len
             mstu_t['.*t_scale2'] = noise
-            print mstu_t
+            print(mstu_t)
             if optimize:
                 mstu_t.optimize(optimizer=optimizer, messages=messages)
             Y_test_pred = mstu_t.predict(X_test)
             score_folds[3+stu_num, n] = rmse(Y_test, Y_test_pred[0])
             pred_density[3+stu_num, n] = np.mean(mstu_t.log_predictive_density(X_test, Y_test))
-            print pred_density
-            print mstu_t
+            print(pred_density)
+            print(mstu_t)

         if plot:
             plt.figure()
@@ -230,8 +230,8 @@ def boston_example(optimize=True, plot=True):
                 plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
                 plt.title('Stu t {}df'.format(df))

-    print "Average scores: {}".format(np.mean(score_folds, 1))
-    print "Average pred density: {}".format(np.mean(pred_density, 1))
+    print("Average scores: {}".format(np.mean(score_folds, 1)))
+    print("Average pred density: {}".format(np.mean(pred_density, 1)))

     if plot:
         #Plotting
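
The scoring above relies on an rmse helper defined earlier in the file, outside this excerpt. A minimal sketch of what it presumably computes:

    import numpy as np

    def rmse(Y_true, Y_pred):
        # root-mean-square error between targets and predictions
        diff = np.asarray(Y_true).flatten() - np.asarray(Y_pred).flatten()
        return np.sqrt(np.mean(diff**2))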

GPy/examples/regression.py

@@ -15,7 +15,7 @@ def olympic_marathon_men(optimize=True, plot=True):
     """Run a standard Gaussian process regression on the Olympic marathon data."""
     try:import pods
     except ImportError:
-        print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+        print('pods unavailable, see https://github.com/sods/ods for example datasets')
         return
     data = pods.datasets.olympic_marathon_men()
@@ -88,7 +88,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
     """
     try:import pods
     except ImportError:
-        print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+        print('pods unavailable, see https://github.com/sods/ods for example datasets')
         return
     data = pods.datasets.epomeo_gpx()
     num_data_list = []
@@ -135,7 +135,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
     try:import pods
     except ImportError:
-        print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+        print('pods unavailable, see https://github.com/sods/ods for example datasets')
         return
     data = pods.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number)
     # data['Y'] = data['Y'][0::2, :]
@@ -219,7 +219,7 @@ def olympic_100m_men(optimize=True, plot=True):
     """Run a standard Gaussian process regression on the Rogers and Girolami olympics data."""
     try:import pods
     except ImportError:
-        print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+        print('pods unavailable, see https://github.com/sods/ods for example datasets')
         return
     data = pods.datasets.olympic_100m_men()
@@ -240,7 +240,7 @@ def toy_rbf_1d(optimize=True, plot=True):
     """Run a simple demonstration of a standard Gaussian process fit to data sampled from an RBF covariance."""
     try:import pods
     except ImportError:
-        print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+        print('pods unavailable, see https://github.com/sods/ods for example datasets')
         return
     data = pods.datasets.toy_rbf_1d()
@@ -258,7 +258,7 @@ def toy_rbf_1d_50(optimize=True, plot=True):
     """Run a simple demonstration of a standard Gaussian process fit to data sampled from an RBF covariance."""
     try:import pods
     except ImportError:
-        print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+        print('pods unavailable, see https://github.com/sods/ods for example datasets')
         return
     data = pods.datasets.toy_rbf_1d_50()
@@ -377,7 +377,7 @@ def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
     """Predict the location of a robot given wireless signal strength readings."""
     try:import pods
     except ImportError:
-        print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+        print('pods unavailable, see https://github.com/sods/ods for example datasets')
         return
     data = pods.datasets.robot_wireless()
@@ -398,14 +398,14 @@ def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
     sse = ((data['Xtest'] - Xpredict)**2).sum()

-    print('Sum of squares error on test data: ' + str(sse))
+    print(('Sum of squares error on test data: ' + str(sse)))
     return m

 def silhouette(max_iters=100, optimize=True, plot=True):
     """Predict the pose of a figure given a silhouette. This is a task from the Agarwal and Triggs 2004 ICML paper."""
     try:import pods
     except ImportError:
-        print 'pods unavailable, see https://github.com/sods/ods for example datasets'
+        print('pods unavailable, see https://github.com/sods/ods for example datasets')
         return
     data = pods.datasets.silhouette()
@@ -416,7 +416,7 @@ def silhouette(max_iters=100, optimize=True, plot=True):
     if optimize:
         m.optimize(messages=True, max_iters=max_iters)

-    print m
+    print(m)
     return m

 def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=False):
@@ -468,7 +468,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt
     if plot:
         m.plot()

-    print m
+    print(m)
     return m

 def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
@@ -492,7 +492,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
     if plot:
         m.plot(ax=axes[0])
         axes[0].set_title('no input uncertainty')
-    print m
+    print(m)

     # the same Model with uncertainty
     m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S)
@@ -503,5 +503,50 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
         axes[1].set_title('with input uncertainty')
         fig.canvas.draw()
-    print m
+    print(m)

     return m
+
+def simple_mean_function(max_iters=100, optimize=True, plot=True):
+    """
+    The simplest possible mean function. No parameters, just a simple sinusoid.
+    """
+    # create a simple, parameter-free mean function
+    mf = GPy.core.Mapping(1, 1)
+    mf.f = np.sin
+    mf.update_gradients = lambda a, b: None
+
+    X = np.linspace(0, 10, 50).reshape(-1, 1)
+    Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape)
+
+    k = GPy.kern.RBF(1)
+    lik = GPy.likelihoods.Gaussian()
+    m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
+    if optimize:
+        m.optimize(max_iters=max_iters)
+    if plot:
+        m.plot(plot_limits=(-10, 15))
+    return m
+
+def parametric_mean_function(max_iters=100, optimize=True, plot=True):
+    """
+    A linear mean function with parameters that we'll learn alongside the kernel.
+    """
+    # create a linear mean function whose parameters are optimized with the model
+    X = np.linspace(0, 10, 50).reshape(-1, 1)
+    Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X
+
+    mf = GPy.mappings.Linear(1, 1)
+    k = GPy.kern.RBF(1)
+    lik = GPy.likelihoods.Gaussian()
+    m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
+    if optimize:
+        m.optimize(max_iters=max_iters)
+    if plot:
+        m.plot()
+    return m
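
A usage sketch for the two new demos, assuming they land in GPy.examples.regression alongside the other functions in this file:

    import numpy as np
    from GPy.examples import regression  # path assumed, as with the other example modules

    np.random.seed(0)  # the demos simulate their data, so seed for reproducibility
    m1 = regression.simple_mean_function(plot=False)      # fixed sin() mean, no parameters
    m2 = regression.parametric_mean_function(plot=False)  # linear mean learned with the kernel
    print(m1)
    print(m2)  # the learned linear map should roughly recover the 3*X trend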