From 375e2f6225d1f628a062fa70d727c171dcda6136 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 24 Jan 2014 09:41:07 +0000
Subject: [PATCH] hard-merging in the examples and testing dirs from master.
 This is probably a dumb way to do it, but I don't know better.

---
 GPy/examples/classification.py            | 130 ++--
 GPy/examples/dimensionality_reduction.py  | 514 ++++++++--------
 GPy/examples/non_gaussian.py              | 286 +++++++++
 GPy/examples/regression.py                | 348 ++++++-----
 GPy/examples/stochastic.py                |  30 +-
 GPy/examples/tutorials.py                 |  79 +--
 GPy/testing/bgplvm_tests.py               |   2 +-
 GPy/testing/examples_tests.py             |  56 +-
 GPy/testing/gp_transformation_tests.py    |  61 ++
 GPy/testing/kernel_tests.py               |  33 +-
 GPy/testing/likelihood_tests.py           | 686 ++++++++++++++++++++++
 GPy/testing/parameter_testing.py          | 141 -----
 GPy/testing/psi_stat_expectation_tests.py |  59 +-
 GPy/testing/psi_stat_gradient_tests.py    |  72 ++-
 GPy/testing/sparse_gplvm_tests.py         |   2 +-
 GPy/testing/unit_tests.py                 |   6 +-
 16 files changed, 1747 insertions(+), 758 deletions(-)
 create mode 100644 GPy/examples/non_gaussian.py
 create mode 100644 GPy/testing/gp_transformation_tests.py
 create mode 100644 GPy/testing/likelihood_tests.py
 delete mode 100644 GPy/testing/parameter_testing.py

diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index da2ffb24..f9aaddd1 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -6,12 +6,11 @@
 Gaussian Processes classification
 """
 import pylab as pb
-import numpy as np
 import GPy

 default_seed = 10000

-def oil(num_inducing=50, max_iters=100, kernel=None):
+def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
     """
     Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
@@ -25,7 +24,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None):
     Ytest[Ytest.flatten()==-1] = 0

     # Create GP model
-    m = GPy.models.SparseGPClassification(X, Y,kernel=kernel,num_inducing=num_inducing)
+    m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, num_inducing=num_inducing)

     # Constrain all parameters to be positive
     m.tie_params('.*len')
@@ -33,17 +32,18 @@ def oil(num_inducing=50, max_iters=100, kernel=None):
     m.update_likelihood_approximation()

     # Optimize
-    m.optimize(max_iters=max_iters)
+    if optimize:
+        m.optimize(max_iters=max_iters)
     print(m)

     #Test
     probs = m.predict(Xtest)[0]
-    GPy.util.classification.conf_matrix(probs,Ytest)
+    GPy.util.classification.conf_matrix(probs, Ytest)
     return m

-def toy_linear_1d_classification(seed=default_seed):
+def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
     """
-    Simple 1D classification example
+    Simple 1D classification example using EP approximation

     :param seed: seed value for data generation (default is 4).
:type seed: int @@ -58,20 +58,59 @@ def toy_linear_1d_classification(seed=default_seed): m = GPy.models.GPClassification(data['X'], Y) # Optimize - #m.update_likelihood_approximation() - # Parameters optimization: - #m.optimize() - m.pseudo_EM() + if optimize: + #m.update_likelihood_approximation() + # Parameters optimization: + #m.optimize() + #m.update_likelihood_approximation() + m.pseudo_EM() # Plot - fig, axes = pb.subplots(2,1) - m.plot_f(ax=axes[0]) - m.plot(ax=axes[1]) - print(m) + if plot: + fig, axes = pb.subplots(2, 1) + m.plot_f(ax=axes[0]) + m.plot(ax=axes[1]) + print m return m -def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed): +def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=True): + """ + Simple 1D classification example using Laplace approximation + + :param seed: seed value for data generation (default is 4). + :type seed: int + + """ + + data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) + Y = data['Y'][:, 0:1] + Y[Y.flatten() == -1] = 0 + + bern_noise_model = GPy.likelihoods.bernoulli() + laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), bern_noise_model) + + # Model definition + m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood) + print m + + # Optimize + if optimize: + #m.update_likelihood_approximation() + # Parameters optimization: + m.optimize('bfgs', messages=1) + #m.pseudo_EM() + + # Plot + if plot: + fig, axes = pb.subplots(2, 1) + m.plot_f(ax=axes[0]) + m.plot(ax=axes[1]) + + print m + return m + +def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, optimize=True, plot=True): """ Sparse 1D classification example @@ -85,24 +124,26 @@ def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed): Y[Y.flatten() == -1] = 0 # Model definition - m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing) - m['.*len']= 4. + m = GPy.models.SparseGPClassification(data['X'], Y, num_inducing=num_inducing) + m['.*len'] = 4. 
     # Optimize
-    #m.update_likelihood_approximation()
-    # Parameters optimization:
-    #m.optimize()
-    m.pseudo_EM()
+    if optimize:
+        #m.update_likelihood_approximation()
+        # Parameters optimization:
+        #m.optimize()
+        m.pseudo_EM()

     # Plot
-    fig, axes = pb.subplots(2,1)
-    m.plot_f(ax=axes[0])
-    m.plot(ax=axes[1])
-    print(m)
+    if plot:
+        fig, axes = pb.subplots(2, 1)
+        m.plot_f(ax=axes[0])
+        m.plot(ax=axes[1])
+    print m
     return m

-def toy_heaviside(seed=default_seed):
+def toy_heaviside(seed=default_seed, optimize=True, plot=True):
     """
     Simple 1D classification example using a Heaviside GP transformation

@@ -116,25 +157,27 @@
     Y[Y.flatten() == -1] = 0

     # Model definition
-    noise_model = GPy.likelihoods.binomial(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
-    likelihood = GPy.likelihoods.EP(Y,noise_model)
+    noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
+    likelihood = GPy.likelihoods.EP(Y, noise_model)
     m = GPy.models.GPClassification(data['X'], likelihood=likelihood)

     # Optimize
-    m.update_likelihood_approximation()
-    # Parameters optimization:
-    m.optimize()
-    #m.pseudo_EM()
+    if optimize:
+        m.update_likelihood_approximation()
+        # Parameters optimization:
+        m.optimize()
+        #m.pseudo_EM()

     # Plot
-    fig, axes = pb.subplots(2,1)
-    m.plot_f(ax=axes[0])
-    m.plot(ax=axes[1])
-    print(m)
+    if plot:
+        fig, axes = pb.subplots(2, 1)
+        m.plot_f(ax=axes[0])
+        m.plot(ax=axes[1])
+    print m
     return m

-def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None):
+def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None, optimize=True, plot=True):
     """
     Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
@@ -151,7 +194,7 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
     Y[Y.flatten()==-1] = 0

     if model_type == 'Full':
-        m = GPy.models.GPClassification(data['X'], Y,kernel=kernel)
+        m = GPy.models.GPClassification(data['X'], Y, kernel=kernel)

     elif model_type == 'DTC':
         m = GPy.models.SparseGPClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
@@ -161,8 +204,11 @@
         m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
         m['.*len'] = 3.

-    m.pseudo_EM()
-    print(m)
-    m.plot()
+    if optimize:
+        m.pseudo_EM()
+    if plot:
+        m.plot()
+
+    print m
     return m

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index badef423..46fc6797 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -1,96 +1,105 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
+import numpy as _np
+default_seed = _np.random.seed(123344)

-import numpy as np
-from matplotlib import pyplot as plt, cm
+def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False):
+    """
+    model for testing purposes. Samples from a GP with rbf kernel and learns
+    the samples with a new kernel.
+    Normally not for optimization, just model checking
+    """
+    from GPy.likelihoods.gaussian import Gaussian
+    import GPy

-from ..models.bayesian_gplvm import BayesianGPLVM
-from ..likelihoods.gaussian import Gaussian
-import GPy
+    num_inputs = 13
+    num_inducing = 5
+    if plot:
+        output_dim = 1
+        input_dim = 2
+    else:
+        input_dim = 2
+        output_dim = 25

-default_seed = np.random.seed(123344)
-
-def BGPLVM(seed=default_seed):
-    N = 5
-    num_inducing = 4
-    input_dim = 3
-    D = 2
     # generate GPLVM-like data
-    X = np.random.rand(N, input_dim)
-    lengthscales = np.random.rand(input_dim)
+    X = _np.random.rand(num_inputs, input_dim)
+    lengthscales = _np.random.rand(input_dim)
     k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
          + GPy.kern.white(input_dim, 0.01))
     K = k.K(X)
-    Y = np.random.multivariate_normal(np.zeros(N), K, D).T
+    Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, output_dim).T
     lik = Gaussian(Y, normalize=True)

-#    k = GPy.kern.rbf_inv(input_dim, .5, np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
-    k = GPy.kern.rbf(input_dim, ARD=1, name="rbf1") + GPy.kern.rbf(input_dim, ARD=1, name='rbf2') + GPy.kern.linear(input_dim, ARD=1, name='linear_part')
-#    k = GPy.kern.rbf(input_dim, ARD = False)
+    k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
+    # k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
+    # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
+    # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
+    # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
+    # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)

-    m = BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing)
+    m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing)
+    #===========================================================================
+    # randomly obstruct data with percentage p
+    p = .8
+    Y_obstruct = Y.copy()
+    Y_obstruct[_np.random.uniform(size=(Y.shape)) < p] = _np.nan
+    #===========================================================================
+    m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
     m.lengthscales = lengthscales

-    # m.constrain_positive('(rbf|bias|noise|white|S)')
-    # m.constrain_fixed('S', 1)
-    # pb.figure()
-    # m.plot()
-    # pb.title('PCA initialisation')
-    # pb.figure()
-    # m.optimize(messages = 1)
-    # m.plot()
-    # pb.title('After optimisation')
-    # m.randomize()
-    # m.checkgrad(verbose=1)
+    if plot:
+        import matplotlib.pyplot as pb
+        m.plot()
+        pb.title('PCA initialisation')
+        m2.plot()
+        pb.title('PCA initialisation')

-    return m
+    if optimize:
+        m.optimize('scg', messages=verbose)
+        m2.optimize('scg', messages=verbose)
+        if plot:
+            m.plot()
+            pb.title('After optimisation')
+            m2.plot()
+            pb.title('After optimisation')

-def GPLVM_oil_100(optimize=True, plot=True):
+    return m, m2
+
+def gplvm_oil_100(optimize=True, verbose=1, plot=True):
+    import GPy
     data = GPy.util.datasets.oil_100()
     Y = data['X']
-    # create simple GP model
     kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
     m = GPy.models.GPLVM(Y, 6, kernel=kernel)
     m.data_labels = data['Y'].argmax(axis=1)
-
-    # optimize
-    if optimize:
-        m.optimize('scg', 
messages=1) - - # plot - print(m) - if plot: - m.plot_latent(labels=m.data_labels) + if optimize: m.optimize('scg', messages=verbose) + if plot: m.plot_latent(labels=m.data_labels) return m -def sparseGPLVM_oil(optimize=True, N=100, input_dim=6, num_inducing=15, max_iters=50): - np.random.seed(0) +def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_inducing=15, max_iters=50): + import GPy + _np.random.seed(0) data = GPy.util.datasets.oil() - Y = data['X'][:N] Y = Y - Y.mean(0) Y /= Y.std(0) + # Create the model + kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing) + m.data_labels = data['Y'][:N].argmax(axis=1) - # create simple GP model - kernel = GPy.kern.rbf(input_dim, ARD=True) + GPy.kern.bias(input_dim) - m = GPy.models.SparseGPLVM(Y, input_dim, kernel=kernel, num_inducing=num_inducing) - m.data_labels = data['Y'].argmax(axis=1) - - # optimize - if optimize: - m.optimize('scg', messages=1, max_iters=max_iters) - - # plot - print(m) - # m.plot_latent(labels=m.data_labels) + if optimize: m.optimize('scg', messages=verbose, max_iters=max_iters) + if plot: + m.plot_latent(labels=m.data_labels) + m.kern.plot_ARD() return m -def swiss_roll(optimize=True, N=1000, num_inducing=15, input_dim=4, sigma=.2, plot=False): +def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4, sigma=.2): + import GPy from GPy.util.datasets import swiss_roll_generated - from GPy.core.transformations import LogexpClipped + from GPy.models import BayesianGPLVM - data = swiss_roll_generated(N=N, sigma=sigma) + data = swiss_roll_generated(num_samples=N, sigma=sigma) Y = data['Y'] Y -= Y.mean() Y /= Y.std() @@ -102,120 +111,99 @@ def swiss_roll(optimize=True, N=1000, num_inducing=15, input_dim=4, sigma=.2, pl from sklearn.manifold.isomap import Isomap iso = Isomap().fit(Y) X = iso.embedding_ - if input_dim > 2: - X = np.hstack((X, np.random.randn(N, input_dim - 2))) + if Q > 2: + X = _np.hstack((X, _np.random.randn(N, Q - 2))) except ImportError: - X = np.random.randn(N, input_dim) + X = _np.random.randn(N, Q) if plot: - from mpl_toolkits import mplot3d - import pylab - fig = pylab.figure("Swiss Roll Data") + import matplotlib.pyplot as plt + from mpl_toolkits.mplot3d import Axes3D # @UnusedImport + fig = plt.figure("Swiss Roll Data") ax = fig.add_subplot(121, projection='3d') ax.scatter(*Y.T, c=c) ax.set_title("Swiss Roll") ax = fig.add_subplot(122) ax.scatter(*X.T[:2], c=c) - ax.set_title("Initialization") - + ax.set_title("BGPLVM init") var = .5 - S = (var * np.ones_like(X) + np.clip(np.random.randn(N, input_dim) * var ** 2, + S = (var * _np.ones_like(X) + _np.clip(_np.random.randn(N, Q) * var ** 2, - (1 - var), (1 - var))) + .001 - Z = np.random.permutation(X)[:num_inducing] + Z = _np.random.permutation(X)[:num_inducing] - kernel = GPy.kern.rbf(input_dim, ARD=True) + GPy.kern.bias(input_dim, np.exp(-2)) + GPy.kern.white(input_dim, np.exp(-2)) + kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) - m = BayesianGPLVM(Y, input_dim, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel) + m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel) m.data_colors = c m.data_t = t - - m['rbf_lengthscale'] = 1. # X.var(0).max() / X.var(0) m['noise_variance'] = Y.var() / 100. 
- m['bias_variance'] = 0.05 if optimize: - m.optimize('scg', messages=1) + m.optimize('scg', messages=verbose, max_iters=2e3) + + if plot: + fig = plt.figure('fitted') + ax = fig.add_subplot(111) + s = m.input_sensitivity().argsort()[::-1][:2] + ax.scatter(*m.X.T[s], c=c) + return m -def BGPLVM_oil(optimize=True, N=200, input_dim=7, num_inducing=40, max_iters=1000, plot=False, **k): - np.random.seed(0) +def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k): + import GPy + from GPy.likelihoods import Gaussian + from matplotlib import pyplot as plt + + _np.random.seed(0) data = GPy.util.datasets.oil() - # create simple GP model - kernel = GPy.kern.rbf_inv(input_dim, 1., [.1] * input_dim, ARD=True) + GPy.kern.bias(input_dim, np.exp(-2)) - + kernel = GPy.kern.rbf_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) Y = data['X'][:N] Yn = Gaussian(Y, normalize=True) -# Yn = Y - Y.mean(0) -# Yn /= Yn.std(0) - - m = GPy.models.BayesianGPLVM(Yn, input_dim, kernel=kernel, num_inducing=num_inducing, **k) + m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k) m.data_labels = data['Y'][:N].argmax(axis=1) + m['noise'] = Yn.Y.var() / 100. - # m.constrain('variance|leng', LogexpClipped()) - # m['.*lengt'] = m.X.var(0).max() / m.X.var(0) - m['gaussian'] = Yn.Y.var() / 100. - - - # optimize if optimize: - m.gaussian.variance.fix() # m.constrain_fixed('noise') - m.optimize('scg', messages=1, max_iters=200, gtol=.05) - m.gaussian.variance.constrain_positive() # m.constrain_positive('noise') - #m.constrain_bounded('white', 1e-7, 1) - m.optimize('scg', messages=1, max_iters=max_iters, gtol=.05) + m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05) if plot: y = m.likelihood.Y[0, :] fig, (latent_axes, sense_axes) = plt.subplots(1, 2) - plt.sca(latent_axes) - m.plot_latent() + m.plot_latent(ax=latent_axes) data_show = GPy.util.visualize.vector_show(y) - lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes) + lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable + m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) raw_input('Press enter to finish') plt.close(fig) return m -def oil_100(): - data = GPy.util.datasets.oil_100() - m = GPy.models.GPLVM(data['X'], 2) - - # optimize - m.optimize(messages=1, max_iters=2) - - # plot - print(m) - # m.plot_latent(labels=data['Y'].argmax(axis=1)) - return m - - - -def _simulate_sincos(D1, D2, D3, N, num_inducing, input_dim, plot_sim=False): - x = np.linspace(0, 4 * np.pi, N)[:, None] - s1 = np.vectorize(lambda x: np.sin(x)) - s2 = np.vectorize(lambda x: np.cos(x)) - s3 = np.vectorize(lambda x:-np.exp(-np.cos(2 * x))) - sS = np.vectorize(lambda x: np.sin(2 * x)) +def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): + x = _np.linspace(0, 4 * _np.pi, N)[:, None] + s1 = _np.vectorize(lambda x: _np.sin(x)) + s2 = _np.vectorize(lambda x: _np.cos(x)) + s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x))) + sS = _np.vectorize(lambda x: _np.sin(2 * x)) s1 = s1(x) s2 = s2(x) s3 = s3(x) sS = sS(x) - S1 = np.hstack([s1, sS]) - S2 = np.hstack([s2, s3, sS]) - S3 = np.hstack([s3, sS]) + S1 = _np.hstack([s1, sS]) + S2 = _np.hstack([s2, s3, sS]) + S3 = _np.hstack([s3, sS]) - Y1 = S1.dot(np.random.randn(S1.shape[1], D1)) - Y2 = S2.dot(np.random.randn(S2.shape[1], D2)) - Y3 = S3.dot(np.random.randn(S3.shape[1], D3)) + Y1 = S1.dot(_np.random.randn(S1.shape[1], D1)) + Y2 
= S2.dot(_np.random.randn(S2.shape[1], D2)) + Y3 = S3.dot(_np.random.randn(S3.shape[1], D3)) - Y1 += .3 * np.random.randn(*Y1.shape) - Y2 += .2 * np.random.randn(*Y2.shape) - Y3 += .25 * np.random.randn(*Y3.shape) + Y1 += .3 * _np.random.randn(*Y1.shape) + Y2 += .2 * _np.random.randn(*Y2.shape) + Y3 += .25 * _np.random.randn(*Y3.shape) Y1 -= Y1.mean(0) Y2 -= Y2.mean(0) @@ -230,6 +218,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, input_dim, plot_sim=False): if plot_sim: import pylab + import matplotlib.cm as cm import itertools fig = pylab.figure("MRD Simulation Data", figsize=(8, 6)) fig.clf() @@ -247,114 +236,99 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, input_dim, plot_sim=False): return slist, [S1, S2, S3], Ylist -def bgplvm_simulation_matlab_compare(): - from GPy.util.datasets import simulation_BGPLVM - sim_data = simulation_BGPLVM() - Y = sim_data['Y'] - S = sim_data['S'] - mu = sim_data['mu'] - num_inducing, [_, input_dim] = 3, mu.shape +# def bgplvm_simulation_matlab_compare(): +# from GPy.util.datasets import simulation_BGPLVM +# from GPy import kern +# from GPy.models import BayesianGPLVM +# +# sim_data = simulation_BGPLVM() +# Y = sim_data['Y'] +# mu = sim_data['mu'] +# num_inducing, [_, Q] = 3, mu.shape +# +# k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) +# m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, +# _debug=False) +# m.auto_scale_factor = True +# m['noise'] = Y.var() / 100. +# m['linear_variance'] = .01 +# return m - from GPy.models import mrd - from GPy import kern - reload(mrd); reload(kern) - k = kern.linear(input_dim, ARD=True) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2)) - m = BayesianGPLVM(Y, input_dim, init="PCA", num_inducing=num_inducing, kernel=k, -# X=mu, -# X_variance=S, - _debug=False) - m.auto_scale_factor = True - m['gaussian'] = Y.var() / 100. - m['linear_variance'] = .01 - return m - -def bgplvm_simulation(optimize='scg', - plot=True, +def bgplvm_simulation(optimize=True, verbose=1, + plot=True, plot_sim=False, max_iters=2e4, - plot_sim=False): -# from GPy.core.transformations import LogexpClipped - D1, D2, D3, N, num_inducing, input_dim = 15, 5, 8, 30, 3, 10 - slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, input_dim, plot_sim) - - from GPy.models import mrd + ): from GPy import kern - reload(mrd); reload(kern) + from GPy.models import BayesianGPLVM + D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10 + _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) Y = Ylist[0] - - k = kern.linear(input_dim, ARD=True) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2)) # + kern.bias(input_dim) - m = BayesianGPLVM(Y, input_dim, init="PCA", num_inducing=num_inducing, kernel=k) - - import ipdb; ipdb.set_trace() - # m.constrain('variance|noise', LogexpClipped()) - m['gaussian'] = Y.var() / 100. + k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) + m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) + m['noise'] = Y.var() / 100. 
if optimize: print "Optimizing model:" - m.optimize(optimize, max_iters=max_iters, - messages=True, gtol=.05) + m.optimize('scg', messages=verbose, max_iters=max_iters, + gtol=.05) if plot: m.plot_X_1d("BGPLVM Latent Space 1D") m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') return m -def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): - D1, D2, D3, N, num_inducing, input_dim = 60, 20, 36, 60, 6, 5 - slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, input_dim, plot_sim) +def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): + from GPy import kern + from GPy.models import MRD + from GPy.likelihoods import Gaussian + D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 + _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) likelihood_list = [Gaussian(x, normalize=True) for x in Ylist] - from GPy.models import mrd - from GPy import kern - - reload(mrd); reload(kern) - - k = kern.linear(input_dim, ARD=True) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2)) - m = mrd.MRD(likelihood_list, input_dim=input_dim, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw) + k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) + m = MRD(likelihood_list, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw) m.ensure_default_constraints() for i, bgplvm in enumerate(m.bgplvms): m['{}_noise'.format(i)] = bgplvm.likelihood.Y.var() / 500. - - # DEBUG - # np.seterr("raise") - if optimize: print "Optimizing Model:" - m.optimize(messages=1, max_iters=8e3, gtol=.1) + m.optimize(messages=verbose, max_iters=8e3, gtol=.1) if plot: m.plot_X_1d("MRD Latent Space 1D") m.plot_scales("MRD Scales") return m -def brendan_faces(): - from GPy import kern +def brendan_faces(optimize=True, verbose=True, plot=True): + import GPy + data = GPy.util.datasets.brendan_faces() - input_dim = 2 - Y = data['Y'][0:-1:10, :] - # Y = data['Y'] + Q = 2 + Y = data['Y'] Yn = Y - Y.mean() Yn /= Yn.std() - m = GPy.models.GPLVM(Yn, input_dim) - # m = GPy.models.BayesianGPLVM(Yn, input_dim, num_inducing=100) + m = GPy.models.GPLVM(Yn, Q) # optimize - m.constrain('rbf|noise|white', GPy.core.transformations.LogexpClipped()) + m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) - m.optimize('scg', messages=1, max_f_eval=10000) + if optimize: m.optimize('scg', messages=verbose, max_iters=1000) - ax = m.plot_latent(which_indices=(0, 1)) - y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False) - lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) - raw_input('Press enter to finish') + if plot: + ax = m.plot_latent(which_indices=(0, 1)) + y = m.likelihood.Y[0, :] + data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False) + GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + raw_input('Press enter to finish') return m -def olivetti_faces(): - from GPy import kern +def olivetti_faces(optimize=True, verbose=True, plot=True): + import GPy + data = GPy.util.datasets.olivetti_faces() Q = 2 Y = data['Y'] @@ -362,152 +336,142 @@ def olivetti_faces(): Yn /= Yn.std() m = GPy.models.GPLVM(Yn, Q) - m.optimize('scg', messages=1, max_iters=1000) - - ax = m.plot_latent(which_indices=(0, 1)) - y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.image_show(y[None, :], 
dimensions=(112, 92), transpose=False, invert=False, scale=False) - lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) - raw_input('Press enter to finish') + if optimize: m.optimize('scg', messages=verbose, max_iters=1000) + if plot: + ax = m.plot_latent(which_indices=(0, 1)) + y = m.likelihood.Y[0, :] + data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False) + GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + raw_input('Press enter to finish') return m -def stick_play(range=None, frame_rate=15): +def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=True): + import GPy data = GPy.util.datasets.osu_run1() # optimize if range == None: Y = data['Y'].copy() else: Y = data['Y'][range[0]:range[1], :].copy() - y = Y[0, :] - data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - GPy.util.visualize.data_play(Y, data_show, frame_rate) + if plot: + y = Y[0, :] + data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) + GPy.util.visualize.data_play(Y, data_show, frame_rate) return Y -def stick(kernel=None): +def stick(kernel=None, optimize=True, verbose=True, plot=True): + from matplotlib import pyplot as plt + import GPy + data = GPy.util.datasets.osu_run1() # optimize m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel) - m.optimize(messages=1, max_f_eval=10000) - if GPy.util.visualize.visual_available: + if optimize: m.optimize(messages=verbose, max_f_eval=10000) + if plot and GPy.util.visualize.visual_available: plt.clf ax = m.plot_latent() y = m.likelihood.Y[0, :] data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m -def bcgplvm_linear_stick(kernel=None): +def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True): + from matplotlib import pyplot as plt + import GPy + data = GPy.util.datasets.osu_run1() # optimize mapping = GPy.mappings.Linear(data['Y'].shape[1], 2) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) - m.optimize(messages=1, max_f_eval=10000) - if GPy.util.visualize.visual_available: + if optimize: m.optimize(messages=verbose, max_f_eval=10000) + if plot and GPy.util.visualize.visual_available: plt.clf ax = m.plot_latent() y = m.likelihood.Y[0, :] data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m -def bcgplvm_stick(kernel=None): +def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True): + from matplotlib import pyplot as plt + import GPy + data = GPy.util.datasets.osu_run1() # optimize back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.) 
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) - m.optimize(messages=1, max_f_eval=10000) - if GPy.util.visualize.visual_available: + if optimize: m.optimize(messages=verbose, max_f_eval=10000) + if plot and GPy.util.visualize.visual_available: plt.clf ax = m.plot_latent() y = m.likelihood.Y[0, :] data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m -def robot_wireless(): +def robot_wireless(optimize=True, verbose=True, plot=True): + from matplotlib import pyplot as plt + import GPy + data = GPy.util.datasets.robot_wireless() # optimize m = GPy.models.GPLVM(data['Y'], 2) - m.optimize(messages=1, max_f_eval=10000) + if optimize: m.optimize(messages=verbose, max_f_eval=10000) m._set_params(m._get_params()) - plt.clf - ax = m.plot_latent() + if plot: + m.plot_latent() return m -def stick_bgplvm(model=None): +def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): + from GPy.models import BayesianGPLVM + from matplotlib import pyplot as plt + import GPy + data = GPy.util.datasets.osu_run1() - input_dim = 6 - kernel = GPy.kern.rbf(input_dim, ARD=True) + GPy.kern.bias(input_dim, np.exp(-2)) + GPy.kern.white(input_dim, np.exp(-2)) - m = BayesianGPLVM(data['Y'], input_dim, init="PCA", num_inducing=20, kernel=kernel) + Q = 6 + kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) + m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) # optimize m.ensure_default_constraints() - m.optimize('scg', messages=1, max_iters=200, xtol=1e-300, ftol=1e-300) + if optimize: m.optimize('scg', messages=verbose, max_iters=200, xtol=1e-300, ftol=1e-300) m._set_params(m._get_params()) - plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2) - plt.sca(latent_axes) - m.plot_latent() - y = m.likelihood.Y[0, :].copy() - data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) - raw_input('Press enter to finish') + if plot: + plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2) + plt.sca(latent_axes) + m.plot_latent() + y = m.likelihood.Y[0, :].copy() + data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) + GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) + raw_input('Press enter to finish') return m -def cmu_mocap(subject='35', motion=['01'], in_place=True): +def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose=True, plot=True): + import GPy data = GPy.util.datasets.cmu_mocap(subject, motion) - Y = data['Y'] if in_place: # Make figure move in place. 
data['Y'][:, 0:3] = 0.0 m = GPy.models.GPLVM(data['Y'], 2, normalize_Y=True) - # optimize - m.optimize(messages=1, max_f_eval=10000) - - ax = m.plot_latent() - y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel']) - lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) - raw_input('Press enter to finish') - lvm_visualizer.close() + if optimize: m.optimize(messages=verbose, max_f_eval=10000) + if plot: + ax = m.plot_latent() + y = m.likelihood.Y[0, :] + data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel']) + lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + raw_input('Press enter to finish') + lvm_visualizer.close() return m - -# def BGPLVM_oil(): -# data = GPy.util.datasets.oil() -# Y, X = data['Y'], data['X'] -# X -= X.mean(axis=0) -# X /= X.std(axis=0) -# -# input_dim = 10 -# num_inducing = 30 -# -# kernel = GPy.kern.rbf(input_dim, ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) -# m = GPy.models.BayesianGPLVM(X, input_dim, kernel=kernel, num_inducing=num_inducing) -# # m.scale_factor = 100.0 -# m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)') -# from sklearn import cluster -# km = cluster.KMeans(num_inducing, verbose=10) -# Z = km.fit(m.X).cluster_centers_ -# # Z = GPy.util.misc.kmm_init(m.X, num_inducing) -# m.set('iip', Z) -# m.set('bias', 1e-4) -# # optimize -# -# import pdb; pdb.set_trace() -# m.optimize('tnc', messages=1) -# print m -# m.plot_latent(labels=data['Y'].argmax(axis=1)) -# return m - diff --git a/GPy/examples/non_gaussian.py b/GPy/examples/non_gaussian.py new file mode 100644 index 00000000..bda80137 --- /dev/null +++ b/GPy/examples/non_gaussian.py @@ -0,0 +1,286 @@ +import GPy +import numpy as np +import matplotlib.pyplot as plt +from GPy.util import datasets + +def student_t_approx(optimize=True, plot=True): + """ + Example of regressing with a student t likelihood using Laplace + """ + real_std = 0.1 + #Start a function, any function + X = np.linspace(0.0, np.pi*2, 100)[:, None] + Y = np.sin(X) + np.random.randn(*X.shape)*real_std + Y = Y/Y.max() + Yc = Y.copy() + + X_full = np.linspace(0.0, np.pi*2, 500)[:, None] + Y_full = np.sin(X_full) + Y_full = Y_full/Y_full.max() + + #Slightly noisy data + Yc[75:80] += 1 + + #Very noisy data + #Yc[10] += 100 + #Yc[25] += 10 + #Yc[23] += 10 + #Yc[26] += 1000 + #Yc[24] += 10 + #Yc = Yc/Yc.max() + + #Add student t random noise to datapoints + deg_free = 5 + print "Real noise: ", real_std + initial_var_guess = 0.5 + edited_real_sd = initial_var_guess + + # Kernel object + kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + kernel2 = kernel1.copy() + kernel3 = kernel1.copy() + kernel4 = kernel1.copy() + + #Gaussian GP model on clean data + m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1) + # optimize + m1.ensure_default_constraints() + m1.constrain_fixed('white', 1e-5) + m1.randomize() + + #Gaussian GP model on corrupt data + m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2) + m2.ensure_default_constraints() + m2.constrain_fixed('white', 1e-5) + m2.randomize() + + #Student t GP model on clean data + t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd) + stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution) + m3 = GPy.models.GPRegression(X, Y.copy(), kernel3, likelihood=stu_t_likelihood) + m3.ensure_default_constraints() + m3.constrain_bounded('t_noise', 1e-6, 10.) 
+ m3.constrain_fixed('white', 1e-5) + m3.randomize() + + #Student t GP model on corrupt data + t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd) + corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution) + m4 = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood) + m4.ensure_default_constraints() + m4.constrain_bounded('t_noise', 1e-6, 10.) + m4.constrain_fixed('white', 1e-5) + m4.randomize() + + if optimize: + optimizer='scg' + print "Clean Gaussian" + m1.optimize(optimizer, messages=1) + print "Corrupt Gaussian" + m2.optimize(optimizer, messages=1) + print "Clean student t" + m3.optimize(optimizer, messages=1) + print "Corrupt student t" + m4.optimize(optimizer, messages=1) + + if plot: + plt.figure(1) + plt.suptitle('Gaussian likelihood') + ax = plt.subplot(211) + m1.plot(ax=ax) + plt.plot(X_full, Y_full) + plt.ylim(-1.5, 1.5) + plt.title('Gaussian clean') + + ax = plt.subplot(212) + m2.plot(ax=ax) + plt.plot(X_full, Y_full) + plt.ylim(-1.5, 1.5) + plt.title('Gaussian corrupt') + + plt.figure(2) + plt.suptitle('Student-t likelihood') + ax = plt.subplot(211) + m3.plot(ax=ax) + plt.plot(X_full, Y_full) + plt.ylim(-1.5, 1.5) + plt.title('Student-t rasm clean') + + ax = plt.subplot(212) + m4.plot(ax=ax) + plt.plot(X_full, Y_full) + plt.ylim(-1.5, 1.5) + plt.title('Student-t rasm corrupt') + + return m1, m2, m3, m4 + +def boston_example(optimize=True, plot=True): + import sklearn + from sklearn.cross_validation import KFold + optimizer='bfgs' + messages=0 + data = datasets.boston_housing() + degrees_freedoms = [3, 5, 8, 10] + X = data['X'].copy() + Y = data['Y'].copy() + X = X-X.mean(axis=0) + X = X/X.std(axis=0) + Y = Y-Y.mean() + Y = Y/Y.std() + num_folds = 10 + kf = KFold(len(Y), n_folds=num_folds, indices=True) + num_models = len(degrees_freedoms) + 3 #3 for baseline, gaussian, gaussian laplace approx + score_folds = np.zeros((num_models, num_folds)) + pred_density = score_folds.copy() + + def rmse(Y, Ystar): + return np.sqrt(np.mean((Y-Ystar)**2)) + + for n, (train, test) in enumerate(kf): + X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test] + print "Fold {}".format(n) + + noise = 1e-1 #np.exp(-2) + rbf_len = 0.5 + data_axis_plot = 4 + kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) + kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) + + #Baseline + score_folds[0, n] = rmse(Y_test, np.mean(Y_train)) + + #Gaussian GP + print "Gauss GP" + mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy()) + mgp.ensure_default_constraints() + mgp.constrain_fixed('white', 1e-5) + mgp['rbf_len'] = rbf_len + mgp['noise'] = noise + print mgp + if optimize: + mgp.optimize(optimizer=optimizer, messages=messages) + Y_test_pred = mgp.predict(X_test) + score_folds[1, n] = rmse(Y_test, Y_test_pred[0]) + pred_density[1, n] = np.mean(mgp.log_predictive_density(X_test, Y_test)) + print mgp + print pred_density + + print "Gaussian Laplace GP" + N, D = Y_train.shape + g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D) + g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution) + mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood) + mg.ensure_default_constraints() + mg.constrain_positive('noise_variance') + mg.constrain_fixed('white', 1e-5) + mg['rbf_len'] = 
rbf_len + mg['noise'] = noise + print mg + if optimize: + mg.optimize(optimizer=optimizer, messages=messages) + Y_test_pred = mg.predict(X_test) + score_folds[2, n] = rmse(Y_test, Y_test_pred[0]) + pred_density[2, n] = np.mean(mg.log_predictive_density(X_test, Y_test)) + print pred_density + print mg + + for stu_num, df in enumerate(degrees_freedoms): + #Student T + print "Student-T GP {}df".format(df) + t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise) + stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution) + mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood) + mstu_t.ensure_default_constraints() + mstu_t.constrain_fixed('white', 1e-5) + mstu_t.constrain_bounded('t_noise', 0.0001, 1000) + mstu_t['rbf_len'] = rbf_len + mstu_t['t_noise'] = noise + print mstu_t + if optimize: + mstu_t.optimize(optimizer=optimizer, messages=messages) + Y_test_pred = mstu_t.predict(X_test) + score_folds[3+stu_num, n] = rmse(Y_test, Y_test_pred[0]) + pred_density[3+stu_num, n] = np.mean(mstu_t.log_predictive_density(X_test, Y_test)) + print pred_density + print mstu_t + + if plot: + plt.figure() + plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0]) + plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x') + plt.title('GP gauss') + + plt.figure() + plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0]) + plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x') + plt.title('Lap gauss') + + plt.figure() + plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0]) + plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x') + plt.title('Stu t {}df'.format(df)) + + print "Average scores: {}".format(np.mean(score_folds, 1)) + print "Average pred density: {}".format(np.mean(pred_density, 1)) + + if plot: + #Plotting + stu_t_legends = ['Student T, df={}'.format(df) for df in degrees_freedoms] + legends = ['Baseline', 'Gaussian', 'Laplace Approx Gaussian'] + stu_t_legends + + #Plot boxplots for RMSE density + fig = plt.figure() + ax=fig.add_subplot(111) + plt.title('RMSE') + bp = ax.boxplot(score_folds.T, notch=0, sym='+', vert=1, whis=1.5) + plt.setp(bp['boxes'], color='black') + plt.setp(bp['whiskers'], color='black') + plt.setp(bp['fliers'], color='red', marker='+') + xtickNames = plt.setp(ax, xticklabels=legends) + plt.setp(xtickNames, rotation=45, fontsize=8) + ax.set_ylabel('RMSE') + ax.set_xlabel('Distribution') + #Make grid and put it below boxes + ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', + alpha=0.5) + ax.set_axisbelow(True) + + #Plot boxplots for predictive density + fig = plt.figure() + ax=fig.add_subplot(111) + plt.title('Predictive density') + bp = ax.boxplot(pred_density[1:,:].T, notch=0, sym='+', vert=1, whis=1.5) + plt.setp(bp['boxes'], color='black') + plt.setp(bp['whiskers'], color='black') + plt.setp(bp['fliers'], color='red', marker='+') + xtickNames = plt.setp(ax, xticklabels=legends[1:]) + plt.setp(xtickNames, rotation=45, fontsize=8) + ax.set_ylabel('Mean Log probability P(Y*|Y)') + ax.set_xlabel('Distribution') + #Make grid and put it below boxes + ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', + alpha=0.5) + ax.set_axisbelow(True) + return mstu_t + +#def precipitation_example(): + #import sklearn + #from sklearn.cross_validation import KFold + #data = datasets.boston_housing() + #X = data['X'].copy() + #Y = data['Y'].copy() + #X = X-X.mean(axis=0) + #X = X/X.std(axis=0) + #Y = Y-Y.mean() + 
#Y = Y/Y.std() + #import ipdb; ipdb.set_trace() # XXX BREAKPOINT + #num_folds = 10 + #kf = KFold(len(Y), n_folds=num_folds, indices=True) + #score_folds = np.zeros((4, num_folds)) + #def rmse(Y, Ystar): + #return np.sqrt(np.mean((Y-Ystar)**2)) + ##for train, test in kf: + #for n, (train, test) in enumerate(kf): + #X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test] + #print "Fold {}".format(n) + diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 8f8b3d6e..65a50f0e 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -1,7 +1,6 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) - """ Gaussian Processes regression examples """ @@ -9,88 +8,105 @@ import pylab as pb import numpy as np import GPy -def coregionalization_toy2(max_iters=100): +def olympic_marathon_men(optimize=True, plot=True): + """Run a standard Gaussian process regression on the Olympic marathon data.""" + data = GPy.util.datasets.olympic_marathon_men() + + # create simple GP Model + m = GPy.models.GPRegression(data['X'], data['Y']) + + # set the lengthscale to be something sensible (defaults to 1) + m['rbf_lengthscale'] = 10 + + if optimize: + m.optimize('bfgs', max_iters=200) + if plot: + m.plot(plot_limits=(1850, 2050)) + + return m + +def coregionalization_toy2(optimize=True, plot=True): """ A simple demonstration of coregionalization on two sinusoidal functions. """ + #build a design matrix with a column of integers indicating the output X1 = np.random.rand(50, 1) * 8 X2 = np.random.rand(30, 1) * 5 index = np.vstack((np.zeros_like(X1), np.ones_like(X2))) X = np.hstack((np.vstack((X1, X2)), index)) + + #build a suitable set of observed variables Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05 Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2. Y = np.vstack((Y1, Y2)) + #build the kernel k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) k2 = GPy.kern.coregionalize(2,1) - k = k1**k2 #k = k1.prod(k2,tensor=True) + k = k1**k2 m = GPy.models.GPRegression(X, Y, kernel=k) m.constrain_fixed('.*rbf_var', 1.) - # m.constrain_positive('.*kappa') - m.optimize('sim', messages=1, max_iters=max_iters) - pb.figure() - Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1)))) - Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1)))) - mean, var, low, up = m.predict(Xtest1) - GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up) - mean, var, low, up = m.predict(Xtest2) - GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up) - pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2) - pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2) + if optimize: + m.optimize('bfgs', max_iters=100) + + if plot: + m.plot(fixed_inputs=[(1,0)]) + m.plot(fixed_inputs=[(1,1)], ax=pb.gca()) + return m -def coregionalization_toy(max_iters=100): - """ - A simple demonstration of coregionalization on two sinusoidal functions. - """ - X1 = np.random.rand(50, 1) * 8 - X2 = np.random.rand(30, 1) * 5 - X = np.vstack((X1, X2)) - Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05 - Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 - Y = np.vstack((Y1, Y2)) +#FIXME: Needs recovering once likelihoods are consolidated +#def coregionalization_toy(optimize=True, plot=True): +# """ +# A simple demonstration of coregionalization on two sinusoidal functions. 
+# """ +# X1 = np.random.rand(50, 1) * 8 +# X2 = np.random.rand(30, 1) * 5 +# X = np.vstack((X1, X2)) +# Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05 +# Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 +# Y = np.vstack((Y1, Y2)) +# +# k1 = GPy.kern.rbf(1) +# m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1]) +# m.constrain_fixed('.*rbf_var', 1.) +# m.optimize(max_iters=100) +# +# fig, axes = pb.subplots(2,1) +# m.plot(fixed_inputs=[(1,0)],ax=axes[0]) +# m.plot(fixed_inputs=[(1,1)],ax=axes[1]) +# axes[0].set_title('Output 0') +# axes[1].set_title('Output 1') +# return m - k1 = GPy.kern.rbf(1) - m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1]) - m.constrain_fixed('.*rbf_var', 1.) - m.optimize(max_iters=max_iters) - - fig, axes = pb.subplots(2,1) - m.plot_single_output(output=0,ax=axes[0]) - m.plot_single_output(output=1,ax=axes[1]) - axes[0].set_title('Output 0') - axes[1].set_title('Output 1') - return m - -def coregionalization_sparse(max_iters=100): +def coregionalization_sparse(optimize=True, plot=True): """ A simple demonstration of coregionalization on two sinusoidal functions using sparse approximations. """ - X1 = np.random.rand(500, 1) * 8 - X2 = np.random.rand(300, 1) * 5 - index = np.vstack((np.zeros_like(X1), np.ones_like(X2))) - X = np.hstack((np.vstack((X1, X2)), index)) - Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05 - Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 - Y = np.vstack((Y1, Y2)) + #fetch the data from the non sparse examples + m = coregionalization_toy2(optimize=False, plot=False) + X, Y = m.X, m.likelihood.Y - k1 = GPy.kern.rbf(1) + #construct a model + m = GPy.models.SparseGPRegression(X,Y) + m.constrain_fixed('iip_\d+_1') # don't optimize the inducing input indexes - m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1],num_inducing=5) - m.constrain_fixed('.*rbf_var',1.) - #m.optimize(messages=1) - m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs') + if optimize: + m.optimize('bfgs', max_iters=100, messages=1) + + if plot: + m.plot(fixed_inputs=[(1,0)]) + m.plot(fixed_inputs=[(1,1)], ax=pb.gca()) - fig, axes = pb.subplots(2,1) - m.plot_single_output(output=0,ax=axes[0],plot_limits=(-1,9)) - m.plot_single_output(output=1,ax=axes[1],plot_limits=(-1,9)) - axes[0].set_title('Output 0') - axes[1].set_title('Output 1') return m -def epomeo_gpx(max_iters=100): - """Perform Gaussian process regression on the latitude and longitude data from the Mount Epomeo runs. Requires gpxpy to be installed on your system to load in the data.""" +def epomeo_gpx(max_iters=200, optimize=True, plot=True): + """ + Perform Gaussian process regression on the latitude and longitude data + from the Mount Epomeo runs. Requires gpxpy to be installed on your system + to load in the data. + """ data = GPy.util.datasets.epomeo_gpx() num_data_list = [] for Xpart in data['X']: @@ -119,14 +135,16 @@ def epomeo_gpx(max_iters=100): m.constrain_fixed('.*rbf_var', 1.) m.constrain_fixed('iip') m.constrain_bounded('noise_variance', 1e-3, 1e-1) -# m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs') m.optimize(max_iters=max_iters,messages=True) return m - -def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=10000, max_iters=300): - """Show an example of a multimodal error surface for Gaussian process regression. 
Gene 939 has bimodal behaviour where the noisy mode is higher."""
+def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=10000, max_iters=300, optimize=True, plot=True):
+    """
+    Show an example of a multimodal error surface for Gaussian process
+    regression. Gene 939 has bimodal behaviour where the noisy mode is
+    higher.
+    """

     # Contour over a range of length scales and signal/noise ratios.
     length_scales = np.linspace(0.1, 60., resolution)
@@ -139,13 +157,14 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
     data['Y'] = data['Y'] - np.mean(data['Y'])

     lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
-    pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet) # @UndefinedVariable
-    ax = pb.gca()
-    pb.xlabel('length scale')
-    pb.ylabel('log_10 SNR')
+    if plot:
+        pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
+        ax = pb.gca()
+        pb.xlabel('length scale')
+        pb.ylabel('log_10 SNR')

-    xlim = ax.get_xlim()
-    ylim = ax.get_ylim()
+        xlim = ax.get_xlim()
+        ylim = ax.get_ylim()

     # Now run a few optimizations
     models = []
@@ -162,25 +181,31 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
         optim_point_y[0] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']);

         # optimize
-        m.optimize('scg', xtol=1e-6, ftol=1e-6, max_iters=max_iters)
+        if optimize:
+            m.optimize('scg', xtol=1e-6, ftol=1e-6, max_iters=max_iters)

         optim_point_x[1] = m['rbf_lengthscale']
         optim_point_y[1] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']);

-        pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1] - optim_point_x[0], optim_point_y[1] - optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
+        if plot:
+            pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1] - optim_point_x[0], optim_point_y[1] - optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
         models.append(m)

-    ax.set_xlim(xlim)
-    ax.set_ylim(ylim)
+    if plot:
+        ax.set_xlim(xlim)
+        ax.set_ylim(ylim)
     return m # (models, lls)

 def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
-    """Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales.
+    """
+    Evaluate the GP objective function for a given data set for a range of
+    signal to noise ratios and a range of lengthscales.

     :data_set: A data set from the utils.datasets directory.
     :length_scales: a list of length scales to explore for the contour plot.
     :log_SNRs: a list of base 10 logarithm signal to noise ratios to explore for the contour plot.
-    :kernel: a kernel to use for the 'signal' portion of the data.
+ """ lls = [] total_var = np.var(data['Y']) @@ -203,75 +228,75 @@ def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf): return np.array(lls) -def olympic_100m_men(max_iters=100, kernel=None): +def olympic_100m_men(optimize=True, plot=True): """Run a standard Gaussian process regression on the Rogers and Girolami olympics data.""" data = GPy.util.datasets.olympic_100m_men() # create simple GP Model - m = GPy.models.GPRegression(data['X'], data['Y'], kernel) + m = GPy.models.GPRegression(data['X'], data['Y']) # set the lengthscale to be something sensible (defaults to 1) - if kernel==None: - m['rbf_lengthscale'] = 10 + m['rbf_lengthscale'] = 10 - # optimize - m.optimize(max_iters=max_iters) + if optimize: + m.optimize('bfgs', max_iters=200) - # plot - m.plot(plot_limits=(1850, 2050)) - print(m) + if plot: + m.plot(plot_limits=(1850, 2050)) return m -def olympic_marathon_men(max_iters=100, kernel=None): - """Run a standard Gaussian process regression on the Olympic marathon data.""" - data = GPy.util.datasets.olympic_marathon_men() - - # create simple GP Model - m = GPy.models.GPRegression(data['X'], data['Y'], kernel) - - # set the lengthscale to be something sensible (defaults to 1) - if kernel==None: - m['rbf_lengthscale'] = 10 - - # optimize - m.optimize(max_iters=max_iters) - - # plot - m.plot(plot_limits=(1850, 2050)) - print(m) - return m - -def toy_rbf_1d(optimizer='tnc', max_nb_eval_optim=100): +def toy_rbf_1d(optimize=True, plot=True): """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" data = GPy.util.datasets.toy_rbf_1d() # create simple GP Model m = GPy.models.GPRegression(data['X'], data['Y']) - # optimize - m.optimize(optimizer, max_f_eval=max_nb_eval_optim) - # plot - m.plot() - print(m) + if optimize: + m.optimize('bfgs') + if plot: + m.plot() + return m -def toy_rbf_1d_50(max_iters=100, optimize=True): +def toy_rbf_1d_50(optimize=True, plot=True): """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" data = GPy.util.datasets.toy_rbf_1d_50() # create simple GP Model m = GPy.models.GPRegression(data['X'], data['Y']) - # optimize if optimize: - m.optimize(max_iters=max_iters) + m.optimize('bfgs') + if plot: + m.plot() - # plot - m.plot() - print(m) return m -def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize=True): +def toy_poisson_rbf_1d_laplace(optimize=True, plot=True): + """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" + optimizer='scg' + x_len = 30 + X = np.linspace(0, 10, x_len)[:, None] + f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X)) + Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None] + + noise_model = GPy.likelihoods.poisson() + likelihood = GPy.likelihoods.Laplace(Y,noise_model) + + # create simple GP Model + m = GPy.models.GPRegression(X, Y, likelihood=likelihood) + + if optimize: + m.optimize(optimizer) + if plot: + m.plot() + # plot the real underlying rate function + pb.plot(X, np.exp(f_true), '--k', linewidth=2) + + return m + +def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize=True, plot=True): # Create an artificial dataset where the values in the targets (Y) # only depend in dimensions 1 and 3 of the inputs (X). 
Run ARD to
 # see if this dependency can be recovered
@@ -301,13 +326,16 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
 #     len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
 #     m.set_prior('.*lengthscale',len_prior)

-    if optimize: m.optimize(optimizer='scg', max_iters=max_iters, messages=1)
+    if optimize:
+        m.optimize(optimizer='scg', max_iters=max_iters, messages=1)

-    m.kern.plot_ARD()
-    print(m)
+    if plot:
+        m.kern.plot_ARD()
+
+    print m
     return m

-def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4):
+def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize=True, plot=True):
     # Create an artificial dataset where the values in the targets (Y)
     # only depend in dimensions 1 and 3 of the inputs (X). Run ARD to
     # see if this dependency can be recovered
@@ -338,13 +366,16 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4):
 #     len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
 #     m.set_prior('.*lengthscale',len_prior)

-    m.optimize(optimizer='scg', max_iters=max_iters, messages=1)
+    if optimize:
+        m.optimize(optimizer='scg', max_iters=max_iters, messages=1)

-    m.kern.plot_ARD()
-    print(m)
+    if plot:
+        m.kern.plot_ARD()
+
+    print m
     return m

-def robot_wireless(max_iters=100, kernel=None):
+def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
     """Predict the location of a robot given wireless signal strength readings."""
     data = GPy.util.datasets.robot_wireless()

@@ -352,20 +383,24 @@
     m = GPy.models.GPRegression(data['Y'], data['X'], kernel=kernel)

     # optimize
-    m.optimize(messages=True, max_iters=max_iters)
+    if optimize:
+        m.optimize(messages=True, max_iters=max_iters)
+
     Xpredict = m.predict(data['Ytest'])[0]
-    pb.plot(data['Xtest'][:, 0], data['Xtest'][:, 1], 'r-')
-    pb.plot(Xpredict[:, 0], Xpredict[:, 1], 'b-')
-    pb.axis('equal')
-    pb.title('WiFi Localization with Gaussian Processes')
-    pb.legend(('True Location', 'Predicted Location'))
+    if plot:
+        pb.plot(data['Xtest'][:, 0], data['Xtest'][:, 1], 'r-')
+        pb.plot(Xpredict[:, 0], Xpredict[:, 1], 'b-')
+        pb.axis('equal')
+        pb.title('WiFi Localization with Gaussian Processes')
+        pb.legend(('True Location', 'Predicted Location'))

     sse = ((data['Xtest'] - Xpredict)**2).sum()
-    print(m)
+
+    print m
     print('Sum of squares error on test data: ' + str(sse))
     return m

-def silhouette(max_iters=100):
+def silhouette(max_iters=100, optimize=True, plot=True):
     """Predict the pose of a figure given a silhouette.
This is a task from Agarwal and Triggs 2004 ICML paper.""" data = GPy.util.datasets.silhouette() @@ -373,12 +408,13 @@ def silhouette(max_iters=100): m = GPy.models.GPRegression(data['X'], data['Y']) # optimize - m.optimize(messages=True, max_iters=max_iters) + if optimize: + m.optimize(messages=True, max_iters=max_iters) - print(m) + print m return m -def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, checkgrad=True): +def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True): """Run a 1D example of a sparse GP regression.""" # sample inputs and outputs X = np.random.uniform(-3., 3., (num_samples, 1)) @@ -387,15 +423,17 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti rbf = GPy.kern.rbf(1) # create simple GP Model m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing) + m.checkgrad(verbose=1) - if checkgrad: - m.checkgrad(verbose=1) if optimize: m.optimize('tnc', messages=1, max_iters=max_iters) - m.plot() + + if plot: + m.plot() + return m -def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100): +def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, optimize=True, plot=True): """Run a 2D example of a sparse GP regression.""" X = np.random.uniform(-3., 3., (num_samples, 2)) Y = np.sin(X[:, 0:1]) * np.sin(X[:, 1:2]) + np.random.randn(num_samples, 1) * 0.05 @@ -411,13 +449,18 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100): m.checkgrad() - # optimize and plot - m.optimize('tnc', messages=1, max_iters=max_iters) - m.plot() - print(m) + # optimize + if optimize: + m.optimize('tnc', messages=1, max_iters=max_iters) + + # plot + if plot: + m.plot() + + print m return m -def uncertain_inputs_sparse_regression(max_iters=100): +def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): """Run a 1D example of a sparse GP regression with uncertain inputs.""" fig, axes = pb.subplots(1, 2, figsize=(12, 5)) @@ -432,18 +475,23 @@ def uncertain_inputs_sparse_regression(max_iters=100): # create simple GP Model - no input uncertainty on this one m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z) - m.optimize('scg', messages=1, max_iters=max_iters) - m.plot(ax=axes[0]) - axes[0].set_title('no input uncertainty') + if optimize: + m.optimize('scg', messages=1, max_iters=max_iters) + + if plot: + m.plot(ax=axes[0]) + axes[0].set_title('no input uncertainty') + print m # the same Model with uncertainty m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S) - m.optimize('scg', messages=1, max_iters=max_iters) - m.plot(ax=axes[1]) - axes[1].set_title('with input uncertainty') - print(m) - - fig.canvas.draw() + if optimize: + m.optimize('scg', messages=1, max_iters=max_iters) + if plot: + m.plot(ax=axes[1]) + axes[1].set_title('with input uncertainty') + fig.canvas.draw() + print m return m diff --git a/GPy/examples/stochastic.py b/GPy/examples/stochastic.py index 21011901..c302ec7d 100644 --- a/GPy/examples/stochastic.py +++ b/GPy/examples/stochastic.py @@ -5,7 +5,7 @@ import pylab as pb import numpy as np import GPy -def toy_1d(): +def toy_1d(optimize=True, plot=True): N = 2000 M = 20 @@ -20,22 +20,18 @@ def toy_1d(): m.param_steplength = 1e-4 - fig = pb.figure() - ax = fig.add_subplot(111) - def cb(): - ax.cla() - m.plot(ax=ax,Z_height=-3) - ax.set_ylim(-3,3) - fig.canvas.draw() + if plot: + fig = pb.figure() + ax = fig.add_subplot(111) + def cb(foo): + 
ax.cla() + m.plot(ax=ax,Z_height=-3) + ax.set_ylim(-3,3) + fig.canvas.draw() - m.optimize(500, callback=cb, callback_interval=1) + if optimize: + m.optimize(500, callback=cb, callback_interval=1) - m.plot_traces() + if plot: + m.plot_traces() return m - - - - - - - diff --git a/GPy/examples/tutorials.py b/GPy/examples/tutorials.py index 69fc2aaf..7825992d 100644 --- a/GPy/examples/tutorials.py +++ b/GPy/examples/tutorials.py @@ -11,7 +11,7 @@ pb.ion() import numpy as np import GPy -def tuto_GP_regression(): +def tuto_GP_regression(optimize=True, plot=True): """The detailed explanations of the commands used in this file can be found in the tutorial section""" X = np.random.uniform(-3.,3.,(20,1)) @@ -22,7 +22,8 @@ def tuto_GP_regression(): m = GPy.models.GPRegression(X, Y, kernel) print m - m.plot() + if plot: + m.plot() m.constrain_positive('') @@ -31,9 +32,9 @@ def tuto_GP_regression(): m.constrain_bounded('.*lengthscale',1.,10. ) m.constrain_fixed('.*noise',0.0025) - m.optimize() - - m.optimize_restarts(num_restarts = 10) + if optimize: + m.optimize() + m.optimize_restarts(num_restarts = 10) ####################################################### ####################################################### @@ -51,22 +52,26 @@ def tuto_GP_regression(): m.constrain_positive('') # optimize and plot - m.optimize('tnc', max_f_eval = 1000) - m.plot() - print(m) + if optimize: + m.optimize('tnc', max_f_eval = 1000) + if plot: + m.plot() + + print m return(m) -def tuto_kernel_overview(): +def tuto_kernel_overview(optimize=True, plot=True): """The detailed explanations of the commands used in this file can be found in the tutorial section""" ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.) ker2 = GPy.kern.rbf(input_dim=1, variance = .75, lengthscale=2.) ker3 = GPy.kern.rbf(1, .5, .5) - + print ker2 - ker1.plot() - ker2.plot() - ker3.plot() + if plot: + ker1.plot() + ker2.plot() + ker3.plot() k1 = GPy.kern.rbf(1,1.,2.) k2 = GPy.kern.Matern32(1, 0.5, 0.2) @@ -77,8 +82,8 @@ def tuto_kernel_overview(): # Sum of kernels k_add = k1.add(k2) # By default, tensor=False - k_addtens = k1.add(k2,tensor=True) - + k_addtens = k1.add(k2,tensor=True) + k1 = GPy.kern.rbf(1,1.,2) k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5) @@ -102,7 +107,7 @@ def tuto_kernel_overview(): k.unconstrain('white') k.constrain_bounded('white',lower=1e-5,upper=.5) print k - + k_cst = GPy.kern.bias(1,variance=1.) 
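The tutorial context here composes a bias part and a Matern52 part into an ANOVA-style kernel; a rough sketch of that composition (tensor=True stacks input dimensions, so the product acts on 2-D inputs; API names as used in this tutorial):

    import numpy as np
    import GPy

    k_cst = GPy.kern.bias(1, variance=1.)
    k_mat = GPy.kern.Matern52(1, variance=1., lengthscale=3)
    # tensor=True: the product acts on the stacked (here 2-D) input space
    k_anova = (k_cst + k_mat).prod(k_cst + k_mat, tensor=True)

    X = np.random.rand(5, 2)
    print k_anova.K(X)  # 5x5 covariance over the 2-D inputs
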
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3) Kanova = (k_cst + k_mat).prod(k_cst + k_mat,tensor=True) @@ -114,30 +119,32 @@ def tuto_kernel_overview(): # Create GP regression model m = GPy.models.GPRegression(X, Y, Kanova) - fig = pb.figure(figsize=(5,5)) - ax = fig.add_subplot(111) - m.plot(ax=ax) - - pb.figure(figsize=(20,3)) - pb.subplots_adjust(wspace=0.5) - axs = pb.subplot(1,5,1) - m.plot(ax=axs) - pb.subplot(1,5,2) - pb.ylabel("= ",rotation='horizontal',fontsize='30') - axs = pb.subplot(1,5,3) - m.plot(ax=axs, which_parts=[False,True,False,False]) - pb.ylabel("cst +",rotation='horizontal',fontsize='30') - axs = pb.subplot(1,5,4) - m.plot(ax=axs, which_parts=[False,False,True,False]) - pb.ylabel("+ ",rotation='horizontal',fontsize='30') - axs = pb.subplot(1,5,5) - pb.ylabel("+ ",rotation='horizontal',fontsize='30') - m.plot(ax=axs, which_parts=[False,False,False,True]) + + if plot: + fig = pb.figure(figsize=(5,5)) + ax = fig.add_subplot(111) + m.plot(ax=ax) + + pb.figure(figsize=(20,3)) + pb.subplots_adjust(wspace=0.5) + axs = pb.subplot(1,5,1) + m.plot(ax=axs) + pb.subplot(1,5,2) + pb.ylabel("= ",rotation='horizontal',fontsize='30') + axs = pb.subplot(1,5,3) + m.plot(ax=axs, which_parts=[False,True,False,False]) + pb.ylabel("cst +",rotation='horizontal',fontsize='30') + axs = pb.subplot(1,5,4) + m.plot(ax=axs, which_parts=[False,False,True,False]) + pb.ylabel("+ ",rotation='horizontal',fontsize='30') + axs = pb.subplot(1,5,5) + pb.ylabel("+ ",rotation='horizontal',fontsize='30') + m.plot(ax=axs, which_parts=[False,False,False,True]) return(m) -def model_interaction(): +def model_interaction(optimize=True, plot=True): X = np.random.randn(20,1) Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5. k = GPy.kern.rbf(1) + GPy.kern.bias(1) diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py index a8777e11..1192448a 100644 --- a/GPy/testing/bgplvm_tests.py +++ b/GPy/testing/bgplvm_tests.py @@ -4,7 +4,7 @@ import unittest import numpy as np import GPy -from GPy.models.bayesian_gplvm import BayesianGPLVM +from ..models import BayesianGPLVM class BGPLVMTests(unittest.TestCase): def test_bias_kern(self): diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py index 989251a7..be26fff6 100644 --- a/GPy/testing/examples_tests.py +++ b/GPy/testing/examples_tests.py @@ -10,6 +10,7 @@ import os import random from nose.tools import nottest import sys +import itertools class ExamplesTests(unittest.TestCase): def _checkgrad(self, Model): @@ -18,29 +19,27 @@ class ExamplesTests(unittest.TestCase): def _model_instance(self, Model): self.assertTrue(isinstance(Model, GPy.models)) -""" -def model_instance_generator(model): - def check_model_returned(self): - self._model_instance(model) - return check_model_returned - -def checkgrads_generator(model): - def model_checkgrads(self): - self._checkgrad(model) - return model_checkgrads -""" - def model_checkgrads(model): model.randomize() - #assert model.checkgrad() - return model.checkgrad() + #NOTE: Step as 1e-4, this should be acceptable for more peaky models + return model.checkgrad(step=1e-4) def model_instance(model): - #assert isinstance(model, GPy.core.model) - return isinstance(model, GPy.core.model) + return isinstance(model, GPy.core.model.Model) + +def flatten_nested(lst): + result = [] + for element in lst: + if hasattr(element, '__iter__'): + result.extend(flatten_nested(element)) + else: + result.append(element) + return result @nottest def test_models(): + optimize=False + plot=True examples_path = 
os.path.dirname(GPy.examples.__file__) # Load modules failing_models = {} @@ -54,29 +53,36 @@ def test_models(): print "After" print functions for example in functions: - if example[0] in ['oil', 'silhouette', 'GPLVM_oil_100']: - print "SKIPPING" - continue + if example[0] in ['epomeo_gpx']: + #These are the edge cases that we might want to handle specially + if example[0] == 'epomeo_gpx' and not GPy.util.datasets.gpxpy_available: + print "Skipping as gpxpy is not available to parse GPS" + continue print "Testing example: ", example[0] # Generate model + try: - model = example[1]() + models = [ example[1](optimize=optimize, plot=plot) ] + #If more than one model returned, flatten them + models = flatten_nested(models) except Exception as e: failing_models[example[0]] = "Cannot make model: \n{e}".format(e=e) else: - print model + print models model_checkgrads.description = 'test_checkgrads_%s' % example[0] try: - if not model_checkgrads(model): - failing_models[model_checkgrads.description] = False + for model in models: + if not model_checkgrads(model): + failing_models[model_checkgrads.description] = False except Exception as e: failing_models[model_checkgrads.description] = e model_instance.description = 'test_instance_%s' % example[0] try: - if not model_instance(model): - failing_models[model_instance.description] = False + for model in models: + if not model_instance(model): + failing_models[model_instance.description] = False except Exception as e: failing_models[model_instance.description] = e diff --git a/GPy/testing/gp_transformation_tests.py b/GPy/testing/gp_transformation_tests.py new file mode 100644 index 00000000..42c0414b --- /dev/null +++ b/GPy/testing/gp_transformation_tests.py @@ -0,0 +1,61 @@ +from nose.tools import with_setup +from GPy.models import GradientChecker +from GPy.likelihoods.noise_models import gp_transformations +import inspect +import unittest +import numpy as np + +class TestTransformations(object): + """ + Generic transformations checker + """ + def setUp(self): + N = 30 + self.fs = [np.random.rand(N, 1), float(np.random.rand(1))] + + + def tearDown(self): + self.fs = None + + def test_transformations(self): + self.setUp() + transformations = [gp_transformations.Identity(), + gp_transformations.Log(), + gp_transformations.Probit(), + gp_transformations.Log_ex_1(), + gp_transformations.Reciprocal(), + ] + + for transformation in transformations: + for f in self.fs: + yield self.t_dtransf_df, transformation, f + yield self.t_d2transf_df2, transformation, f + yield self.t_d3transf_df3, transformation, f + + @with_setup(setUp, tearDown) + def t_dtransf_df(self, transformation, f): + print "\n{}".format(inspect.stack()[0][3]) + grad = GradientChecker(transformation.transf, transformation.dtransf_df, f, 'f') + grad.randomize() + grad.checkgrad(verbose=1) + assert grad.checkgrad() + + @with_setup(setUp, tearDown) + def t_d2transf_df2(self, transformation, f): + print "\n{}".format(inspect.stack()[0][3]) + grad = GradientChecker(transformation.dtransf_df, transformation.d2transf_df2, f, 'f') + grad.randomize() + grad.checkgrad(verbose=1) + assert grad.checkgrad() + + @with_setup(setUp, tearDown) + def t_d3transf_df3(self, transformation, f): + print "\n{}".format(inspect.stack()[0][3]) + grad = GradientChecker(transformation.d2transf_df2, transformation.d3transf_df3, f, 'f') + grad.randomize() + grad.checkgrad(verbose=1) + assert grad.checkgrad() + +#if __name__ == "__main__": + #print "Running unit tests" + #unittest.main() diff --git 
a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 19cc1dcf..0fceac60 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -17,16 +17,11 @@ except ImportError: class KernelTests(unittest.TestCase): def test_kerneltie(self): K = GPy.kern.rbf(5, ARD=True) - K.rbf.lengthscale[0].tie_to(K.rbf.lengthscale[2]) - K.rbf.lengthscale[1].tie_to(K.rbf.lengthscale[3]) - K.rbf.lengthscale[2].constrain_fixed() + K.tie_params('.*[01]') + K.constrain_fixed('2') X = np.random.rand(5,5) Y = np.ones((5,1)) m = GPy.models.GPRegression(X,Y,K) - #self.assertRaises(RuntimeError, lambda: m.kern.rbf.lengthscale[3].tie_to(m.kern.rbf.lengthscale[1])) - #self.assertRaises(RuntimeError, lambda: m.kern.rbf.lengthscale[3].tie_to(m.kern.rbf.lengthscale[0])) - #self.assertRaises(RuntimeError, lambda: m.kern.rbf.lengthscale.tie_to(m.kern.rbf.lengthscale)) - import ipdb;ipdb.set_trace() self.assertTrue(m.checkgrad()) def test_rbfkernel(self): @@ -39,12 +34,14 @@ class KernelTests(unittest.TestCase): self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) def test_eq_sympykernel(self): - kern = GPy.kern.eq_sympy(5, 3, output_ind=4) - self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) + if SYMPY_AVAILABLE: + kern = GPy.kern.eq_sympy(5, 3) + self.assertTrue(GPy.kern.kern_test(kern, output_ind=4, verbose=verbose)) - def test_sinckernel(self): - kern = GPy.kern.sinc(5) - self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) + def test_ode1_eqkernel(self): + if SYMPY_AVAILABLE: + kern = GPy.kern.ode1_eq(3) + self.assertTrue(GPy.kern.kern_test(kern, output_ind=1, verbose=verbose, X_positive=True)) def test_rbf_invkernel(self): kern = GPy.kern.rbf_inv(5) @@ -83,7 +80,7 @@ class KernelTests(unittest.TestCase): self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) def test_heterokernel(self): - kern = GPy.kern.hetero(5, mapping=GPy.mappings.Linear(5, 1), transform=GPy.core.transformations.Logexp()) + kern = GPy.kern.hetero(5, mapping=GPy.mappings.Linear(5, 1), transform=GPy.core.transformations.logexp()) self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) def test_mlpkernel(self): @@ -120,15 +117,5 @@ class KernelTests(unittest.TestCase): if __name__ == "__main__": -# K = GPy.kern.rbf(5, ARD=True) -# K.rbf.lengthscale[0].tie_to(K.rbf.lengthscale[2]) -# K.rbf.lengthscale[1].tie_to(K.rbf.lengthscale[3]) -# K.rbf.lengthscale[2].constrain_fixed() -# -# K.rbf.lengthscale[2:].tie_to(K.rbf.variance) -# X = np.random.rand(5,5) -# Y = np.ones((5,1)) -# m = GPy.models.GPRegression(X,Y,K) - print "Running unit tests, please be (very) patient..." unittest.main() diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py new file mode 100644 index 00000000..d14c9a41 --- /dev/null +++ b/GPy/testing/likelihood_tests.py @@ -0,0 +1,686 @@ +import numpy as np +import unittest +import GPy +from GPy.models import GradientChecker +import functools +import inspect +from GPy.likelihoods.noise_models import gp_transformations +from functools import partial +#np.random.seed(300) +np.random.seed(7) + +def dparam_partial(inst_func, *args): + """ + If we have a instance method that needs to be called but that doesn't + take the parameter we wish to change to checkgrad, then this function + will change the variable using set params. 
+ + inst_func: should be an instance function of an object that we would like + to change + param: the param that will be given to set_params + args: anything else that needs to be given to the function (for example + the f or Y that are being used in the function whilst we tweak the + param) + """ + def param_func(param, inst_func, args): + inst_func.im_self._set_params(param) + return inst_func(*args) + return functools.partial(param_func, inst_func=inst_func, args=args) + +def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=False, verbose=False): + """ + checkgrad expects an f: R^N -> R^1 and df: R^N -> R^N + However, if we are holding other parameters fixed and moving something else, + we need to check the gradient of each of the fixed parameters + (f and y for example) separately, whilst moving another parameter. + Otherwise f gives back R^N and + df gives back R^NxM, where M is + the number of parameters and N is the number of data points. + We need to take a slice out of f and a slice out of df + """ + #print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__, + #func.__name__, dfunc.__name__) + partial_f = dparam_partial(func, *args) + partial_df = dparam_partial(dfunc, *args) + gradchecking = True + for param in params: + fnum = np.atleast_1d(partial_f(param)).shape[0] + dfnum = np.atleast_1d(partial_df(param)).shape[0] + for fixed_val in range(dfnum): + #dlik and dlik_dvar give back one value each + f_ind = min(fnum, fixed_val+1) - 1 + print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val) + #Make grad checker with this param moving, note that set_params is NOT being called + #The parameter is being set directly with __setattr__ + grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind], + lambda x : np.atleast_1d(partial_df(x))[fixed_val], + param, 'p') + #This is not general for more than one param...
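For reference, the GradientChecker pattern that dparam_partial and dparam_checkgrad build on pairs a function with its claimed derivative at a starting point; a minimal self-contained sketch in the same (f, df, x0, name) style as the calls in this file:

    import numpy as np
    from GPy.models import GradientChecker

    # d/dx of sin(x) is cos(x); checkgrad compares cos against finite differences
    g = GradientChecker(np.sin, np.cos, np.random.randn(3), 'x')
    g.randomize()
    assert g.checkgrad(verbose=1)
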
+ if constraints is not None: + for constraint in constraints: + constraint('p', grad) + if randomize: + grad.randomize() + if verbose: + print grad + grad.checkgrad(verbose=1) + if not grad.checkgrad(): + gradchecking = False + + return gradchecking + + +from nose.tools import with_setup +class TestNoiseModels(object): + """ + Generic model checker + """ + def setUp(self): + self.N = 5 + self.D = 3 + self.X = np.random.rand(self.N, self.D)*10 + + self.real_std = 0.1 + noise = np.random.randn(*self.X[:, 0].shape)*self.real_std + self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None] + self.f = np.random.rand(self.N, 1) + self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None] + self.positive_Y = np.exp(self.Y.copy()) + tmp = np.round(self.X[:, 0]*3-3)[:, None] + np.random.randint(0,3, self.X.shape[0])[:, None] + self.integer_Y = np.where(tmp > 0, tmp, 0) + + self.var = 0.2 + + self.var = np.random.rand(1) + + #Make a bigger step as lower bound can be quite curved + self.step = 1e-3 + + def tearDown(self): + self.Y = None + self.f = None + self.X = None + + def test_noise_models(self): + self.setUp() + + #################################################### + # Constraint wrappers so we can just list them off # + #################################################### + def constrain_negative(regex, model): + model.constrain_negative(regex) + + def constrain_positive(regex, model): + model.constrain_positive(regex) + + def constrain_bounded(regex, model, lower, upper): + """ + Used like: partial(constrain_bounded, lower=0, upper=1) + """ + model.constrain_bounded(regex, lower, upper) + + """ + Dictionary where we nest models we would like to check + Name: { + "model": model_instance, + "grad_params": { + "names": [names_of_params_we_want, to_grad_check], + "vals": [values_of_params, to_start_at], + "constrain": [constraint_wrappers, listed_here] + }, + "laplace": boolean_of_whether_model_should_work_for_laplace, + "ep": boolean_of_whether_model_should_work_for_laplace, + "link_f_constraints": [constraint_wrappers, listed_here] + } + """ + noise_models = {"Student_t_default": { + "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var), + "grad_params": { + "names": ["t_noise"], + "vals": [self.var], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Student_t_1_var": { + "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var), + "grad_params": { + "names": ["t_noise"], + "vals": [1.0], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Student_t_small_var": { + "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var), + "grad_params": { + "names": ["t_noise"], + "vals": [0.01], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Student_t_large_var": { + "model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var), + "grad_params": { + "names": ["t_noise"], + "vals": [10.0], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Student_t_approx_gauss": { + "model": GPy.likelihoods.student_t(deg_free=1000, sigma2=self.var), + "grad_params": { + "names": ["t_noise"], + "vals": [self.var], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Student_t_log": { + "model": GPy.likelihoods.student_t(gp_link=gp_transformations.Log(), deg_free=5, sigma2=self.var), + "grad_params": { + "names": ["t_noise"], + "vals": [self.var], + "constraints": [constrain_positive] + }, + "laplace": True + }, + "Gaussian_default": { + "model": GPy.likelihoods.gaussian(variance=self.var, 
D=self.D, N=self.N), + "grad_params": { + "names": ["noise_model_variance"], + "vals": [self.var], + "constraints": [constrain_positive] + }, + "laplace": True, + "ep": True + }, + #"Gaussian_log": { + #"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N), + #"grad_params": { + #"names": ["noise_model_variance"], + #"vals": [self.var], + #"constraints": [constrain_positive] + #}, + #"laplace": True + #}, + #"Gaussian_probit": { + #"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Probit(), variance=self.var, D=self.D, N=self.N), + #"grad_params": { + #"names": ["noise_model_variance"], + #"vals": [self.var], + #"constraints": [constrain_positive] + #}, + #"laplace": True + #}, + #"Gaussian_log_ex": { + #"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log_ex_1(), variance=self.var, D=self.D, N=self.N), + #"grad_params": { + #"names": ["noise_model_variance"], + #"vals": [self.var], + #"constraints": [constrain_positive] + #}, + #"laplace": True + #}, + "Bernoulli_default": { + "model": GPy.likelihoods.bernoulli(), + "link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)], + "laplace": True, + "Y": self.binary_Y, + "ep": True + }, + "Exponential_default": { + "model": GPy.likelihoods.exponential(), + "link_f_constraints": [constrain_positive], + "Y": self.positive_Y, + "laplace": True, + }, + "Poisson_default": { + "model": GPy.likelihoods.poisson(), + "link_f_constraints": [constrain_positive], + "Y": self.integer_Y, + "laplace": True, + "ep": False #Should work though... + }, + "Gamma_default": { + "model": GPy.likelihoods.gamma(), + "link_f_constraints": [constrain_positive], + "Y": self.positive_Y, + "laplace": True + } + } + + for name, attributes in noise_models.iteritems(): + model = attributes["model"] + if "grad_params" in attributes: + params = attributes["grad_params"] + param_vals = params["vals"] + param_names= params["names"] + param_constraints = params["constraints"] + else: + params = [] + param_vals = [] + param_names = [] + constrain_positive = [] + param_constraints = [] # ??? TODO: Saul to Fix. 
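The constraint entries in the dictionary above are just the wrapper functions defined earlier, optionally curried with functools.partial so they can later be applied by parameter name; for instance:

    from functools import partial

    # fix everything except the regex and the model/GradientChecker argument
    bounded_01 = partial(constrain_bounded, lower=0, upper=1)
    # applied later as bounded_01('g', grad) to bound the variable named 'g'
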
+ if "link_f_constraints" in attributes: + link_f_constraints = attributes["link_f_constraints"] + else: + link_f_constraints = [] + if "Y" in attributes: + Y = attributes["Y"].copy() + else: + Y = self.Y.copy() + if "f" in attributes: + f = attributes["f"].copy() + else: + f = self.f.copy() + if "laplace" in attributes: + laplace = attributes["laplace"] + else: + laplace = False + if "ep" in attributes: + ep = attributes["ep"] + else: + ep = False + + if len(param_vals) > 1: + raise NotImplementedError("Cannot support multiple params in likelihood yet!") + + #Required by all + #Normal derivatives + yield self.t_logpdf, model, Y, f + yield self.t_dlogpdf_df, model, Y, f + yield self.t_d2logpdf_df2, model, Y, f + #Link derivatives + yield self.t_dlogpdf_dlink, model, Y, f, link_f_constraints + yield self.t_d2logpdf_dlink2, model, Y, f, link_f_constraints + if laplace: + #Laplace only derivatives + yield self.t_d3logpdf_df3, model, Y, f + yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints + #Params + yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_constraints + yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_constraints + yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_constraints + #Link params + yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_constraints + yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_constraints + yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_constraints + + #laplace likelihood gradcheck + yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints + if ep: + #ep likelihood gradcheck + yield self.t_ep_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints + + + self.tearDown() + + ############# + # dpdf_df's # + ############# + @with_setup(setUp, tearDown) + def t_logpdf(self, model, Y, f): + print "\n{}".format(inspect.stack()[0][3]) + print model + print model._get_params() + np.testing.assert_almost_equal( + model.pdf(f.copy(), Y.copy()), + np.exp(model.logpdf(f.copy(), Y.copy())) + ) + + @with_setup(setUp, tearDown) + def t_dlogpdf_df(self, model, Y, f): + print "\n{}".format(inspect.stack()[0][3]) + self.description = "\n{}".format(inspect.stack()[0][3]) + logpdf = functools.partial(model.logpdf, y=Y) + dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) + grad = GradientChecker(logpdf, dlogpdf_df, f.copy(), 'g') + grad.randomize() + grad.checkgrad(verbose=1) + print model + assert grad.checkgrad() + + @with_setup(setUp, tearDown) + def t_d2logpdf_df2(self, model, Y, f): + print "\n{}".format(inspect.stack()[0][3]) + dlogpdf_df = functools.partial(model.dlogpdf_df, y=Y) + d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y) + grad = GradientChecker(dlogpdf_df, d2logpdf_df2, f.copy(), 'g') + grad.randomize() + grad.checkgrad(verbose=1) + print model + assert grad.checkgrad() + + @with_setup(setUp, tearDown) + def t_d3logpdf_df3(self, model, Y, f): + print "\n{}".format(inspect.stack()[0][3]) + d2logpdf_df2 = functools.partial(model.d2logpdf_df2, y=Y) + d3logpdf_df3 = functools.partial(model.d3logpdf_df3, y=Y) + grad = GradientChecker(d2logpdf_df2, d3logpdf_df3, f.copy(), 'g') + grad.randomize() + grad.checkgrad(verbose=1) + print model + assert grad.checkgrad() + + ############## + # df_dparams # + ############## + @with_setup(setUp, tearDown) + def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints): + print "\n{}".format(inspect.stack()[0][3]) 
+ print model + assert ( + dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta, + params, args=(f, Y), constraints=param_constraints, + randomize=True, verbose=True) + ) + + @with_setup(setUp, tearDown) + def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints): + print "\n{}".format(inspect.stack()[0][3]) + print model + assert ( + dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta, + params, args=(f, Y), constraints=param_constraints, + randomize=True, verbose=True) + ) + + @with_setup(setUp, tearDown) + def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints): + print "\n{}".format(inspect.stack()[0][3]) + print model + assert ( + dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta, + params, args=(f, Y), constraints=param_constraints, + randomize=True, verbose=True) + ) + + ################ + # dpdf_dlink's # + ################ + @with_setup(setUp, tearDown) + def t_dlogpdf_dlink(self, model, Y, f, link_f_constraints): + print "\n{}".format(inspect.stack()[0][3]) + logpdf = functools.partial(model.logpdf_link, y=Y) + dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y) + grad = GradientChecker(logpdf, dlogpdf_dlink, f.copy(), 'g') + + #Apply constraints to link_f values + for constraint in link_f_constraints: + constraint('g', grad) + + grad.randomize() + print grad + grad.checkgrad(verbose=1) + assert grad.checkgrad() + + @with_setup(setUp, tearDown) + def t_d2logpdf_dlink2(self, model, Y, f, link_f_constraints): + print "\n{}".format(inspect.stack()[0][3]) + dlogpdf_dlink = functools.partial(model.dlogpdf_dlink, y=Y) + d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y) + grad = GradientChecker(dlogpdf_dlink, d2logpdf_dlink2, f.copy(), 'g') + + #Apply constraints to link_f values + for constraint in link_f_constraints: + constraint('g', grad) + + grad.randomize() + grad.checkgrad(verbose=1) + print grad + assert grad.checkgrad() + + @with_setup(setUp, tearDown) + def t_d3logpdf_dlink3(self, model, Y, f, link_f_constraints): + print "\n{}".format(inspect.stack()[0][3]) + d2logpdf_dlink2 = functools.partial(model.d2logpdf_dlink2, y=Y) + d3logpdf_dlink3 = functools.partial(model.d3logpdf_dlink3, y=Y) + grad = GradientChecker(d2logpdf_dlink2, d3logpdf_dlink3, f.copy(), 'g') + + #Apply constraints to link_f values + for constraint in link_f_constraints: + constraint('g', grad) + + grad.randomize() + grad.checkgrad(verbose=1) + print grad + assert grad.checkgrad() + + ################# + # dlink_dparams # + ################# + @with_setup(setUp, tearDown) + def t_dlogpdf_link_dparams(self, model, Y, f, params, param_constraints): + print "\n{}".format(inspect.stack()[0][3]) + print model + assert ( + dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta, + params, args=(f, Y), constraints=param_constraints, + randomize=False, verbose=True) + ) + + @with_setup(setUp, tearDown) + def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_constraints): + print "\n{}".format(inspect.stack()[0][3]) + print model + assert ( + dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta, + params, args=(f, Y), constraints=param_constraints, + randomize=False, verbose=True) + ) + + @with_setup(setUp, tearDown) + def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_constraints): + print "\n{}".format(inspect.stack()[0][3]) + print model + assert ( + dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta, + params, args=(f, Y), constraints=param_constraints, + randomize=False, verbose=True) + 
) + + ################ + # laplace test # + ################ + @with_setup(setUp, tearDown) + def t_laplace_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints): + print "\n{}".format(inspect.stack()[0][3]) + #Normalize + Y = Y/Y.max() + white_var = 1e-6 + kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), model) + m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=laplace_likelihood) + m.ensure_default_constraints() + m.constrain_fixed('white', white_var) + + for param_num in range(len(param_names)): + name = param_names[param_num] + m[name] = param_vals[param_num] + constraints[param_num](name, m) + + print m + m.randomize() + #m.optimize(max_iters=8) + print m + m.checkgrad(verbose=1, step=step) + #if not m.checkgrad(step=step): + #m.checkgrad(verbose=1, step=step) + #import ipdb; ipdb.set_trace() + #NOTE this test appears to be stochastic for some likelihoods (student t?) + # appears to all be working in test mode right now... + assert m.checkgrad(step=step) + + ########### + # EP test # + ########### + @with_setup(setUp, tearDown) + def t_ep_fit_rbf_white(self, model, X, Y, f, step, param_vals, param_names, constraints): + print "\n{}".format(inspect.stack()[0][3]) + #Normalize + Y = Y/Y.max() + white_var = 1e-6 + kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + ep_likelihood = GPy.likelihoods.EP(Y.copy(), model) + m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=ep_likelihood) + m.ensure_default_constraints() + m.constrain_fixed('white', white_var) + + for param_num in range(len(param_names)): + name = param_names[param_num] + m[name] = param_vals[param_num] + constraints[param_num](name, m) + + m.randomize() + m.checkgrad(verbose=1, step=step) + print m + assert m.checkgrad(step=step) + + +class LaplaceTests(unittest.TestCase): + """ + Specific likelihood tests, not general enough for the above tests + """ + + def setUp(self): + self.N = 5 + self.D = 3 + self.X = np.random.rand(self.N, self.D)*10 + + self.real_std = 0.1 + noise = np.random.randn(*self.X[:, 0].shape)*self.real_std + self.Y = (np.sin(self.X[:, 0]*2*np.pi) + noise)[:, None] + self.f = np.random.rand(self.N, 1) + + self.var = 0.2 + + self.var = np.random.rand(1) + self.stu_t = GPy.likelihoods.student_t(deg_free=5, sigma2=self.var) + self.gauss = GPy.likelihoods.gaussian(gp_transformations.Log(), variance=self.var, D=self.D, N=self.N) + + #Make a bigger step as lower bound can be quite curved + self.step = 1e-6 + + def tearDown(self): + self.stu_t = None + self.gauss = None + self.Y = None + self.f = None + self.X = None + + def test_gaussian_d2logpdf_df2_2(self): + print "\n{}".format(inspect.stack()[0][3]) + self.Y = None + self.gauss = None + + self.N = 2 + self.D = 1 + self.X = np.linspace(0, self.D, self.N)[:, None] + self.real_std = 0.2 + noise = np.random.randn(*self.X.shape)*self.real_std + self.Y = np.sin(self.X*2*np.pi) + noise + self.f = np.random.rand(self.N, 1) + self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N) + + dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y) + d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y) + grad = GradientChecker(dlogpdf_df, d2logpdf_df2, self.f.copy(), 'g') + grad.randomize() + grad.checkgrad(verbose=1) + self.assertTrue(grad.checkgrad()) + + def test_laplace_log_likelihood(self): + debug = False + real_std = 0.1 + initial_var_guess = 0.5 + + #Start a function, any function + X 
= np.linspace(0.0, np.pi*2, 100)[:, None] + Y = np.sin(X) + np.random.randn(*X.shape)*real_std + Y = Y/Y.max() + #Yc = Y.copy() + #Yc[75:80] += 1 + kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + kernel2 = kernel1.copy() + + m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1) + m1.constrain_fixed('white', 1e-6) + m1['noise'] = initial_var_guess + m1.constrain_bounded('noise', 1e-4, 10) + m1.constrain_bounded('rbf', 1e-4, 10) + m1.ensure_default_constraints() + m1.randomize() + + gauss_distr = GPy.likelihoods.gaussian(variance=initial_var_guess, D=1, N=Y.shape[0]) + laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), gauss_distr) + m2 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel2, likelihood=laplace_likelihood) + m2.ensure_default_constraints() + m2.constrain_fixed('white', 1e-6) + m2.constrain_bounded('rbf', 1e-4, 10) + m2.constrain_bounded('noise', 1e-4, 10) + m2.randomize() + + if debug: + print m1 + print m2 + optimizer = 'scg' + print "Gaussian" + m1.optimize(optimizer, messages=debug) + print "Laplace Gaussian" + m2.optimize(optimizer, messages=debug) + if debug: + print m1 + print m2 + + m2._set_params(m1._get_params()) + + #Predict for training points to get posterior mean and variance + post_mean, post_var, _, _ = m1.predict(X) + post_mean_approx, post_var_approx, _, _ = m2.predict(X) + + if debug: + import pylab as pb + pb.figure(5) + pb.title('posterior means') + pb.scatter(X, post_mean, c='g') + pb.scatter(X, post_mean_approx, c='r', marker='x') + + pb.figure(6) + pb.title('plot_f') + m1.plot_f(fignum=6) + m2.plot_f(fignum=6) + fig, axes = pb.subplots(2, 1) + fig.suptitle('Covariance matricies') + a1 = pb.subplot(121) + a1.matshow(m1.likelihood.covariance_matrix) + a2 = pb.subplot(122) + a2.matshow(m2.likelihood.covariance_matrix) + + pb.figure(8) + pb.scatter(X, m1.likelihood.Y, c='g') + pb.scatter(X, m2.likelihood.Y, c='r', marker='x') + + + + #Check Y's are the same + np.testing.assert_almost_equal(Y, m2.likelihood.Y, decimal=5) + #Check marginals are the same + np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2) + #Check marginals are the same with random + m1.randomize() + m2._set_params(m1._get_params()) + np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2) + + #Check they are checkgradding + #m1.checkgrad(verbose=1) + #m2.checkgrad(verbose=1) + self.assertTrue(m1.checkgrad()) + self.assertTrue(m2.checkgrad()) + +if __name__ == "__main__": + print "Running unit tests" + unittest.main() diff --git a/GPy/testing/parameter_testing.py b/GPy/testing/parameter_testing.py deleted file mode 100644 index 4ab9b810..00000000 --- a/GPy/testing/parameter_testing.py +++ /dev/null @@ -1,141 +0,0 @@ -''' -Created on 4 Sep 2013 - -@author: maxz -''' -import unittest -from GPy.kern.constructors import rbf, linear, white -import numpy -from GPy.models.bayesian_gplvm import BayesianGPLVM -from GPy.likelihoods.gaussian import Gaussian -import pickle -import os -from GPy.core.parameterized import Parameterized -from GPy.core.parameter import Param - - -class Test(unittest.TestCase): - N, D, Q = 10, 6, 4 - def setUp(self): - self.rbf_variance = numpy.random.rand() - self.rbf_lengthscale = numpy.random.rand(self.Q) - self.linear_variance = numpy.random.rand(self.Q) - self.noise_variance = numpy.random.rand(1) - self.kern = (rbf(self.Q, self.rbf_variance, self.rbf_lengthscale, ARD=True) - + linear(self.Q, self.linear_variance, ARD=True) - + white(self.Q, self.noise_variance)) - self.X = 
numpy.random.rand(self.N, self.Q) + 10 - self.X_variance = numpy.random.rand(self.N, self.Q) * .2 - - K = self.kern.K(self.X) - - self.Y = numpy.random.multivariate_normal(numpy.zeros(self.N), K + numpy.eye(self.N) * .2, self.D).T - -# self.bgplvm = BayesianGPLVM(Gaussian(self.Y, variance=self.noise_variance), self.Q, self.X, self.X_variance, kernel=self.kern) -# self.bgplvm.ensure_default_constraints(warning=False) -# self.bgplvm.tie_params("noise_variance|white_variance") -# self.bgplvm.constrain_fixed("rbf_var", warning=False) - self.parameter = Parameterized([ - Parameterized([ - Param('X', self.X), - Param('X_variance', self.X_variance), - ]), - Param('iip', self.bgplvm.Z), - Parameterized([ - Param('rbf_variance', self.rbf_variance), - Param('rbf_lengthscale', self.rbf_lengthscale) - ]), - Param('linear_variance', self.linear_variance), - Param('white_variance', self.noise_variance), - Param('noise_variance', self.noise_variance), - ]) - - self.parameter['.*variance'].constrain_positive(False) - self.parameter['.*length'].constrain_positive(False) - self.parameter.white.tie_to(self.parameter.noise) - self.parameter.rbf_var.constrain_fixed(False) - - def tearDown(self): - pass - -# def testGrepParamNamesTest(self): -# assert(self.bgplvm.grep_param_names('X_\d') == self.parameter.grep_param_names('X_\d')) -# assert(self.bgplvm.grep_param_names('X_\d+_1') == self.parameter.grep_param_names('X_\d+_1')) -# assert(self.bgplvm.grep_param_names('X_\d_1') == self.parameter.grep_param_names('X_\d_1')) -# assert(self.bgplvm.grep_param_names('X_.+_1') == self.parameter.grep_param_names('X_.+_1')) -# assert(self.bgplvm.grep_param_names('X_1_1') == self.parameter.grep_param_names('X_1_1')) -# assert(self.bgplvm.grep_param_names('X') == self.parameter.grep_param_names('X')) -# assert(self.bgplvm.grep_param_names('rbf') == self.parameter.grep_param_names('rbf')) -# assert(self.bgplvm.grep_param_names('rbf_l.*_1') == self.parameter.grep_param_names('rbf_l.*_1')) -# assert(self.bgplvm.grep_param_names('l') == self.parameter.grep_param_names('l')) -# assert(self.bgplvm.grep_param_names('dont_match') == self.parameter.grep_param_names('dont_match')) -# assert(self.bgplvm.grep_param_names('.*') == self.parameter.grep_param_names('.*')) - - def testGetParams(self): - assert(numpy.allclose(self.bgplvm._get_params(), self.parameter._get_params())) - assert(numpy.allclose(self.bgplvm._get_params_transformed(), self.parameter._get_params_transformed())) - - def testSetParams(self): - self.bgplvm.randomize() - self.parameter._set_params(self.bgplvm._get_params()) - assert(numpy.allclose(self.bgplvm._get_params(), self.parameter._get_params())) - assert(numpy.allclose(self.bgplvm._get_params_transformed(), self.parameter._get_params_transformed())) - self.bgplvm.randomize() - self.parameter._set_params_transformed(self.bgplvm._get_params_transformed()) - assert(numpy.allclose(self.bgplvm._get_params(), self.parameter._get_params())) - assert(numpy.allclose(self.bgplvm._get_params_transformed(), self.parameter._get_params_transformed())) - - def testSlicing(self): - assert(numpy.allclose(self.parameter.X[:,1], self.X[:,1])) - assert(numpy.allclose(self.parameter.X[:,1], self.X[:,1])) - assert(numpy.allclose(self.parameter.X_variance[1,1], self.X_variance[1,1])) - assert(numpy.allclose(self.parameter.X_variance[:], self.X_variance[:])) - assert(numpy.allclose(self.parameter.X[:,:][:,0:2][:,1], self.X[:,1])) - assert(numpy.allclose(self.parameter.X[:,1], self.X[:,1])) - 
assert(numpy.allclose(self.parameter.X_variance[1,1], self.X_variance[1,1])) - assert(numpy.allclose(self.parameter.X_variance[:], self.X_variance[:])) - - def testSlicingSet(self): - self.parameter['.*variance'] = 1. - assert(numpy.alltrue(self.parameter['.*variance'] == 1.)) - self.parameter.X[0,:3] = 2 - assert(numpy.alltrue(self.parameter.X[0,:3] == 2)) - X = self.parameter.X.copy() - self.parameter.X[[0,4,9],[0,1,3]] -= 1 - assert(numpy.alltrue((X[[0,4,9],[0,1,3]] - 1) == self.parameter.X[[0,4,9],[0,1,3]])) - self.parameter[''] = 10 - assert(numpy.alltrue(self.parameter[''] == 10)) - - def testConstraints(self): - self.parameter[''].unconstrain() - self.parameter.X.constrain_positive() - self.parameter.X[:,numpy.s_[0::2]].unconstrain_positive() - assert(numpy.alltrue(self.parameter.constraints.indices()[0] == numpy.r_[1:self.N*self.Q:2])) - - def testNdarrayFunc(self): - assert(numpy.alltrue(self.parameter.X * self.parameter.X == self.X * self.X)) - assert(numpy.alltrue(self.parameter.X[0,:] * self.parameter.X[1,:] == self.X[0,:] * self.X[1,:])) - - def testPickle(self): - fname = '/tmp/GPy_io_test.pickle' - m = self.parameter - m.X.fix() - self.parameter.pickle(fname) - with open(fname, 'r') as f: - m2 = pickle.load(f) - self.assertEqual(m.__str__(), m2.__str__()) - self.assertEqual(m.X_v.__str__(), m2.X_v.__str__()) - os.remove(fname) - - -if __name__ == "__main__": - import sys;sys.argv = ['', - 'Test.testSlicing', - 'Test.testGetParams', - 'Test.testNdarrayFunc', - 'Test.testSetParams', - 'Test.testConstraints', - 'Test.testSlicingSet', - 'Test.testPickle', - ] - unittest.main() diff --git a/GPy/testing/psi_stat_expectation_tests.py b/GPy/testing/psi_stat_expectation_tests.py index 08f938fb..90252197 100644 --- a/GPy/testing/psi_stat_expectation_tests.py +++ b/GPy/testing/psi_stat_expectation_tests.py @@ -27,9 +27,9 @@ def ard(p): @testing.deepTest(__test__()) class Test(unittest.TestCase): input_dim = 9 - num_inducing = 4 - N = 3 - Nsamples = 5e3 + num_inducing = 13 + N = 300 + Nsamples = 1e6 def setUp(self): i_s_dim_list = [2,4,3] @@ -45,20 +45,29 @@ class Test(unittest.TestCase): input_slices = input_slices ) self.kerns = ( - input_slice_kern, - (GPy.kern.rbf(self.input_dim, ARD=True) + - GPy.kern.linear(self.input_dim, ARD=True) + - GPy.kern.bias(self.input_dim) + - GPy.kern.white(self.input_dim)), - (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + - GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + - GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) + - GPy.kern.bias(self.input_dim) + - GPy.kern.white(self.input_dim)), -# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True), +# input_slice_kern, +# (GPy.kern.rbf(self.input_dim, ARD=True) + +# GPy.kern.linear(self.input_dim, ARD=True) + +# GPy.kern.bias(self.input_dim) + +# GPy.kern.white(self.input_dim)), + (#GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) + +GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +# +GPy.kern.bias(self.input_dim) +# +GPy.kern.white(self.input_dim)), + ), +# (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + +# GPy.kern.bias(self.input_dim, np.random.rand())), +# (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +# +GPy.kern.rbf(self.input_dim, 
np.random.rand(), np.random.rand(self.input_dim), ARD=True) +# #+GPy.kern.bias(self.input_dim, np.random.rand()) +# #+GPy.kern.white(self.input_dim, np.random.rand())), +# ), +# GPy.kern.white(self.input_dim, np.random.rand())), +# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True), # GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True), # GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim), -# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim), +# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim), # GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim), # GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim), # GPy.kern.bias(self.input_dim), GPy.kern.white(self.input_dim), @@ -79,7 +88,7 @@ class Test(unittest.TestCase): def test_psi1(self): for kern in self.kerns: - Nsamples = np.floor(self.Nsamples/300.) + Nsamples = np.floor(self.Nsamples/self.N) psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance) K_ = np.zeros((Nsamples, self.num_inducing)) diffs = [] @@ -105,31 +114,31 @@ class Test(unittest.TestCase): def test_psi2(self): for kern in self.kerns: - Nsamples = self.Nsamples/10. + Nsamples = int(np.floor(self.Nsamples/self.N)) psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance) K_ = np.zeros((self.num_inducing, self.num_inducing)) diffs = [] for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)): K = kern.K(q_x_sample_stripe, self.Z) - K = (K[:, :, None] * K[:, None, :]).mean(0) - K_ += K - diffs.append(((psi2 - (K_ / (i + 1)))**2).mean()) - K_ /= self.Nsamples / Nsamples + K = (K[:, :, None] * K[:, None, :]) + K_ += K.sum(0) / self.Nsamples + diffs.append(((psi2 - (K_*self.Nsamples/((i+1)*Nsamples)))**2).mean()) + #K_ /= self.Nsamples / Nsamples msg = "psi2: {}".format("+".join([p.name + ard(p) for p in kern.parts])) try: import pylab pylab.figure(msg) - pylab.plot(diffs) + pylab.plot(diffs, marker='x', mew=.2) # print msg, np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1) - self.assertTrue(np.allclose(psi2.squeeze(), K_, - rtol=1e-1, atol=.1), + self.assertTrue(np.allclose(psi2.squeeze(), K_), + #rtol=1e-1, atol=.1), msg=msg + ": not matching") # sys.stdout.write(".") except: -# import ipdb;ipdb.set_trace() # kern.psi2(self.Z, self.q_x_mean, self.q_x_variance) # sys.stdout.write("E") print msg + ": not matching" + import ipdb;ipdb.set_trace() pass if __name__ == "__main__": diff --git a/GPy/testing/psi_stat_gradient_tests.py b/GPy/testing/psi_stat_gradient_tests.py index de670f41..e373aaa3 100644 --- a/GPy/testing/psi_stat_gradient_tests.py +++ b/GPy/testing/psi_stat_gradient_tests.py @@ -40,10 +40,9 @@ class PsiStatModel(Model): return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum() def _log_likelihood_gradients(self): psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance) - try: - psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance) - except AttributeError: - psiZ = numpy.zeros(self.num_inducing * self.input_dim) + #psimu, psiS = numpy.ones(self.N * self.input_dim), numpy.ones(self.N * self.input_dim) + psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance) + #psiZ = numpy.ones(self.num_inducing * self.input_dim) thetagrad = 
self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten() return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad)) @@ -64,40 +63,54 @@ class DPsiStatTest(unittest.TestCase): def testPsi0(self): for k in self.kernels: - m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z, + m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,\ num_inducing=self.num_inducing, kernel=k) + m.ensure_default_constraints() + m.randomize() assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts))) - -# def testPsi1(self): -# for k in self.kernels: -# m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z, -# num_inducing=self.num_inducing, kernel=k) -# assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts))) + + def testPsi1(self): + for k in self.kernels: + m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z, + num_inducing=self.num_inducing, kernel=k) + m.ensure_default_constraints() + m.randomize() + assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_lin(self): k = self.kernels[0] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - num_inducing=self.num_inducing, kernel=k) + num_inducing=self.num_inducing, kernel=k) + m.ensure_default_constraints() + m.randomize() assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_lin_bia(self): k = self.kernels[3] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, num_inducing=self.num_inducing, kernel=k) + m.ensure_default_constraints() + m.randomize() assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_rbf(self): k = self.kernels[1] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, num_inducing=self.num_inducing, kernel=k) + m.ensure_default_constraints() + m.randomize() assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_rbf_bia(self): k = self.kernels[-1] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, num_inducing=self.num_inducing, kernel=k) + m.ensure_default_constraints() + m.randomize() assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_bia(self): k = self.kernels[2] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, num_inducing=self.num_inducing, kernel=k) + m.ensure_default_constraints() + m.randomize() assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) @@ -116,9 +129,9 @@ if __name__ == "__main__": # m.randomize() # # self.assertTrue(m.checkgrad()) numpy.random.seed(0) - input_dim = 5 - N = 50 - num_inducing = 10 + input_dim = 3 + N = 3 + num_inducing = 2 D = 15 X = numpy.random.randn(N, input_dim) X_var = .5 * numpy.ones_like(X) + .1 * numpy.clip(numpy.random.randn(*X.shape), 0, 1) @@ -135,18 +148,35 @@ if __name__ == "__main__": # num_inducing=num_inducing, kernel=k) # assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts))) # -# m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z, -# num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim)) + m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z, + num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim)+GPy.kern.bias(input_dim)) # m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, # num_inducing=num_inducing, kernel=kernel) # m1 = 
PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, # num_inducing=num_inducing, kernel=kernel) # m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, # num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim)) - m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, - num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim))) +# m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, +# num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim))) # + GPy.kern.bias(input_dim)) -# m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, -# num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)) +# m = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, +# num_inducing=num_inducing, +# kernel=( +# GPy.kern.rbf(input_dim, ARD=1) +# +GPy.kern.linear(input_dim, ARD=1) +# +GPy.kern.bias(input_dim)) +# ) +# m.ensure_default_constraints() + m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, + num_inducing=num_inducing, kernel=( + GPy.kern.rbf(input_dim, numpy.random.rand(), numpy.random.rand(input_dim), ARD=1) + #+GPy.kern.linear(input_dim, numpy.random.rand(input_dim), ARD=1) + #+GPy.kern.rbf(input_dim, numpy.random.rand(), numpy.random.rand(input_dim), ARD=1) + #+GPy.kern.rbf(input_dim, numpy.random.rand(), numpy.random.rand(), ARD=0) + +GPy.kern.bias(input_dim) + +GPy.kern.white(input_dim) + ) + ) + m2.ensure_default_constraints() else: unittest.main() diff --git a/GPy/testing/sparse_gplvm_tests.py b/GPy/testing/sparse_gplvm_tests.py index e27fccff..c3942b95 100644 --- a/GPy/testing/sparse_gplvm_tests.py +++ b/GPy/testing/sparse_gplvm_tests.py @@ -4,7 +4,7 @@ import unittest import numpy as np import GPy -from GPy.models.sparse_gplvm import SparseGPLVM +from ..models import SparseGPLVM class sparse_GPLVMTests(unittest.TestCase): def test_bias_kern(self): diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index e4d9e063..9269a4c4 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -163,14 +163,18 @@ class GradientTests(unittest.TestCase): rbflin = GPy.kern.rbf(2) + GPy.kern.linear(2) self.check_model(rbflin, model_type='SparseGPRegression', dimension=2) + #@unittest.expectedFailure def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self): ''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs''' rbflin = GPy.kern.rbf(2) + GPy.kern.linear(2) + raise unittest.SkipTest("This is not implemented yet!") self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1) + #@unittest.expectedFailure def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self): ''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs''' rbflin = GPy.kern.rbf(1) + GPy.kern.linear(1) + raise unittest.SkipTest("This is not implemented yet!") self.check_model(rbflin, model_type='SparseGPRegression', dimension=1, uncertain_inputs=1) def test_GPLVM_rbf_bias_white_kern_2D(self): @@ -209,7 +213,7 @@ class GradientTests(unittest.TestCase): Z = np.linspace(0, 15, 4)[:, None] kernel = GPy.kern.rbf(1) m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z) - #distribution = GPy.likelihoods.likelihood_functions.Binomial() + #distribution = GPy.likelihoods.likelihood_functions.Bernoulli() #likelihood = GPy.likelihoods.EP(Y, distribution) #m = GPy.core.SparseGP(X, likelihood, kernel, Z) #m.ensure_default_constraints()
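
In summary, the reworked examples_tests harness reduces every example to the same two checks; a condensed sketch using the helpers defined in that file (flatten_nested, and the optimize/plot flags of test_models):

    def check_example(example_fn):
        # build the model(s) without optimizing, flattening nested returns
        models = flatten_nested([example_fn(optimize=False, plot=True)])
        for model in models:
            assert isinstance(model, GPy.core.model.Model)
            model.randomize()
            assert model.checkgrad(step=1e-4)  # looser step for peaky models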