From 23ff53f7f81f61723454e3ce1250246156120a1b Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 16 Dec 2013 11:39:39 +0000 Subject: [PATCH] BGPLVM with missing data --- GPy/core/gp_base.py | 6 +-- GPy/examples/dimensionality_reduction.py | 65 +++++++++++++++++++++++- GPy/models.py | 2 +- 3 files changed, 67 insertions(+), 6 deletions(-) diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index 548e2924..981ebbbb 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -218,8 +218,8 @@ class GPBase(Model): Y = self.likelihood.data for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T - ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) - ax.scatter(self.X[which_data_rows, free_dims[0]], self.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) + contour = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) + scatter = ax.scatter(self.X[which_data_rows, free_dims[0]], self.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) #set the limits of the plot to some sensible values ax.set_xlim(xmin[0], xmax[0]) @@ -227,7 +227,7 @@ class GPBase(Model): if samples: warnings.warn("Samples are rather difficult to plot for 2D inputs...") - + return contour, scatter else: raise NotImplementedError, "Cannot define a frame with more than two input dimensions" diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 9120805c..1a199519 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -52,6 +52,67 @@ def bgplvm_test_model(seed=default_seed, optimize=0, verbose=1, plot=0): return m +def bgplvm_test_model_missing_data(seed=default_seed, optimize=0, verbose=1, plot=0): + """ + model for testing purposes. Samples from a GP with rbf kernel and learns + the samples with a new kernel. Normally not for optimization, just model cheking + """ + from GPy.likelihoods.gaussian import Gaussian + import GPy, numpy as np + + num_inputs = 13 + num_inducing = 5 + if plot: + output_dim = 1 + input_dim = 2 + else: + input_dim = 2 + output_dim = 25 + + # generate GPLVM-like data + X = _np.random.rand(num_inputs, input_dim) + lengthscales = _np.random.rand(input_dim) + k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True) + + GPy.kern.white(input_dim, 0.01)) + K = k.K(X) + Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, output_dim).T + lik = Gaussian(Y, normalize=True) + + k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) + # k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) + # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0) + # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) + + m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing) + #=========================================================================== + # randomly obstruct data with percentage p + p = .8 + Y_obstruct = Y.copy() + Y_obstruct[np.random.uniform(size=(Y.shape)) < p] = np.nan + #=========================================================================== + m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing) + m.lengthscales = lengthscales + + if plot: + import matplotlib.pyplot as pb + m.plot() + pb.title('PCA initialisation') + m2.plot() + pb.title('PCA initialisation') + + if optimize: + m.optimize('scg', messages=verbose) + m2.optimize('scg', messages=verbose) + if plot: + m.plot() + pb.title('After optimisation') + m2.plot() + pb.title('After optimisation') + + return m, m2 + def gplvm_oil_100(optimize=1, verbose=1, plot=1): import GPy data = GPy.util.datasets.oil_100() @@ -205,7 +266,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): Ylist = [Y1, Y2, Y3] if plot_sim: - import pylab + import pylab, matplotlib.cm as cm import itertools fig = pylab.figure("MRD Simulation Data", figsize=(8, 6)) fig.clf() @@ -216,7 +277,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): ax.legend() for i, Y in enumerate(Ylist): ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i) - ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable + ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable ax.set_title("Y{}".format(i + 1)) pylab.draw() pylab.tight_layout() diff --git a/GPy/models.py b/GPy/models.py index 76d14819..0aea59a0 100644 --- a/GPy/models.py +++ b/GPy/models.py @@ -14,7 +14,7 @@ detailed explanations for the different models. __updated__ = '2013-11-28' -from models_modules.bayesian_gplvm import BayesianGPLVM +from models_modules.bayesian_gplvm import BayesianGPLVM, BayesianGPLVMWithMissingData from models_modules.gp_regression import GPRegression from models_modules.gp_classification import GPClassification#; _gp_classification = gp_classification ; del gp_classification from models_modules.sparse_gp_regression import SparseGPRegression#; _sparse_gp_regression = sparse_gp_regression ; del sparse_gp_regression