From 23ff53f7f81f61723454e3ce1250246156120a1b Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Mon, 16 Dec 2013 11:39:39 +0000
Subject: [PATCH] BGPLVM with missing data

---
 GPy/core/gp_base.py                      |  6 +--
 GPy/examples/dimensionality_reduction.py | 65 +++++++++++++++++++++++-
 GPy/models.py                            |  2 +-
 3 files changed, 67 insertions(+), 6 deletions(-)

diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py
index 548e2924..981ebbbb 100644
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@@ -218,8 +218,8 @@ class GPBase(Model):
                 Y = self.likelihood.data
             for d in which_data_ycols:
                 m_d = m[:,d].reshape(resolution, resolution).T
-                ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
-                ax.scatter(self.X[which_data_rows, free_dims[0]], self.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
+                contour = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
+                scatter = ax.scatter(self.X[which_data_rows, free_dims[0]], self.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
 
             #set the limits of the plot to some sensible values
             ax.set_xlim(xmin[0], xmax[0])
@@ -227,7 +227,7 @@ class GPBase(Model):
 
             if samples:
                 warnings.warn("Samples are rather difficult to plot for 2D inputs...")
-
+            return contour, scatter
         else:
             raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
 
diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 9120805c..1a199519 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -52,6 +52,67 @@ def bgplvm_test_model(seed=default_seed, optimize=0, verbose=1, plot=0):
 
     return m
 
+def bgplvm_test_model_missing_data(seed=default_seed, optimize=0, verbose=1, plot=0):
+    """
+    model for testing purposes. Samples from a GP with rbf kernel and learns 
+    the samples with a new kernel. Normally not for optimization, just model cheking
+    """
+    from GPy.likelihoods.gaussian import Gaussian
+    import GPy, numpy as np
+    
+    num_inputs = 13
+    num_inducing = 5
+    if plot: 
+        output_dim = 1
+        input_dim = 2
+    else: 
+        input_dim = 2
+        output_dim = 25
+    
+    # generate GPLVM-like data
+    X = _np.random.rand(num_inputs, input_dim)
+    lengthscales = _np.random.rand(input_dim)
+    k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
+         + GPy.kern.white(input_dim, 0.01))
+    K = k.K(X)
+    Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, output_dim).T
+    lik = Gaussian(Y, normalize=True)
+
+    k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
+    # k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
+    # k = GPy.kern.rbf(input_dim, ARD = False)  + GPy.kern.white(input_dim, 0.00001)
+    # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
+    # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
+    # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
+
+    m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing)
+    #===========================================================================
+    # randomly obstruct data with percentage p
+    p = .8
+    Y_obstruct = Y.copy()
+    Y_obstruct[np.random.uniform(size=(Y.shape)) < p] = np.nan
+    #===========================================================================
+    m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
+    m.lengthscales = lengthscales
+
+    if plot:
+        import matplotlib.pyplot as pb
+        m.plot()
+        pb.title('PCA initialisation')
+        m2.plot()
+        pb.title('PCA initialisation')
+   
+    if optimize:
+        m.optimize('scg', messages=verbose)
+        m2.optimize('scg', messages=verbose)
+        if plot:
+            m.plot()
+            pb.title('After optimisation')
+            m2.plot()
+            pb.title('After optimisation')
+
+    return m, m2
+
 def gplvm_oil_100(optimize=1, verbose=1, plot=1):
     import GPy
     data = GPy.util.datasets.oil_100()
@@ -205,7 +266,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
     Ylist = [Y1, Y2, Y3]
 
     if plot_sim:
-        import pylab
+        import pylab, matplotlib.cm as cm
         import itertools
         fig = pylab.figure("MRD Simulation Data", figsize=(8, 6))
         fig.clf()
@@ -216,7 +277,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
         ax.legend()
         for i, Y in enumerate(Ylist):
             ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
-            ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
+            ax.imshow(Y, aspect='auto', cmap=cm.gray)  # @UndefinedVariable
             ax.set_title("Y{}".format(i + 1))
         pylab.draw()
         pylab.tight_layout()
diff --git a/GPy/models.py b/GPy/models.py
index 76d14819..0aea59a0 100644
--- a/GPy/models.py
+++ b/GPy/models.py
@@ -14,7 +14,7 @@ detailed explanations for the different models.
 
 __updated__ = '2013-11-28'
 
-from models_modules.bayesian_gplvm import BayesianGPLVM
+from models_modules.bayesian_gplvm import BayesianGPLVM, BayesianGPLVMWithMissingData
 from models_modules.gp_regression import GPRegression
 from models_modules.gp_classification import GPClassification#; _gp_classification = gp_classification ; del gp_classification 
 from models_modules.sparse_gp_regression import SparseGPRegression#; _sparse_gp_regression = sparse_gp_regression ; del sparse_gp_regression