From e869fcaf65d0ec99359723cee608ac7c1f17446a Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 25 Jun 2013 17:43:42 +0100 Subject: [PATCH 01/20] pcikling? --- GPy/core/gp.py | 4 + GPy/core/gp_base.py | 26 +++ GPy/core/model.py | 19 ++ GPy/core/parameterised.py | 18 ++ GPy/core/sparse_gp.py | 28 ++- GPy/kern/kern.py | 5 +- GPy/models/bayesian_gplvm.py | 362 ++--------------------------------- GPy/models/gplvm.py | 24 +-- GPy/models/mrd.py | 33 +++- 9 files changed, 149 insertions(+), 370 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 5172d9e7..e3bbc85d 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -31,6 +31,10 @@ class GP(GPBase): GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) self._set_params(self._get_params()) + def __setstate__(self, state): + GPBase.__setstate__(self, state) + self._set_params(self._get_params()) + def _set_params(self, p): self.kern._set_params_transformed(p[:self.kern.num_params_transformed()]) self.likelihood._set_params(p[self.kern.num_params_transformed():]) diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index 0799e0c2..528d6acc 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -32,6 +32,32 @@ class GPBase(Model): # All leaf nodes should call self._set_params(self._get_params()) at # the end + def __getstate__(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return Model.__getstate__(self) + [self.X, + self.num_data, + self.input_dim, + self.kern, + self.likelihood, + self.output_dim, + self._Xoffset, + self._Xscale] + + def __setstate__(self, state): + self._Xscale = state.pop() + self._Xoffset = state.pop() + self.output_dim = state.pop() + self.likelihood = state.pop() + self.kern = state.pop() + self.input_dim = state.pop() + self.num_data = state.pop() + self.X = state.pop() + Model.__setstate__(self, state) + self._set_params(self._get_params()) + def plot_f(self, samples=0, plot_limits=None, 
which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None): """ Plot the GP's view of the world, where the data is normalized and the diff --git a/GPy/core/model.py b/GPy/core/model.py index 05375b2a..3c5c8419 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -32,6 +32,25 @@ class Model(Parameterised): def _log_likelihood_gradients(self): raise NotImplementedError, "this needs to be implemented to use the Model class" + def __getstate__(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return Parameterised.__getstate__(self) + \ + [self.priors, self.optimization_runs, + self.sampling_runs, self.preferred_optimizer] + + def __setstate__(self, state): + """ + set state from previous call to getstate + """ + self.preferred_optimizer = state.pop() + self.sampling_runs = state.pop() + self.optimization_runs = state.pop() + self.priors = state.pop() + Parameterised.__setstate__(self, state) + def set_prior(self, regexp, what): """ Sets priors on the Model parameters. 
diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py index b3a5712a..13467fdf 100644 --- a/GPy/core/parameterised.py +++ b/GPy/core/parameterised.py @@ -29,6 +29,24 @@ class Parameterised(object): """Returns a (deep) copy of the current model """ return copy.deepcopy(self) + def __getstate__(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return [self.tied_indices, + self.fixed_indices, + self.fixed_values, + self.constrained_indices, + self.constraints] + + def __setstate__(self, state): + self.constraints = state.pop() + self.constrained_indices = state.pop() + self.fixed_values = state.pop() + self.fixed_indices = state.pop() + self.tied_indices = state.pop() + @property def params(self): """ diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 3183cff0..9c36cd2c 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -33,10 +33,11 @@ class SparseGP(GPBase): self.Z = Z self.num_inducing = Z.shape[0] - self.likelihood = likelihood +# self.likelihood = likelihood if X_variance is None: self.has_uncertain_inputs = False + self.X_variance = None else: assert X_variance.shape == X.shape self.has_uncertain_inputs = True @@ -49,6 +50,23 @@ class SparseGP(GPBase): if self.has_uncertain_inputs: self.X_variance /= np.square(self._Xscale) + def __getstate__(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return GPBase.__getstate__(self) + [self.Z, + self.num_inducing, + self.has_uncertain_inputs, + self.X_variance] + + def __setstate__(self, state): + self.X_variance = state.pop() + self.has_uncertain_inputs = state.pop() + self.num_inducing = state.pop() + self.Z = state.pop() + GPBase.__setstate__(self, state) + def _compute_kernel_matrices(self): # kernel computations, using BGPLVM notation self.Kmm = self.kern.K(self.Z) @@ -78,7 +96,7 @@ class SparseGP(GPBase): tmp = tmp.T else: if 
self.likelihood.is_heteroscedastic: - tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(self.num_data,1))) + tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(self.num_data, 1))) else: tmp = self.psi1 * (np.sqrt(self.likelihood.precision)) tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1) @@ -163,7 +181,7 @@ class SparseGP(GPBase): return np.hstack([self.Z.flatten(), self.kern._get_params_transformed(), self.likelihood._get_params()]) def _get_param_names(self): - return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[])\ + return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], [])\ + self.kern._get_param_names_transformed() + self.likelihood._get_param_names() def update_likelihood_approximation(self): @@ -220,7 +238,7 @@ class SparseGP(GPBase): def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False): """Internal helper function for making predictions, does not account for normalization""" - Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work! + Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work! 
symmetrify(Bi) Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi) @@ -293,7 +311,7 @@ class SparseGP(GPBase): GPBase.plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax) if self.X.shape[1] == 1: if self.has_uncertain_inputs: - Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now + Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0], xerr=2 * np.sqrt(self.X_variance[which_data, 0]), ecolor='k', fmt=None, elinewidth=.5, alpha=.5) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 76a0d99e..99689a74 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -45,14 +45,15 @@ class kern(Parameterised): Parameterised.__init__(self) - def plot_ARD(self, fignum=None, ax=None): + def plot_ARD(self, fignum=None, ax=None, title=None): """If an ARD kernel is present, it bar-plots the ARD parameters""" if ax is None: fig = pb.figure(fignum) ax = fig.add_subplot(111) for p in self.parts: if hasattr(p, 'ARD') and p.ARD: - ax.set_title('ARD parameters, %s kernel' % p.name) + if title is None: + ax.set_title('ARD parameters, %s kernel' % p.name) if p.name == 'linear': ard_params = p.variances diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 8043c635..22ac9ede 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -45,32 +45,19 @@ class BayesianGPLVM(SparseGP, GPLVM): if kernel is None: kernel = kern.rbf(input_dim) + kern.white(input_dim) - self.oldpsave = oldpsave - self._oldps = [] - self._debug = _debug - - if self._debug: - self.f_call = 0 - self._count = itertools.count() - self._savedklll = [] - self._savedparams = [] - self._savedgradients = [] - self._savederrors = [] - self._savedpsiKmm = [] - self._savedABCD = [] - SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs) 
self._set_params(self._get_params()) - @property - def oldps(self): - return self._oldps - @oldps.setter - def oldps(self, p): - if len(self._oldps) == (self.oldpsave + 1): - self._oldps.pop() - # if len(self._oldps) == 0 or not np.any([np.any(np.abs(p - op) > 1e-5) for op in self._oldps]): - self._oldps.insert(0, p.copy()) + def __getstate__(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return [self.init] + SparseGP.__getstate__(self) + + def __setstate__(self, state): + self.init = state.pop() + SparseGP.__setstate__(self, state) def _get_param_names(self): X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) @@ -90,24 +77,11 @@ class BayesianGPLVM(SparseGP, GPLVM): x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self))) return x - def _clipped(self, x): - return x # np.clip(x, -1e300, 1e300) - def _set_params(self, x, save_old=True, save_count=0): -# try: - x = self._clipped(x) - N, input_dim = self.num_data, self.input_dim - self.X = x[:self.X.size].reshape(N, input_dim).copy() - self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy() - SparseGP._set_params(self, x[(2 * N * input_dim):]) -# self.oldps = x -# except (LinAlgError, FloatingPointError, ZeroDivisionError): -# print "\rWARNING: Caught LinAlgError, continueing without setting " -# if self._debug: -# self._savederrors.append(self.f_call) -# if save_count > 10: -# raise -# self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1) + N, input_dim = self.num_data, self.input_dim + self.X = x[:self.X.size].reshape(N, input_dim).copy() + self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy() + SparseGP._set_params(self, x[(2 * N * input_dim):]) def dKL_dmuS(self): dKL_dS = (1. - (1. 
/ (self.X_variance))) * 0.5 @@ -131,53 +105,16 @@ class BayesianGPLVM(SparseGP, GPLVM): def log_likelihood(self): ll = SparseGP.log_likelihood(self) kl = self.KL_divergence() - -# if ll < -2E4: -# ll = -2E4 + np.random.randn() -# if kl > 5E4: -# kl = 5E4 + np.random.randn() - - if self._debug: - self.f_call = self._count.next() - if self.f_call % 1 == 0: - self._savedklll.append([self.f_call, ll, kl]) - self._savedparams.append([self.f_call, self._get_params()]) - self._savedgradients.append([self.f_call, self._log_likelihood_gradients()]) - self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]]) -# sf2 = self.scale_factor ** 2 - if self.likelihood.is_heteroscedastic: - A = -0.5 * self.num_data * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y) -# B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2) - B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) - else: - A = -0.5 * self.num_data * self.input_dim * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT -# B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2) - B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) - C = -self.input_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2)) - D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) - self._savedABCD.append([self.f_call, A, B, C, D]) - - # print "\nkl:", kl, "ll:", ll return ll - kl def _log_likelihood_gradients(self): dKL_dmu, dKL_dS = self.dKL_dmuS() dL_dmu, dL_dS = self.dL_dmuS() - # TODO: find way to make faster - d_dmu = (dL_dmu - dKL_dmu).flatten() d_dS = (dL_dS - dKL_dS).flatten() - # TEST KL: ==================== - # d_dmu = (dKL_dmu).flatten() - # d_dS = (dKL_dS).flatten() - # 
======================== - # TEST L: ==================== -# d_dmu = (dL_dmu).flatten() -# d_dS = (dL_dS).flatten() - # ======================== self.dbound_dmuS = np.hstack((d_dmu, d_dS)) self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self) - return self._clipped(np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))) + return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta)) def plot_latent(self, *args, **kwargs): return plot_latent.plot_latent_indices(self, *args, **kwargs) @@ -256,275 +193,6 @@ class BayesianGPLVM(SparseGP, GPLVM): fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95)) return fig - def __getstate__(self): - return (self.likelihood, self.input_dim, self.X, self.X_variance, - self.init, self.num_inducing, self.Z, self.kern, - self.oldpsave, self._debug) - - def __setstate__(self, state): - self.__init__(*state) - - def _debug_filter_params(self, x): - start, end = 0, self.X.size, - X = x[start:end].reshape(self.num_data, self.input_dim) - start, end = end, end + self.X_variance.size - X_v = x[start:end].reshape(self.num_data, self.input_dim) - start, end = end, end + (self.num_inducing * self.input_dim) - Z = x[start:end].reshape(self.num_inducing, self.input_dim) - start, end = end, end + self.input_dim - theta = x[start:] - return X, X_v, Z, theta - - - def _debug_get_axis(self, figs): - if figs[-1].axes: - ax1 = figs[-1].axes[0] - ax1.cla() - else: - ax1 = figs[-1].add_subplot(111) - return ax1 - - def _debug_plot(self): - assert self._debug, "must enable _debug, to debug-plot" - import pylab -# from mpl_toolkits.mplot3d import Axes3D - figs = [pylab.figure('BGPLVM DEBUG', figsize=(12, 4))] -# fig.clf() - - # log like -# splotshape = (6, 4) -# ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4) - ax1 = self._debug_get_axis(figs) - ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes, - ha='center', va='center') - kllls = np.array(self._savedklll) - LL, = ax1.plot(kllls[:, 0], kllls[:, 1] - kllls[:, 2], 
'-', label=r'$\log p(\mathbf{Y})$', mew=1.5) - KL, = ax1.plot(kllls[:, 0], kllls[:, 2], '-', label=r'$\mathcal{KL}(p||q)$', mew=1.5) - L, = ax1.plot(kllls[:, 0], kllls[:, 1], '-', label=r'$L$', mew=1.5) # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}] - - param_dict = dict(self._savedparams) - gradient_dict = dict(self._savedgradients) -# kmm_dict = dict(self._savedpsiKmm) - iters = np.array(param_dict.keys()) - ABCD_dict = np.array(self._savedABCD) - self.showing = 0 - -# ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4) - figs.append(pylab.figure("BGPLVM DEBUG X", figsize=(12, 4))) - ax2 = self._debug_get_axis(figs) - ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes, - ha='center', va='center') - figs[-1].canvas.draw() - figs[-1].tight_layout(rect=(0, 0, 1, .86)) -# ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2) - figs.append(pylab.figure("BGPLVM DEBUG S", figsize=(12, 4))) - ax3 = self._debug_get_axis(figs) - ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes, - ha='center', va='center') - figs[-1].canvas.draw() - figs[-1].tight_layout(rect=(0, 0, 1, .86)) -# ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2) - figs.append(pylab.figure("BGPLVM DEBUG Z", figsize=(6, 4))) - ax4 = self._debug_get_axis(figs) - ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes, - ha='center', va='center') - figs[-1].canvas.draw() - figs[-1].tight_layout(rect=(0, 0, 1, .86)) -# ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2) - figs.append(pylab.figure("BGPLVM DEBUG theta", figsize=(6, 4))) - ax5 = self._debug_get_axis(figs) - ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes, - ha='center', va='center') - figs[-1].canvas.draw() - figs[-1].tight_layout(rect=(.15, 0, 1, .86)) -# figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6))) -# fig = figs[-1] -# ax6 = fig.add_subplot(121) -# ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, 
transform=ax6.transAxes, -# ha='center', va='center') -# ax7 = fig.add_subplot(122) -# ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes, -# ha='center', va='center') - figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6))) - fig = figs[-1] - ax8 = fig.add_subplot(121) - ax8.text(.5, .5, r"${\mathbf{A,B,C,input_dim}}$", color='k', alpha=.5, transform=ax8.transAxes, - ha='center', va='center') - ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 1], label='A') - ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 2], label='B') - ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 3], label='C') - ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='input_dim') - ax8.legend() - figs[-1].canvas.draw() - figs[-1].tight_layout(rect=(.15, 0, 1, .86)) - - X, S, Z, theta = self._debug_filter_params(param_dict[self.showing]) - Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing]) -# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag - - quiver_units = 'xy' - quiver_scale = 1 - quiver_scale_units = 'xy' - Xlatentplts = ax2.plot(X, ls="-", marker="x") - colors = colorConverter.to_rgba_array([p.get_color() for p in Xlatentplts], .4) - Ulatent = np.zeros_like(X) - xlatent = np.tile(np.arange(0, X.shape[0])[:, None], X.shape[1]) - Xlatentgrads = ax2.quiver(xlatent, X, Ulatent, Xg, color=colors, - units=quiver_units, scale_units=quiver_scale_units, - scale=quiver_scale) - - Slatentplts = ax3.plot(S, ls="-", marker="x") - Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors, - units=quiver_units, scale_units=quiver_scale_units, - scale=quiver_scale) - ax3.set_ylim(0, 1.) 
- - xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1]) - UZ = np.zeros_like(Z) - Zplts = ax4.plot(Z, ls="-", marker="x") - Zgrads = ax4.quiver(xZ, Z, UZ, Zg, color=colors, - units=quiver_units, scale_units=quiver_scale_units, - scale=quiver_scale) - - xtheta = np.arange(len(theta)) - Utheta = np.zeros_like(theta) - thetaplts = ax5.bar(xtheta - .4, theta, color=colors) - thetagrads = ax5.quiver(xtheta, theta, Utheta, thetag, color=colors, - units=quiver_units, scale_units=quiver_scale_units, - scale=quiver_scale, - edgecolors=('k',), linewidths=[1]) - pylab.setp(thetaplts, zorder=0) - pylab.setp(thetagrads, zorder=10) - ax5.set_xticks(np.arange(len(theta))) - ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17) - -# imkmm = ax6.imshow(kmm_dict[self.showing][0]) -# from mpl_toolkits.axes_grid1 import make_axes_locatable -# divider = make_axes_locatable(ax6) -# caxkmm = divider.append_axes("right", "5%", pad="1%") -# cbarkmm = pylab.colorbar(imkmm, cax=caxkmm) -# -# imkmmdl = ax7.imshow(kmm_dict[self.showing][1]) -# divider = make_axes_locatable(ax7) -# caxkmmdl = divider.append_axes("right", "5%", pad="1%") -# cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl) - -# input_dimleg = ax1.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], -# loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.15, 1, 1.15), -# borderaxespad=0, mode="expand") - ax2.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], - loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1), - borderaxespad=0, mode="expand") - ax3.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], - loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1), - borderaxespad=0, mode="expand") - ax4.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], - loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1), - borderaxespad=0, mode="expand") - 
ax5.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], - loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1), - borderaxespad=0, mode="expand") - Lleg = ax1.legend() - Lleg.draggable() -# ax1.add_artist(input_dimleg) - - indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color()) - indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color()) - indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color()) -# for err in self._savederrors: -# if err < kllls.shape[0]: -# ax1.scatter(kllls[err, 0], kllls[err, 2], s=50, marker=(5, 2), c=KL.get_color()) -# ax1.scatter(kllls[err, 0], kllls[err, 1] - kllls[err, 2], s=50, marker=(5, 2), c=LL.get_color()) -# ax1.scatter(kllls[err, 0], kllls[err, 1], s=50, marker=(5, 2), c=L.get_color()) - -# try: -# for f in figs: -# f.canvas.draw() -# f.tight_layout(box=(0, .15, 1, .9)) -# # pylab.draw() -# # pylab.tight_layout(box=(0, .1, 1, .9)) -# except: -# pass - - # parameter changes - # ax2 = pylab.subplot2grid((4, 1), (1, 0), 3, 1, projection='3d') - button_options = [0, 0] # [0]: clicked -- [1]: dragged - - def update_plots(event): - if button_options[0] and not button_options[1]: -# event.button, event.x, event.y, event.xdata, event.ydata) - tmp = np.abs(iters - event.xdata) - closest_hit = iters[tmp == tmp.min()][0] - - if closest_hit != self.showing: - self.showing = closest_hit - # print closest_hit, iters, event.xdata - - indicatorLL.set_data(self.showing, kllls[self.showing, 1] - kllls[self.showing, 2]) - indicatorKL.set_data(self.showing, kllls[self.showing, 2]) - indicatorL.set_data(self.showing, kllls[self.showing, 1]) - - X, S, Z, theta = self._debug_filter_params(param_dict[self.showing]) - Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing]) -# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag - - for i, Xlatent in enumerate(Xlatentplts): 
- Xlatent.set_ydata(X[:, i]) - Xlatentgrads.set_offsets(np.array([xlatent.ravel(), X.ravel()]).T) - Xlatentgrads.set_UVC(Ulatent, Xg) - - for i, Slatent in enumerate(Slatentplts): - Slatent.set_ydata(S[:, i]) - Slatentgrads.set_offsets(np.array([xlatent.ravel(), S.ravel()]).T) - Slatentgrads.set_UVC(Ulatent, Sg) - - for i, Zlatent in enumerate(Zplts): - Zlatent.set_ydata(Z[:, i]) - Zgrads.set_offsets(np.array([xZ.ravel(), Z.ravel()]).T) - Zgrads.set_UVC(UZ, Zg) - - for p, t in zip(thetaplts, theta): - p.set_height(t) - thetagrads.set_offsets(np.array([xtheta.ravel(), theta.ravel()]).T) - thetagrads.set_UVC(Utheta, thetag) - -# imkmm.set_data(kmm_dict[self.showing][0]) -# imkmm.autoscale() -# cbarkmm.update_normal(imkmm) -# -# imkmmdl.set_data(kmm_dict[self.showing][1]) -# imkmmdl.autoscale() -# cbarkmmdl.update_normal(imkmmdl) - - ax2.relim() - # ax3.relim() - ax4.relim() - ax5.relim() - ax2.autoscale() - # ax3.autoscale() - ax4.autoscale() - ax5.autoscale() - - [fig.canvas.draw() for fig in figs] - button_options[0] = 0 - button_options[1] = 0 - - def onclick(event): - if event.inaxes is ax1 and event.button == 1: - button_options[0] = 1 - def motion(event): - if button_options[0]: - button_options[1] = 1 - - cidr = figs[0].canvas.mpl_connect('button_release_event', update_plots) - cidp = figs[0].canvas.mpl_connect('button_press_event', onclick) - cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion) - - return ax1, ax2, ax3, ax4, ax5 # , ax6, ax7 - - - - def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): """ objective function for fitting the latent variables for test points diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index e602a59a..34b7fb7b 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -1,4 +1,4 @@ -### Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# ## Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) @@ -26,11 +26,11 @@ class GPLVM(GP): :type init: 'PCA'|'random' """ - def __init__(self, Y, input_dim, init='PCA', X = None, kernel=None, normalize_Y=False): + def __init__(self, Y, input_dim, init='PCA', X=None, kernel=None, normalize_Y=False): if X is None: X = self.initialise_latent(init, input_dim, Y) if kernel is None: - kernel = kern.rbf(input_dim, ARD=input_dim>1) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2)) + kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2)) likelihood = Gaussian(Y, normalize=normalize_Y) GP.__init__(self, X, likelihood, kernel, normalize_X=False) self._set_params(self._get_params()) @@ -42,26 +42,26 @@ class GPLVM(GP): return np.random.randn(Y.shape[0], input_dim) def _get_param_names(self): - return sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.num_data)],[]) + GP._get_param_names(self) + return sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + GP._get_param_names(self) def _get_params(self): return np.hstack((self.X.flatten(), GP._get_params(self))) - def _set_params(self,x): - self.X = x[:self.num_data*self.input_dim].reshape(self.num_data,self.input_dim).copy() + def _set_params(self, x): + self.X = x[:self.num_data * self.input_dim].reshape(self.num_data, self.input_dim).copy() GP._set_params(self, x[self.X.size:]) def _log_likelihood_gradients(self): - dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X) + dL_dX = 2.*self.kern.dK_dX(self.dL_dK, self.X) - return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self))) + return np.hstack((dL_dX.flatten(), GP._log_likelihood_gradients(self))) def plot(self): - assert self.likelihood.Y.shape[1]==2 - pb.scatter(self.likelihood.Y[:,0],self.likelihood.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet) - Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None] 
+ assert self.likelihood.Y.shape[1] == 2 + pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet) + Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None] mu, var, upper, lower = self.predict(Xnew) - pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5) + pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5) def plot_latent(self, *args, **kwargs): return util.plot_latent.plot_latent(self, *args, **kwargs) diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 75c6fee9..febaa750 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -61,12 +61,14 @@ class MRD(Model): assert not ('kernel' in kw), "pass kernels through `kernels` argument" self.input_dim = input_dim - self.num_inducing = num_inducing self._debug = _debug + self.num_inducing = num_inducing self._init = True X = self._init_X(initx, likelihood_or_Y_list) Z = self._init_Z(initz, X) + self.num_inducing = Z.shape[0] # ensure M==N if M>N + self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, num_inducing=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)] del self._init @@ -75,12 +77,35 @@ class MRD(Model): self.nparams = nparams.cumsum() self.num_data = self.gref.num_data + self.NQ = self.num_data * self.input_dim self.MQ = self.num_inducing * self.input_dim Model.__init__(self) self._set_params(self._get_params()) + def __getstate__(self): + return [self.names, + self.bgplvms, + self.gref, + self.nparams, + self.input_dim, + self.num_inducing, + self.num_data, + self.NQ, + self.MQ] + + def __setstate__(self, state): + self.MQ = state.pop() + self.NQ = state.pop() + self.num_data = state.pop() + self.num_inducing = state.pop() + self.input_dim = state.pop() + self.nparams = state.pop() + self.gref = state.pop() + self.bgplvms = state.pop() + self.names = state.pop() + @property def X(self): return self.gref.X @@ -257,7 +282,7 @@ class MRD(Model): def _handle_plotting(self, fignum, axes, plotf): if axes is None: - 
fig = pylab.figure(num=fignum, figsize=(4 * len(self.bgplvms), 3)) + fig = pylab.figure(num=fignum) for i, g in enumerate(self.bgplvms): if axes is None: ax = fig.add_subplot(1, len(self.bgplvms), i + 1) @@ -285,11 +310,11 @@ class MRD(Model): return fig def plot_scales(self, fignum=None, ax=None, *args, **kwargs): - fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.kern.plot_ARD(ax=ax, *args, **kwargs)) + fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.kern.plot_ARD(ax=ax, title=r'$Y_{}$'.format(i), *args, **kwargs)) return fig def plot_latent(self, fignum=None, ax=None, *args, **kwargs): - fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs)) + fig = self.gref.plot_X_1d(*args, **kwargs) # self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs)) return fig def _debug_plot(self): From 05e8e75c5871df8502730d6ada69eaae0babdb8e Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Tue, 25 Jun 2013 18:01:18 +0100 Subject: [PATCH 02/20] pickling unified with __getstate__ and __setstate__ --- GPy/core/gp.py | 18 +++++++++--------- GPy/core/gp_base.py | 3 +-- GPy/kern/kern.py | 22 ++++++++++++++++++++++ GPy/models/bayesian_gplvm.py | 2 +- GPy/models/mrd.py | 3 ++- 5 files changed, 35 insertions(+), 13 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index e3bbc85d..6587fdd5 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -6,7 +6,7 @@ import numpy as np import pylab as pb from .. 
import kern from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs -#from ..util.plot import gpplot, Tango +# from ..util.plot import gpplot, Tango from ..likelihoods import EP from gp_base import GPBase @@ -46,12 +46,12 @@ class GP(GPBase): # the gradient of the likelihood wrt the covariance matrix if self.likelihood.YYT is None: - #alpha = np.dot(self.Ki, self.likelihood.Y) - alpha,_ = dpotrs(self.L, self.likelihood.Y,lower=1) + # alpha = np.dot(self.Ki, self.likelihood.Y) + alpha, _ = dpotrs(self.L, self.likelihood.Y, lower=1) self.dL_dK = 0.5 * (tdot(alpha) - self.output_dim * self.Ki) else: - #tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki) + # tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki) tmp, _ = dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1) tmp, _ = dpotrs(self.L, np.asfortranarray(tmp.T), lower=1) self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki) @@ -72,7 +72,7 @@ class GP(GPBase): """ self.likelihood.restart() self.likelihood.fit_full(self.kern.K(self.X)) - self._set_params(self._get_params()) # update the GP + self._set_params(self._get_params()) # update the GP def _model_fit_term(self): """ @@ -81,7 +81,7 @@ class GP(GPBase): if self.likelihood.YYT is None: tmp, _ = dtrtrs(self.L, np.asfortranarray(self.likelihood.Y), lower=1) return -0.5 * np.sum(np.square(tmp)) - #return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y))) + # return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y))) else: return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT)) @@ -104,13 +104,13 @@ class GP(GPBase): """ return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) - def _raw_predict(self, _Xnew, which_parts='all', full_cov=False,stop=False): + def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False): """ Internal helper function for making predictions, does not account for normalization or likelihood """ - Kx = 
self.kern.K(_Xnew,self.X,which_parts=which_parts).T - #KiKx = np.dot(self.Ki, Kx) + Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T + # KiKx = np.dot(self.Ki, Kx) KiKx, _ = dpotrs(self.L, np.asfortranarray(Kx), lower=1) mu = np.dot(KiKx.T, self.likelihood.Y) if full_cov: diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index a1d182d6..d4f63295 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -29,7 +29,7 @@ class GPBase(Model): self._Xscale = np.ones((1, self.input_dim)) super(GPBase, self).__init__() - #Model.__init__(self) + # Model.__init__(self) # All leaf nodes should call self._set_params(self._get_params()) at # the end @@ -57,7 +57,6 @@ class GPBase(Model): self.num_data = state.pop() self.X = state.pop() Model.__setstate__(self, state) - self._set_params(self._get_params()) def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None): """ diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 2c9cc154..aa916940 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -43,6 +43,28 @@ class kern(Parameterised): Parameterised.__init__(self) + def __getstate__(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return Parameterised.__getstate__(self) + [self.parts, + self.Nparts, + self.num_params, + self.input_dim, + self.input_slices, + self.param_slices + ] + + def __setstate__(self, state): + self.param_slices = state.pop() + self.input_slices = state.pop() + self.input_dim = state.pop() + self.num_params = state.pop() + self.Nparts = state.pop() + self.parts = state.pop() + Parameterised.__setstate__(self, state) + def plot_ARD(self, fignum=None, ax=None, title=None): """If an ARD kernel is present, it bar-plots the ARD parameters""" diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 7fddfbfd..8ea96405 100644 --- a/GPy/models/bayesian_gplvm.py +++ 
b/GPy/models/bayesian_gplvm.py @@ -53,7 +53,7 @@ class BayesianGPLVM(SparseGP, GPLVM): Get the current state of the class, here just all the indices, rest can get recomputed """ - return [self.init] + SparseGP.__getstate__(self) + return SparseGP.__getstate__(self) + [self.init] def __setstate__(self, state): self.init = state.pop() diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 774face8..32bd2930 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -85,7 +85,7 @@ class MRD(Model): self.ensure_default_constraints() def __getstate__(self): - return [self.names, + return Model.__getstate__(self) + [self.names, self.bgplvms, self.gref, self.nparams, @@ -105,6 +105,7 @@ class MRD(Model): self.gref = state.pop() self.bgplvms = state.pop() self.names = state.pop() + Model.__setstate__(self, state) @property def X(self): From f5effb8cb60c8bc7a39b87b321737cf36ff845e3 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Wed, 26 Jun 2013 16:48:48 +0100 Subject: [PATCH 03/20] added robust pickling, switches to old behaviour, if get/setstate not implemented --- GPy/core/__init__.py | 2 +- GPy/core/gp.py | 7 +- GPy/core/gp_base.py | 8 +- GPy/core/model.py | 23 +-- .../{parameterised.py => parameterized.py} | 77 ++++----- GPy/core/sparse_gp.py | 8 +- GPy/core/svigp.py | 8 + GPy/kern/kern.py | 14 +- GPy/models/bayesian_gplvm.py | 8 +- GPy/models/gp_regression.py | 13 +- GPy/models/mrd.py | 8 +- GPy/models/sparse_gp_classification.py | 13 +- GPy/models/sparse_gp_regression.py | 10 ++ GPy/models/sparse_gplvm.py | 8 + GPy/models/svigp_regression.py | 8 + GPy/models/warped_gp.py | 8 + doc/GPy.core.rst | 60 ++++++- doc/GPy.examples.rst | 16 +- doc/GPy.inference.rst | 30 +++- doc/GPy.kern.rst | 161 +----------------- doc/GPy.likelihoods.rst | 12 +- doc/GPy.models.rst | 54 +++--- doc/GPy.testing.rst | 48 ++++++ doc/GPy.util.rst | 56 ++++++ doc/conf.py | 8 +- doc/index.rst | 1 + doc/tuto_interacting_with_models.rst | 6 +- 27 files changed, 392 insertions(+), 283 deletions(-) 
rename GPy/core/{parameterised.py => parameterized.py} (89%) diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index e9e049b0..8b040984 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) from model import * -from parameterised import * +from parameterized import * import priors from gp import GP from sparse_gp import SparseGP diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 6587fdd5..a0e60bcc 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -31,8 +31,11 @@ class GP(GPBase): GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) self._set_params(self._get_params()) - def __setstate__(self, state): - GPBase.__setstate__(self, state) + def getstate(self): + return GPBase.getstate(self) + + def setstate(self, state): + GPBase.setstate(self, state) self._set_params(self._get_params()) def _set_params(self, p): diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index d4f63295..2a0ba7c1 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -33,12 +33,12 @@ class GPBase(Model): # All leaf nodes should call self._set_params(self._get_params()) at # the end - def __getstate__(self): + def getstate(self): """ Get the current state of the class, here just all the indices, rest can get recomputed """ - return Model.__getstate__(self) + [self.X, + return Model.getstate(self) + [self.X, self.num_data, self.input_dim, self.kern, @@ -47,7 +47,7 @@ class GPBase(Model): self._Xoffset, self._Xscale] - def __setstate__(self, state): + def setstate(self, state): self._Xscale = state.pop() self._Xoffset = state.pop() self.output_dim = state.pop() @@ -56,7 +56,7 @@ class GPBase(Model): self.input_dim = state.pop() self.num_data = state.pop() self.X = state.pop() - Model.__setstate__(self, state) + Model.setstate(self, state) def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None): """ 
diff --git a/GPy/core/model.py b/GPy/core/model.py index 3c5c8419..fe5d5181 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -6,18 +6,18 @@ from .. import likelihoods from ..inference import optimization from ..util.linalg import jitchol from GPy.util.misc import opt_wrapper -from parameterised import Parameterised +from parameterized import Parameterized import multiprocessing as mp import numpy as np from GPy.core.domains import POSITIVE, REAL from numpy.linalg.linalg import LinAlgError # import numdifftools as ndt -class Model(Parameterised): +class Model(Parameterized): _fail_count = 0 # Count of failed optimization steps (see objective) _allowed_failures = 10 # number of allowed failures def __init__(self): - Parameterised.__init__(self) + Parameterized.__init__(self) self.priors = None self.optimization_runs = [] self.sampling_runs = [] @@ -32,24 +32,27 @@ class Model(Parameterised): def _log_likelihood_gradients(self): raise NotImplementedError, "this needs to be implemented to use the Model class" - def __getstate__(self): + def getstate(self): """ - Get the current state of the class, - here just all the indices, rest can get recomputed + Get the current state of the class. 
+ + Inherited from Parameterized, so add those parameters to the state """ - return Parameterised.__getstate__(self) + \ + return Parameterized.getstate(self) + \ [self.priors, self.optimization_runs, self.sampling_runs, self.preferred_optimizer] - def __setstate__(self, state): + def setstate(self, state): """ set state from previous call to getstate + + call Parameterized with the rest of the state """ self.preferred_optimizer = state.pop() self.sampling_runs = state.pop() self.optimization_runs = state.pop() self.priors = state.pop() - Parameterised.__setstate__(self, state) + Parameterized.setstate(self, state) def set_prior(self, regexp, what): """ @@ -355,7 +358,7 @@ class Model(Parameterised): return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld def __str__(self): - s = Parameterised.__str__(self).split('\n') + s = Parameterized.__str__(self).split('\n') # add priors to the string if self.priors is not None: strs = [str(p) if p is not None else '' for p in self.priors] diff --git a/GPy/core/parameterised.py b/GPy/core/parameterized.py similarity index 89% rename from GPy/core/parameterised.py rename to GPy/core/parameterized.py index 13467fdf..0f5ef905 100644 --- a/GPy/core/parameterised.py +++ b/GPy/core/parameterized.py @@ -9,7 +9,7 @@ import cPickle import warnings import transformations -class Parameterised(object): +class Parameterized(object): def __init__(self): """ This is the base class for model and kernel. 
Mostly just handles tieing and constraining of parameters @@ -20,19 +20,40 @@ class Parameterised(object): self.constrained_indices = [] self.constraints = [] - def pickle(self, filename, protocol= -1): - f = file(filename, 'w') - cPickle.dump(self, f, protocol) - f.close() + def pickle(self, filename, protocol=None): + if protocol is None: + if self._has_get_set_state(): + protocol = 0 + else: + protocol = -1 + with open(filename, 'w') as f: + cPickle.dump(self, f, protocol) def copy(self): """Returns a (deep) copy of the current model """ return copy.deepcopy(self) def __getstate__(self): + if self._has_get_set_state(): + return self.getstate() + return self.__dict__ + + def __setstate__(self, state): + if self._has_get_set_state(): + return self.setstate(state) + self.__dict__ = state + + def _has_get_set_state(self): + return 'getstate' in vars(self.__class__) and 'setstate' in vars(self.__class__) + + def getstate(self): """ Get the current state of the class, here just all the indices, rest can get recomputed + + For inheriting from Parameterized: + Always append the state of the inherited object + and call down to the inherited object in setstate!! 
""" return [self.tied_indices, self.fixed_indices, @@ -40,54 +61,13 @@ class Parameterised(object): self.constrained_indices, self.constraints] - def __setstate__(self, state): + def setstate(self, state): self.constraints = state.pop() self.constrained_indices = state.pop() self.fixed_values = state.pop() self.fixed_indices = state.pop() self.tied_indices = state.pop() - @property - def params(self): - """ - Returns a **copy** of parameters in non transformed space - - :see_also: :py:func:`GPy.core.Parameterised.params_transformed` - """ - return self._get_params() - - @params.setter - def params(self, params): - self._set_params(params) - - @property - def params_transformed(self): - """ - Returns a **copy** of parameters in transformed space - - :see_also: :py:func:`GPy.core.Parameterised.params` - """ - return self._get_params_transformed() - - @params_transformed.setter - def params_transformed(self, params): - self._set_params_transformed(params) - - _get_set_deprecation = """get and set methods wont be available at next minor release - in the next releases you will get and set with following syntax: - Assume m is a model class: - print m['var'] # > prints all parameters matching 'var' - m['var'] = 2. # > sets all parameters matching 'var' to 2. - m['var'] = # > sets parameters matching 'var' to - """ - def get(self, regexp): - warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2) - return self[regexp] - - def set(self, regexp, val): - warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2) - self[regexp] = val - def __getitem__(self, regexp, return_names=False): """ Get a model parameter by name. The name is applied as a regular @@ -120,6 +100,9 @@ class Parameterised(object): raise AttributeError, "no parameter matches %s" % name def tie_params(self, regexp): + """ + Tie (all!) parameters matching the regular expression `regexp`. 
+ """ matches = self.grep_param_names(regexp) assert matches.size > 0, "need at least something to tie together" if len(self.tied_indices): diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index f4416cf2..93ba5d7d 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -50,22 +50,22 @@ class SparseGP(GPBase): if self.has_uncertain_inputs: self.X_variance /= np.square(self._Xscale) - def __getstate__(self): + def getstate(self): """ Get the current state of the class, here just all the indices, rest can get recomputed """ - return GPBase.__getstate__(self) + [self.Z, + return GPBase.getstate(self) + [self.Z, self.num_inducing, self.has_uncertain_inputs, self.X_variance] - def __setstate__(self, state): + def setstate(self, state): self.X_variance = state.pop() self.has_uncertain_inputs = state.pop() self.num_inducing = state.pop() self.Z = state.pop() - GPBase.__setstate__(self, state) + GPBase.setstate(self, state) def _compute_kernel_matrices(self): # kernel computations, using BGPLVM notation diff --git a/GPy/core/svigp.py b/GPy/core/svigp.py index 1db0e26f..5d6bcd8b 100644 --- a/GPy/core/svigp.py +++ b/GPy/core/svigp.py @@ -91,6 +91,14 @@ class SVIGP(GPBase): self._param_steplength_trace = [] self._vb_steplength_trace = [] + def getstate(self): + return GPBase.getstate(self) + + + def setstate(self, state): + return GPBase.setstate(self, state) + + def _compute_kernel_matrices(self): # kernel computations, using BGPLVM notation self.Kmm = self.kern.K(self.Z) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index aa916940..176abbaf 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -3,12 +3,12 @@ import numpy as np import pylab as pb -from ..core.parameterised import Parameterised +from ..core.parameterized import Parameterized from parts.kernpart import Kernpart import itertools from parts.prod import Prod as prod -class kern(Parameterised): +class kern(Parameterized): def __init__(self, input_dim, parts=[], input_slices=None): """ This 
is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where. @@ -41,14 +41,14 @@ class kern(Parameterised): self.compute_param_slices() - Parameterised.__init__(self) + Parameterized.__init__(self) - def __getstate__(self): + def getstate(self): """ Get the current state of the class, here just all the indices, rest can get recomputed """ - return Parameterised.__getstate__(self) + [self.parts, + return Parameterized.getstate(self) + [self.parts, self.Nparts, self.num_params, self.input_dim, @@ -56,14 +56,14 @@ class kern(Parameterised): self.param_slices ] - def __setstate__(self, state): + def setstate(self, state): self.param_slices = state.pop() self.input_slices = state.pop() self.input_dim = state.pop() self.num_params = state.pop() self.Nparts = state.pop() self.parts = state.pop() - Parameterised.__setstate__(self, state) + Parameterized.setstate(self, state) def plot_ARD(self, fignum=None, ax=None, title=None): diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 8ea96405..916c307d 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -48,16 +48,16 @@ class BayesianGPLVM(SparseGP, GPLVM): SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs) self.ensure_default_constraints() - def __getstate__(self): + def getstate(self): """ Get the current state of the class, here just all the indices, rest can get recomputed """ - return SparseGP.__getstate__(self) + [self.init] + return SparseGP.getstate(self) + [self.init] - def __setstate__(self, state): + def setstate(self, state): self.init = state.pop() - SparseGP.__setstate__(self, state) + SparseGP.setstate(self, state) def _get_param_names(self): X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py index 
db5d21b2..615e6618 100644 --- a/GPy/models/gp_regression.py +++ b/GPy/models/gp_regression.py @@ -25,11 +25,20 @@ class GPRegression(GP): """ - def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False): + def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False): if kernel is None: kernel = kern.rbf(X.shape[1]) - likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y) + likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y) GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) self.ensure_default_constraints() + + def getstate(self): + return GP.getstate(self) + + + def setstate(self, state): + return GP.setstate(self, state) + + pass diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 32bd2930..c8cb6607 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -84,8 +84,8 @@ class MRD(Model): Model.__init__(self) self.ensure_default_constraints() - def __getstate__(self): - return Model.__getstate__(self) + [self.names, + def getstate(self): + return Model.getstate(self) + [self.names, self.bgplvms, self.gref, self.nparams, @@ -95,7 +95,7 @@ class MRD(Model): self.NQ, self.MQ] - def __setstate__(self, state): + def setstate(self, state): self.MQ = state.pop() self.NQ = state.pop() self.num_data = state.pop() @@ -105,7 +105,7 @@ class MRD(Model): self.gref = state.pop() self.bgplvms = state.pop() self.names = state.pop() - Model.__setstate__(self, state) + Model.setstate(self, state) @property def X(self): diff --git a/GPy/models/sparse_gp_classification.py b/GPy/models/sparse_gp_classification.py index 9228fb89..5f36ebe1 100644 --- a/GPy/models/sparse_gp_classification.py +++ b/GPy/models/sparse_gp_classification.py @@ -28,7 +28,7 @@ class SparseGPClassification(SparseGP): def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10): if kernel is None: - kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) + kernel = kern.rbf(X.shape[1]) + 
kern.white(X.shape[1], 1e-3) if likelihood is None: distribution = likelihoods.likelihood_functions.Binomial() @@ -41,7 +41,16 @@ class SparseGPClassification(SparseGP): i = np.random.permutation(X.shape[0])[:num_inducing] Z = X[i].copy() else: - assert Z.shape[1]==X.shape[1] + assert Z.shape[1] == X.shape[1] SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X) self.ensure_default_constraints() + + def getstate(self): + return SparseGP.getstate(self) + + + def setstate(self, state): + return SparseGP.setstate(self, state) + + pass diff --git a/GPy/models/sparse_gp_regression.py b/GPy/models/sparse_gp_regression.py index 0dcef3e0..d5fcc7d7 100644 --- a/GPy/models/sparse_gp_regression.py +++ b/GPy/models/sparse_gp_regression.py @@ -43,3 +43,13 @@ class SparseGPRegression(SparseGP): SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance) self.ensure_default_constraints() + pass + + def getstate(self): + return SparseGP.getstate(self) + + + def setstate(self, state): + return SparseGP.setstate(self, state) + + pass diff --git a/GPy/models/sparse_gplvm.py b/GPy/models/sparse_gplvm.py index d6f4adb9..6e7e40b1 100644 --- a/GPy/models/sparse_gplvm.py +++ b/GPy/models/sparse_gplvm.py @@ -28,6 +28,14 @@ class SparseGPLVM(SparseGPRegression, GPLVM): SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing) self.ensure_default_constraints() + def getstate(self): + return SparseGPRegression.getstate(self) + + + def setstate(self, state): + return SparseGPRegression.setstate(self, state) + + def _get_param_names(self): return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + SparseGPRegression._get_param_names(self)) diff --git a/GPy/models/svigp_regression.py b/GPy/models/svigp_regression.py index 8448bf37..4d22c619 100644 --- a/GPy/models/svigp_regression.py +++ b/GPy/models/svigp_regression.py @@ -42,3 +42,11 @@ class 
SVIGPRegression(SVIGP): SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize) self.load_batch() + + def getstate(self): + return GPBase.getstate(self) + + + def setstate(self, state): + return GPBase.setstate(self, state) + diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py index fcef66c6..dc6eeb46 100644 --- a/GPy/models/warped_gp.py +++ b/GPy/models/warped_gp.py @@ -28,6 +28,14 @@ class WarpedGP(GP): GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) self._set_params(self._get_params()) + def getstate(self): + return GP.getstate(self) + + + def setstate(self, state): + return GP.setstate(self, state) + + def _scale_data(self, Y): self._Ymax = Y.max() self._Ymin = Y.min() diff --git a/doc/GPy.core.rst b/doc/GPy.core.rst index e02aaa2a..0590450f 100644 --- a/doc/GPy.core.rst +++ b/doc/GPy.core.rst @@ -9,6 +9,38 @@ core Package :undoc-members: :show-inheritance: +:mod:`domains` Module +--------------------- + +.. automodule:: GPy.core.domains + :members: + :undoc-members: + :show-inheritance: + +:mod:`fitc` Module +------------------ + +.. automodule:: GPy.core.fitc + :members: + :undoc-members: + :show-inheritance: + +:mod:`gp` Module +---------------- + +.. automodule:: GPy.core.gp + :members: + :undoc-members: + :show-inheritance: + +:mod:`gp_base` Module +--------------------- + +.. automodule:: GPy.core.gp_base + :members: + :undoc-members: + :show-inheritance: + :mod:`model` Module ------------------- @@ -17,10 +49,10 @@ core Package :undoc-members: :show-inheritance: -:mod:`parameterised` Module +:mod:`parameterized` Module --------------------------- -.. automodule:: GPy.core.parameterised +.. automodule:: GPy.core.parameterized :members: :undoc-members: :show-inheritance: @@ -33,3 +65,27 @@ core Package :undoc-members: :show-inheritance: +:mod:`sparse_gp` Module +----------------------- + +.. 
automodule:: GPy.core.sparse_gp + :members: + :undoc-members: + :show-inheritance: + +:mod:`svigp` Module +------------------- + +.. automodule:: GPy.core.svigp + :members: + :undoc-members: + :show-inheritance: + +:mod:`transformations` Module +----------------------------- + +.. automodule:: GPy.core.transformations + :members: + :undoc-members: + :show-inheritance: + diff --git a/doc/GPy.examples.rst b/doc/GPy.examples.rst index f17cf826..fedfd4b9 100644 --- a/doc/GPy.examples.rst +++ b/doc/GPy.examples.rst @@ -25,14 +25,6 @@ examples Package :undoc-members: :show-inheritance: -:mod:`non_gaussian` Module --------------------------- - -.. automodule:: GPy.examples.non_gaussian - :members: - :undoc-members: - :show-inheritance: - :mod:`regression` Module ------------------------ @@ -41,6 +33,14 @@ examples Package :undoc-members: :show-inheritance: +:mod:`stochastic` Module +------------------------ + +.. automodule:: GPy.examples.stochastic + :members: + :undoc-members: + :show-inheritance: + :mod:`tutorials` Module ----------------------- diff --git a/doc/GPy.inference.rst b/doc/GPy.inference.rst index f30e7d25..6a1bef4a 100644 --- a/doc/GPy.inference.rst +++ b/doc/GPy.inference.rst @@ -1,10 +1,18 @@ inference Package ================= -:mod:`SGD` Module ------------------ +:mod:`conjugate_gradient_descent` Module +---------------------------------------- -.. automodule:: GPy.inference.SGD +.. automodule:: GPy.inference.conjugate_gradient_descent + :members: + :undoc-members: + :show-inheritance: + +:mod:`gradient_descent_update_rules` Module +------------------------------------------- + +.. automodule:: GPy.inference.gradient_descent_update_rules :members: :undoc-members: :show-inheritance: @@ -25,3 +33,19 @@ inference Package :undoc-members: :show-inheritance: +:mod:`scg` Module +----------------- + +.. automodule:: GPy.inference.scg + :members: + :undoc-members: + :show-inheritance: + +:mod:`sgd` Module +----------------- + +.. 
automodule:: GPy.inference.sgd + :members: + :undoc-members: + :show-inheritance: + diff --git a/doc/GPy.kern.rst b/doc/GPy.kern.rst index aef712dc..35d9ec00 100644 --- a/doc/GPy.kern.rst +++ b/doc/GPy.kern.rst @@ -9,38 +9,6 @@ kern Package :undoc-members: :show-inheritance: -:mod:`Brownian` Module ----------------------- - -.. automodule:: GPy.kern.Brownian - :members: - :undoc-members: - :show-inheritance: - -:mod:`Matern32` Module ----------------------- - -.. automodule:: GPy.kern.Matern32 - :members: - :undoc-members: - :show-inheritance: - -:mod:`Matern52` Module ----------------------- - -.. automodule:: GPy.kern.Matern52 - :members: - :undoc-members: - :show-inheritance: - -:mod:`bias` Module ------------------- - -.. automodule:: GPy.kern.bias - :members: - :undoc-members: - :show-inheritance: - :mod:`constructors` Module -------------------------- @@ -49,30 +17,6 @@ kern Package :undoc-members: :show-inheritance: -:mod:`coregionalise` Module ---------------------------- - -.. automodule:: GPy.kern.coregionalise - :members: - :undoc-members: - :show-inheritance: - -:mod:`exponential` Module -------------------------- - -.. automodule:: GPy.kern.exponential - :members: - :undoc-members: - :show-inheritance: - -:mod:`finite_dimensional` Module --------------------------------- - -.. automodule:: GPy.kern.finite_dimensional - :members: - :undoc-members: - :show-inheritance: - :mod:`kern` Module ------------------ @@ -81,107 +25,10 @@ kern Package :undoc-members: :show-inheritance: -:mod:`kernpart` Module ----------------------- +Subpackages +----------- -.. automodule:: GPy.kern.kernpart - :members: - :undoc-members: - :show-inheritance: +.. toctree:: -:mod:`linear` Module --------------------- - -.. automodule:: GPy.kern.linear - :members: - :undoc-members: - :show-inheritance: - -:mod:`periodic_Matern32` Module -------------------------------- - -.. 
automodule:: GPy.kern.periodic_Matern32 - :members: - :undoc-members: - :show-inheritance: - -:mod:`periodic_Matern52` Module -------------------------------- - -.. automodule:: GPy.kern.periodic_Matern52 - :members: - :undoc-members: - :show-inheritance: - -:mod:`periodic_exponential` Module ----------------------------------- - -.. automodule:: GPy.kern.periodic_exponential - :members: - :undoc-members: - :show-inheritance: - -:mod:`prod` Module ------------------- - -.. automodule:: GPy.kern.prod - :members: - :undoc-members: - :show-inheritance: - -:mod:`prod_orthogonal` Module ------------------------------ - -.. automodule:: GPy.kern.prod_orthogonal - :members: - :undoc-members: - :show-inheritance: - -:mod:`rational_quadratic` Module --------------------------------- - -.. automodule:: GPy.kern.rational_quadratic - :members: - :undoc-members: - :show-inheritance: - -:mod:`rbf` Module ------------------ - -.. automodule:: GPy.kern.rbf - :members: - :undoc-members: - :show-inheritance: - -:mod:`spline` Module --------------------- - -.. automodule:: GPy.kern.spline - :members: - :undoc-members: - :show-inheritance: - -:mod:`symmetric` Module ------------------------ - -.. automodule:: GPy.kern.symmetric - :members: - :undoc-members: - :show-inheritance: - -:mod:`sympykern` Module ------------------------ - -.. automodule:: GPy.kern.sympykern - :members: - :undoc-members: - :show-inheritance: - -:mod:`white` Module -------------------- - -.. automodule:: GPy.kern.white - :members: - :undoc-members: - :show-inheritance: + GPy.kern.parts diff --git a/doc/GPy.likelihoods.rst b/doc/GPy.likelihoods.rst index 03c15a82..9fec38f8 100644 --- a/doc/GPy.likelihoods.rst +++ b/doc/GPy.likelihoods.rst @@ -9,7 +9,7 @@ likelihoods Package :undoc-members: :show-inheritance: -:mod:`EP` Module +:mod:`ep` Module ---------------- .. 
automodule:: GPy.likelihoods.ep @@ -17,7 +17,7 @@ likelihoods Package :undoc-members: :show-inheritance: -:mod:`Gaussian` Module +:mod:`gaussian` Module ---------------------- .. automodule:: GPy.likelihoods.gaussian @@ -41,3 +41,11 @@ likelihoods Package :undoc-members: :show-inheritance: +:mod:`link_functions` Module +---------------------------- + +.. automodule:: GPy.likelihoods.link_functions + :members: + :undoc-members: + :show-inheritance: + diff --git a/doc/GPy.models.rst b/doc/GPy.models.rst index f4ae6a59..4d227642 100644 --- a/doc/GPy.models.rst +++ b/doc/GPy.models.rst @@ -9,7 +9,7 @@ models Package :undoc-members: :show-inheritance: -:mod:`Bayesian_GPLVM` Module +:mod:`bayesian_gplvm` Module ---------------------------- .. automodule:: GPy.models.bayesian_gplvm @@ -17,18 +17,18 @@ models Package :undoc-members: :show-inheritance: -:mod:`gp` Module ----------------- +:mod:`fitc_classification` Module +--------------------------------- -.. automodule:: GPy.models.gp +.. automodule:: GPy.models.fitc_classification :members: :undoc-members: :show-inheritance: -:mod:`gplvm` Module -------------------- +:mod:`gp_classification` Module +------------------------------- -.. automodule:: GPy.models.gplvm +.. automodule:: GPy.models.gp_classification :members: :undoc-members: :show-inheritance: @@ -41,18 +41,26 @@ models Package :undoc-members: :show-inheritance: -:mod:`sparse_gp` Module ------------------------ +:mod:`gplvm` Module +------------------- -.. automodule:: GPy.models.sparse_gp +.. automodule:: GPy.models.gplvm :members: :undoc-members: :show-inheritance: -:mod:`SparseGPLVM` Module --------------------------- +:mod:`mrd` Module +----------------- -.. automodule:: GPy.models.sparse_gplvm +.. automodule:: GPy.models.mrd + :members: + :undoc-members: + :show-inheritance: + +:mod:`sparse_gp_classification` Module +-------------------------------------- + +.. 
automodule:: GPy.models.sparse_gp_classification :members: :undoc-members: :show-inheritance: @@ -65,13 +73,21 @@ models Package :undoc-members: :show-inheritance: -.. :mod:`uncollapsed_sparse_GP` Module -.. ----------------------------------- +:mod:`sparse_gplvm` Module +-------------------------- -.. .. automodule:: GPy.models.uncollapsed_sparse_GP -.. :members: -.. :undoc-members: -.. :show-inheritance: +.. automodule:: GPy.models.sparse_gplvm + :members: + :undoc-members: + :show-inheritance: + +:mod:`svigp_regression` Module +------------------------------ + +.. automodule:: GPy.models.svigp_regression + :members: + :undoc-members: + :show-inheritance: :mod:`warped_gp` Module ----------------------- diff --git a/doc/GPy.testing.rst b/doc/GPy.testing.rst index 5b32558b..6c461177 100644 --- a/doc/GPy.testing.rst +++ b/doc/GPy.testing.rst @@ -1,6 +1,14 @@ testing Package =============== +:mod:`testing` Package +---------------------- + +.. automodule:: GPy.testing + :members: + :undoc-members: + :show-inheritance: + :mod:`bgplvm_tests` Module -------------------------- @@ -9,6 +17,22 @@ testing Package :undoc-members: :show-inheritance: +:mod:`cgd_tests` Module +----------------------- + +.. automodule:: GPy.testing.cgd_tests + :members: + :undoc-members: + :show-inheritance: + +:mod:`checkgrad` Module +----------------------- + +.. automodule:: GPy.testing.checkgrad + :members: + :undoc-members: + :show-inheritance: + :mod:`examples_tests` Module ---------------------------- @@ -33,6 +57,14 @@ testing Package :undoc-members: :show-inheritance: +:mod:`mrd_tests` Module +----------------------- + +.. automodule:: GPy.testing.mrd_tests + :members: + :undoc-members: + :show-inheritance: + :mod:`prior_tests` Module ------------------------- @@ -41,6 +73,22 @@ testing Package :undoc-members: :show-inheritance: +:mod:`psi_stat_expactation_tests` Module +---------------------------------------- + +.. 
automodule:: GPy.testing.psi_stat_expactation_tests + :members: + :undoc-members: + :show-inheritance: + +:mod:`psi_stat_gradient_tests` Module +------------------------------------- + +.. automodule:: GPy.testing.psi_stat_gradient_tests + :members: + :undoc-members: + :show-inheritance: + :mod:`sparse_gplvm_tests` Module -------------------------------- diff --git a/doc/GPy.util.rst b/doc/GPy.util.rst index 5bec990b..e66329b6 100644 --- a/doc/GPy.util.rst +++ b/doc/GPy.util.rst @@ -17,6 +17,14 @@ util Package :undoc-members: :show-inheritance: +:mod:`classification` Module +---------------------------- + +.. automodule:: GPy.util.classification + :members: + :undoc-members: + :show-inheritance: + :mod:`datasets` Module ---------------------- @@ -25,6 +33,14 @@ util Package :undoc-members: :show-inheritance: +:mod:`decorators` Module +------------------------ + +.. automodule:: GPy.util.decorators + :members: + :undoc-members: + :show-inheritance: + :mod:`linalg` Module -------------------- @@ -41,6 +57,22 @@ util Package :undoc-members: :show-inheritance: +:mod:`mocap` Module +------------------- + +.. automodule:: GPy.util.mocap + :members: + :undoc-members: + :show-inheritance: + +:mod:`pca` Module +----------------- + +.. automodule:: GPy.util.pca + :members: + :undoc-members: + :show-inheritance: + :mod:`plot` Module ------------------ @@ -49,6 +81,14 @@ util Package :undoc-members: :show-inheritance: +:mod:`plot_latent` Module +------------------------- + +.. automodule:: GPy.util.plot_latent + :members: + :undoc-members: + :show-inheritance: + :mod:`squashers` Module ----------------------- @@ -57,6 +97,22 @@ util Package :undoc-members: :show-inheritance: +:mod:`univariate_Gaussian` Module +--------------------------------- + +.. automodule:: GPy.util.univariate_Gaussian + :members: + :undoc-members: + :show-inheritance: + +:mod:`visualize` Module +----------------------- + +.. 
automodule:: GPy.util.visualize + :members: + :undoc-members: + :show-inheritance: + :mod:`warping_functions` Module ------------------------------- diff --git a/doc/conf.py b/doc/conf.py index 8a05f386..4c377cb6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -103,8 +103,10 @@ class Mock(object): #import mock print "Mocking" -MOCK_MODULES = ['pylab', 'sympy', 'sympy.utilities', 'sympy.utilities.codegen', 'sympy.core.cache', 'sympy.core', 'sympy.parsing', 'sympy.parsing.sympy_parser', 'matplotlib'] -#'matplotlib', 'matplotlib.color', 'matplotlib.pyplot', 'pylab' ] +MOCK_MODULES = ['sympy', + 'sympy.utilities', 'sympy.utilities.codegen', 'sympy.core.cache', + 'sympy.core', 'sympy.parsing', 'sympy.parsing.sympy_parser', + ] for mod_name in MOCK_MODULES: sys.modules[mod_name] = Mock() @@ -288,7 +290,7 @@ latex_elements = { #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. - #'preamble': '', + 'preamble': '\\usepackage{MnSymbol}', } # Grouping the document tree into LaTeX files. List of tuples diff --git a/doc/index.rst b/doc/index.rst index a7b68c16..29b4cf43 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -11,6 +11,7 @@ For a quick start, you can have a look at one of the tutorials: * `Interacting with models `_ * `A kernel overview `_ * `Writing new kernels `_ +* `Writing new models `_ You may also be interested by some examples in the GPy/examples folder. diff --git a/doc/tuto_interacting_with_models.rst b/doc/tuto_interacting_with_models.rst index 3cea7fb7..4a466bae 100644 --- a/doc/tuto_interacting_with_models.rst +++ b/doc/tuto_interacting_with_models.rst @@ -1,3 +1,5 @@ +.. _interacting_with_models: + ************************************* Interacting with models ************************************* @@ -210,6 +212,6 @@ white_variance and noise_variance are tied together.:: Further Reading =============== -All of the mechansiams for dealing with parameters are baked right into GPy.core.model, from which all of the classes in GPy.models inherrit. 
To learn how to construct your own model, you might want to read ??link?? creating_new_models. +All of the mechansiams for dealing with parameters are baked right into GPy.core.model, from which all of the classes in GPy.models inherrit. To learn how to construct your own model, you might want to read :ref:`creating_new_models`. -By deafult, GPy uses the tnc optimizer (from scipy.optimize.tnc). To use other optimisers, and to control the setting of those optimisers, as well as other funky features like automated restarts and diagnostics, you can read the optimization tutorial ??link??. +By deafult, GPy uses the scg optimizer. To use other optimisers, and to control the setting of those optimisers, as well as other funky features like automated restarts and diagnostics, you can read the optimization tutorial ??link??. From c773672b6914b96617ec5577d6192120d3d4a67a Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Wed, 26 Jun 2013 17:02:35 +0100 Subject: [PATCH 04/20] params left in __get/setitem__ --- GPy/core/parameterized.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPy/core/parameterized.py b/GPy/core/parameterized.py index 0f5ef905..88625fba 100644 --- a/GPy/core/parameterized.py +++ b/GPy/core/parameterized.py @@ -93,9 +93,9 @@ class Parameterized(object): if len(matches): val = np.array(val) assert (val.size == 1) or val.size == len(matches), "Shape mismatch: {}:({},)".format(val.size, len(matches)) - x = self.params + x = self._get_params x[matches] = val - self.params = x + self._set_params(x) else: raise AttributeError, "no parameter matches %s" % name From d978d3e390057ada7faa8e14e984c2e024a61fbb Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Wed, 26 Jun 2013 17:02:48 +0100 Subject: [PATCH 05/20] added tutorial for creating models --- doc/tuto_creating_new_models.rst | 64 ++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 doc/tuto_creating_new_models.rst diff --git 
a/doc/tuto_creating_new_models.rst b/doc/tuto_creating_new_models.rst new file mode 100644 index 00000000..021b4950 --- /dev/null +++ b/doc/tuto_creating_new_models.rst @@ -0,0 +1,64 @@ +.. _creating_new_models: + +******************* +Creating new Models +******************* + +In GPy all models inherit from the base class :py:class:`~GPy.core.parameterized.Parameterized`. :py:class:`~GPy.core.parameterized.Parameterized` is a class which allows for parameterization of objects. All it holds is functionality for tying, bounding and fixing of parameters. It also provides the functionality of searching and manipulating parameters by regular expression syntax. See :py:class:`~GPy.core.parameterized.Parameterized` for more information. + +The :py:class:`~GPy.core.model.Model` class provides parameter introspection, objective function and optimization. + +In order to fully use all functionality of :py:class:`~GPy.core.model.Model` some methods need to be implemented / overridden. In order to explain the functionality of those methods we will use a wrapper to the numpy ``rosen`` function, which holds input parameters :math:`\mathbf{X}`. Where :math:`\mathbf{X}\in\mathbb{R}^{N\times 1}`. + +Obligatory methods +================== + +:py:meth:`~GPy.core.model.Model.__init__` : + Initialize the model with the given parameters. In our example we have to store shape information of :math:`\mathbf X` and the parameters themselves:: + + self.X = X + self.num_inputs = self.X.shape[0] + assert self.X.ndim == 1, only vector inputs allowed + +:py:meth:`~GPy.core.model.Model._get_params` : + Return parameters of the model as a flattened numpy array-like. So, in our example we have to return the input parameters:: + + return self.X.flatten() + +:py:meth:`~GPy.core.model.Model._set_params` : + Set parameters, which have been fetched through :py:meth:`~GPy.core.model.Model._get_params`. 
In other words, "invert" the functionality of :py:meth:`~GPy.core.model.Model._get_params`:: + + self.X = params[:self.num_inputs*self.input_dim].reshape(self.num_inputs) + +:py:meth:`~GPy.core.model.Model.log_likelihood` : + Returns the log-likelihood of the new model. For our example this is just the call to ``rosen``:: + + return scipy.optimize.rosen(self.X) + +:py:meth:`~GPy.core.model.Model._log_likelihood_gradients` : + Returns the gradients with respect to all parameters:: + + return scipy.optimize.rosen_der(self.X) + + +Optional methods +================ + +If you want some special functionality please provide the following methods: + +Using the pickle functionality +------------------------------ + +To be able to use the pickle functionality ``m.pickle()`` the methods ``getstate(self)`` and ``setstate(self, state)`` have to be provided. The convention for a ``state`` in ``GPy`` is a list of all parameters, which are needed to restore the model. All classes provided in ``GPy`` follow this convention, thus you can just append to the state of the inherited class and call the inherited class' ``setstate`` with the appropriate state. + +:py:meth:`~GPy.core.model.Model.getstate` : + This method returns a state of the model, following the memento pattern. As we are inheriting from :py:class:`~GPy.core.model.Model`, we have to return the state of Model as well. 
In out example we have `X` and `num_inputs` as state:: + + return Model.getstate(self) + [self.X, self.num_inputs] + +:py:meth:`~GPy.core.model.Model.setstate` : + This method restores this model with the given ``state``:: + + self.num_inputs = state.pop() + self.X = state.pop() + return Model.setstate(self, state) \ No newline at end of file From 146dff7cbf4434ec814bf5e5e52f4b24c8f2898e Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Wed, 26 Jun 2013 17:04:10 +0100 Subject: [PATCH 06/20] _get/set_params into parameterized --- GPy/core/model.py | 4 ---- GPy/core/parameterized.py | 5 +++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index fe5d5181..d4c8c974 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -23,10 +23,6 @@ class Model(Parameterized): self.sampling_runs = [] self.preferred_optimizer = 'scg' # self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes - def _get_params(self): - raise NotImplementedError, "this needs to be implemented to use the Model class" - def _set_params(self, x): - raise NotImplementedError, "this needs to be implemented to use the Model class" def log_likelihood(self): raise NotImplementedError, "this needs to be implemented to use the Model class" def _log_likelihood_gradients(self): diff --git a/GPy/core/parameterized.py b/GPy/core/parameterized.py index 88625fba..beb7521f 100644 --- a/GPy/core/parameterized.py +++ b/GPy/core/parameterized.py @@ -20,6 +20,11 @@ class Parameterized(object): self.constrained_indices = [] self.constraints = [] + def _get_params(self): + raise NotImplementedError, "this needs to be implemented to use the Model class" + def _set_params(self, x): + raise NotImplementedError, "this needs to be implemented to use the Model class" + def pickle(self, filename, protocol=None): if protocol is None: if self._has_get_set_state(): From 761f7b02fcedf71aa7ab2ee9f747478a8fb6f3bd Mon Sep 17 00:00:00 2001 
From: Max Zwiessele Date: Wed, 26 Jun 2013 17:05:28 +0100 Subject: [PATCH 07/20] _get/set_params into parameterized --- GPy/core/parameterized.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/core/parameterized.py b/GPy/core/parameterized.py index beb7521f..b94689ad 100644 --- a/GPy/core/parameterized.py +++ b/GPy/core/parameterized.py @@ -98,7 +98,7 @@ class Parameterized(object): if len(matches): val = np.array(val) assert (val.size == 1) or val.size == len(matches), "Shape mismatch: {}:({},)".format(val.size, len(matches)) - x = self._get_params + x = self._get_params() x[matches] = val self._set_params(x) else: From 97acb36b5596745cfd762eefde2172c70384e09b Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 26 Jun 2013 17:29:37 +0100 Subject: [PATCH 08/20] new constrain_negative negative_logexp (selected by default) --- GPy/core/parameterised.py | 2 +- GPy/core/transformations.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py index 13467fdf..af96e0a7 100644 --- a/GPy/core/parameterised.py +++ b/GPy/core/parameterised.py @@ -199,7 +199,7 @@ class Parameterised(object): def constrain_negative(self, regexp): """ Set negative constraints. """ - self.constrain(regexp, transformations.negative_exponent()) + self.constrain(regexp, transformations.negative_logexp()) def constrain_positive(self, regexp): """ Set positive constraints. """ diff --git a/GPy/core/transformations.py b/GPy/core/transformations.py index 2520a33b..5db835a9 100644 --- a/GPy/core/transformations.py +++ b/GPy/core/transformations.py @@ -36,6 +36,20 @@ class logexp(transformation): def __str__(self): return '(+ve)' +class negative_logexp(transformation): + domain = NEGATIVE + def f(self, x): + return -np.log(1. + np.exp(x)) + def finv(self, f): + return np.log(np.exp(-f) - 1.) + def gradfactor(self, f): + ef = np.exp(-f) + return -(ef - 1.) 
/ ef + def initialize(self, f): + return -np.abs(f) + def __str__(self): + return '(-ve)' + class logexp_clipped(transformation): max_bound = 1e100 min_bound = 1e-10 From b9ca93e8abb716065680809e6cedf0d36fdeb93d Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 26 Jun 2013 18:41:05 +0100 Subject: [PATCH 09/20] Fixed bug in constructors --- GPy/kern/constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index 697f3554..487a324b 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -227,7 +227,7 @@ def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi :param n_freq: the number of frequencies considered for the periodic subspace :type n_freq: int """ - part = parts.periodic_Matern52part(input_dim, variance, lengthscale, period, n_freq, lower, upper) + part = parts.periodic_Matern52.PeriodicMatern52(input_dim, variance, lengthscale, period, n_freq, lower, upper) return kern(input_dim, [part]) def prod(k1,k2,tensor=False): From 26260c355c7d3c247cd6d23f9d98f189e406ecb6 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 27 Jun 2013 10:40:23 +0100 Subject: [PATCH 10/20] added docstring to domains --- GPy/core/domains.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/GPy/core/domains.py b/GPy/core/domains.py index dfc880f6..cefac6c2 100644 --- a/GPy/core/domains.py +++ b/GPy/core/domains.py @@ -2,6 +2,22 @@ Created on 4 Jun 2013 @author: maxz + +(Hyper-)Parameter domains defined for :py:mod:`~GPy.core.priors` and :py:mod:`~GPy.kern`. +These domains specify the legitimate realm of the parameters to live in. 
+ +:const:`~GPy.core.domains.REAL` : + real domain, all values in the real numbers are allowed + +:const:`~GPy.core.domains.POSITIVE`: + positive domain, only positive real values are allowed + +:const:`~GPy.core.domains.NEGATIVE`: + same as :const:`~GPy.core.domains.POSITIVE`, but only negative values are allowed + +:const:`~GPy.core.domains.BOUNDED`: + only values within the bounded range are allowed, + the bounds are specified withing the object with the bounded range ''' REAL = 'real' From 8cdedf2edb697a484c2b11b381f463ca95541da1 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 27 Jun 2013 11:11:41 +0100 Subject: [PATCH 11/20] plot_latent bug-fix in mrd --- GPy/kern/kern.py | 3 ++- GPy/models/mrd.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 176abbaf..4bb4752f 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -75,7 +75,8 @@ class kern(Parameterized): if hasattr(p, 'ARD') and p.ARD: if title is None: ax.set_title('ARD parameters, %s kernel' % p.name) - + else: + ax.set_title(title) if p.name == 'linear': ard_params = p.variances else: diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index c8cb6607..0af0dede 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -315,7 +315,7 @@ class MRD(Model): return fig def plot_latent(self, fignum=None, ax=None, *args, **kwargs): - fig = self.gref.plot_X_1d(*args, **kwargs) # self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs)) + fig = self.gref.plot_latent(*args, **kwargs) # self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs)) return fig def _debug_plot(self): From 9e9131b90d658fe2af4704ff89845c91402e83fe Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Thu, 27 Jun 2013 11:17:58 +0100 Subject: [PATCH 12/20] plot_latent bug-fix of creating no figure --- GPy/testing/examples_tests.py | 10 +++++----- GPy/util/plot_latent.py | 5 +++-- GPy/util/visualize.py | 24 
++++++++++++------------ 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py index ec030055..989251a7 100644 --- a/GPy/testing/examples_tests.py +++ b/GPy/testing/examples_tests.py @@ -19,14 +19,14 @@ class ExamplesTests(unittest.TestCase): self.assertTrue(isinstance(Model, GPy.models)) """ -def model_instance_generator(Model): +def model_instance_generator(model): def check_model_returned(self): - self._model_instance(Model) + self._model_instance(model) return check_model_returned -def checkgrads_generator(Model): +def checkgrads_generator(model): def model_checkgrads(self): - self._checkgrad(Model) + self._checkgrad(model) return model_checkgrads """ @@ -37,7 +37,7 @@ def model_checkgrads(model): def model_instance(model): #assert isinstance(model, GPy.core.model) - return isinstance(model, GPy.core.Model) + return isinstance(model, GPy.core.model) @nottest def test_models(): diff --git a/GPy/util/plot_latent.py b/GPy/util/plot_latent.py index c36c5e34..9c832769 100644 --- a/GPy/util/plot_latent.py +++ b/GPy/util/plot_latent.py @@ -2,13 +2,14 @@ import pylab as pb import numpy as np from .. 
import util -def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40): +def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40, fignum=None): """ :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc) :param resolution: the resolution of the grid on which to evaluate the predictive variance """ if ax is None: - ax = pb.gca() + fig = pb.figure(num=fignum) + ax = fig.add_subplot(111) util.plot.Tango.reset() if labels is None: diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py index 529f0eff..886e8486 100644 --- a/GPy/util/visualize.py +++ b/GPy/util/visualize.py @@ -103,7 +103,7 @@ class lvm(matplotlib_show): self.cid = latent_axes[0].figure.canvas.mpl_connect('axes_enter_event', self.on_enter) self.data_visualize = data_visualize - self.Model = model + self.model = model self.latent_axes = latent_axes self.sense_axes = sense_axes self.called = False @@ -120,7 +120,7 @@ class lvm(matplotlib_show): def modify(self, vals): """When latent values are modified update the latent representation and ulso update the output visualization.""" self.vals = vals.copy() - y = self.Model.predict(self.vals)[0] + y = self.model.predict(self.vals)[0] self.data_visualize.modify(y) self.latent_handle.set_data(self.vals[self.latent_index[0]], self.vals[self.latent_index[1]]) self.axes.figure.canvas.draw() @@ -148,15 +148,15 @@ class lvm(matplotlib_show): # A click in the bar chart axis for selection a dimension. 
if self.sense_axes != None: self.sense_axes.cla() - self.sense_axes.bar(np.arange(self.Model.input_dim),1./self.Model.input_sensitivity(),color='b') + self.sense_axes.bar(np.arange(self.model.input_dim),1./self.model.input_sensitivity(),color='b') if self.latent_index[1] == self.latent_index[0]: - self.sense_axes.bar(np.array(self.latent_index[0]),1./self.Model.input_sensitivity()[self.latent_index[0]],color='y') - self.sense_axes.bar(np.array(self.latent_index[1]),1./self.Model.input_sensitivity()[self.latent_index[1]],color='y') + self.sense_axes.bar(np.array(self.latent_index[0]),1./self.model.input_sensitivity()[self.latent_index[0]],color='y') + self.sense_axes.bar(np.array(self.latent_index[1]),1./self.model.input_sensitivity()[self.latent_index[1]],color='y') else: - self.sense_axes.bar(np.array(self.latent_index[0]),1./self.Model.input_sensitivity()[self.latent_index[0]],color='g') - self.sense_axes.bar(np.array(self.latent_index[1]),1./self.Model.input_sensitivity()[self.latent_index[1]],color='r') + self.sense_axes.bar(np.array(self.latent_index[0]),1./self.model.input_sensitivity()[self.latent_index[0]],color='g') + self.sense_axes.bar(np.array(self.latent_index[1]),1./self.model.input_sensitivity()[self.latent_index[1]],color='r') self.sense_axes.figure.canvas.draw() @@ -193,7 +193,7 @@ class lvm_dimselect(lvm): GPy.examples.dimensionality_reduction.BGPVLM_oil() """ - def __init__(self, vals, Model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1], labels=None): + def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1], labels=None): if latent_axes==None and sense_axes==None: self.fig,(latent_axes,self.sense_axes) = plt.subplots(1,2) elif sense_axes==None: @@ -202,7 +202,7 @@ class lvm_dimselect(lvm): else: self.sense_axes = sense_axes self.labels = labels - lvm.__init__(self,vals,Model,data_visualize,latent_axes,sense_axes,latent_index) + 
lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index) self.show_sensitivities() print "use left and right mouse butons to select dimensions" @@ -210,7 +210,7 @@ class lvm_dimselect(lvm): def on_click(self, event): if event.inaxes==self.sense_axes: - new_index = max(0,min(int(np.round(event.xdata-0.5)),self.Model.input_dim-1)) + new_index = max(0,min(int(np.round(event.xdata-0.5)),self.model.input_dim-1)) if event.button == 1: # Make it red if and y-axis (red=port=left) if it is a left button click self.latent_index[1] = new_index @@ -221,7 +221,7 @@ class lvm_dimselect(lvm): self.show_sensitivities() self.latent_axes.cla() - self.Model.plot_latent(which_indices=self.latent_index, + self.model.plot_latent(which_indices=self.latent_index, ax=self.latent_axes, labels=self.labels) self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0] self.modify(self.latent_values) @@ -235,7 +235,7 @@ class lvm_dimselect(lvm): def on_leave(self,event): latent_values = self.latent_values.copy() - y = self.Model.predict(latent_values[None,:])[0] + y = self.model.predict(latent_values[None,:])[0] self.data_visualize.modify(y) From 66a6bde715f192ef6c4a1470d6d90c6425d01da9 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 28 Jun 2013 10:59:29 +0100 Subject: [PATCH 13/20] more robust gradient clippinggit stat --- GPy/core/model.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/GPy/core/model.py b/GPy/core/model.py index d4c8c974..5de114c5 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -272,12 +272,13 @@ class Model(Parameterized): """ try: self._set_params_transformed(x) + obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) self._fail_count = 0 except (LinAlgError, ZeroDivisionError, ValueError) as e: if self._fail_count >= self._allowed_failures: raise e self._fail_count += 1 - obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + 
self._log_prior_gradients()) + obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100) return obj_grads def objective_and_gradients(self, x): @@ -285,12 +286,13 @@ class Model(Parameterized): self._set_params_transformed(x) obj_f = -self.log_likelihood() - self.log_prior() self._fail_count = 0 + obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) except (LinAlgError, ZeroDivisionError, ValueError) as e: if self._fail_count >= self._allowed_failures: raise e self._fail_count += 1 obj_f = np.inf - obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) + obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100) return obj_f, obj_grads def optimize(self, optimizer=None, start=None, **kwargs): @@ -311,7 +313,9 @@ class Model(Parameterized): optimizer = optimization.get_optimizer(optimizer) opt = optimizer(start, model=self, **kwargs) + opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients) + self.optimization_runs.append(opt) self._set_params_transformed(opt.x_opt) From 08a902ed6ce28b7e751b65ed1fe9cdee79d6c6ee Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 28 Jun 2013 11:00:09 +0100 Subject: [PATCH 14/20] massively improved printing --- GPy/inference/scg.py | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/GPy/inference/scg.py b/GPy/inference/scg.py index 5753be7f..680874e4 100644 --- a/GPy/inference/scg.py +++ b/GPy/inference/scg.py @@ -26,11 +26,14 @@ import numpy as np import sys -def print_out(len_maxiters, display, fnow, current_grad, beta, iteration): - if display: - print '\r', - print '{0:>0{mi}g} {1:> 12e} {2:> 12e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), 
mi=len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', - sys.stdout.flush() +def print_out(len_maxiters, fnow, current_grad, beta, iteration): + print '\r', + print '{0:>0{mi}g} {1:> 12e} {2:> 12e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', + sys.stdout.flush() + +def exponents(fnow, current_grad): + exps = [np.abs(fnow), current_grad] + return np.sign(exps) * np.log10(exps).astype(int) def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=None, ftol=None, gtol=None): """ @@ -52,6 +55,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto ftol = 1e-6 if gtol is None: gtol = 1e-5 + sigma0 = 1.0e-8 fold = f(x, *optargs) # Initial function value. function_eval = 1 @@ -74,6 +78,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto len_maxiters = len(str(maxiters)) if display: print ' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters) + exps = exponents(fnow, current_grad) + p_iter = iteration # Main optimization loop. 
while iteration < maxiters: @@ -104,7 +110,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto function_eval += 1 if function_eval >= max_f_eval: - status = "Maximum number of function evaluations exceeded" + status = "maximum number of function evaluations exceeded" break # return x, flog, function_eval, status @@ -122,15 +128,24 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto flog.append(fnow) # Current function value iteration += 1 - print_out(len_maxiters, display, fnow, current_grad, beta, iteration) + if display: + n_exps = exponents(fnow, current_grad) + if iteration - p_iter >= 6 and ((iteration >= p_iter * 2.78) or np.any(n_exps < exps)): + exps = n_exps + p_iter = iteration + print '' + print_out(len_maxiters, fnow, current_grad, beta, iteration) if success: # Test for termination - if (np.max(np.abs(alpha * d)) < xtol) or (np.abs(fnew - fold) < ftol): - status = 'converged' + + if (np.abs(fnew - fold) < ftol): + status = 'converged - relative reduction in objective' break # return x, flog, function_eval, status - + elif (np.max(np.abs(alpha * d)) < xtol): + status = 'converged - relative stepsize' + break else: # Update variables for new position gradnew = gradf(x, *optargs) @@ -139,7 +154,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto fold = fnew # If the gradient is zero then we are done. 
if current_grad <= gtol: - status = 'converged' + status = 'converged - relative reduction in gradient' break # return x, flog, function_eval, status @@ -164,6 +179,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto status = "maxiter exceeded" if display: - print_out(len_maxiters, display, fnow, current_grad, beta, iteration) print "" + print_out(len_maxiters, fnow, current_grad, beta, iteration) + print "" + print status return x, flog, function_eval, status From 7325e319b40aff9c94c2d564c3cbf29f8ef40211 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 28 Jun 2013 11:00:42 +0100 Subject: [PATCH 15/20] plot_latent enhancements --- GPy/models/bayesian_gplvm.py | 5 ++-- GPy/models/mrd.py | 47 +++++++++++++++++++++++------------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 916c307d..68b0a133 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -24,8 +24,7 @@ class BayesianGPLVM(SparseGP, GPLVM): """ def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, - Z=None, kernel=None, oldpsave=10, _debug=False, - **kwargs): + Z=None, kernel=None, **kwargs): if type(likelihood_or_Y) is np.ndarray: likelihood = Gaussian(likelihood_or_Y) else: @@ -117,7 +116,7 @@ class BayesianGPLVM(SparseGP, GPLVM): return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta)) def plot_latent(self, *args, **kwargs): - return plot_latent.plot_latent_indices(self, *args, **kwargs) + return plot_latent.plot_latent(self, *args, **kwargs) def do_test_latents(self, Y): """ diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 0af0dede..327c198f 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -48,7 +48,7 @@ class MRD(Model): kernels=None, initx='PCA', initz='permute', _debug=False, **kw): if names is None: - self.names = ["{}".format(i + 1) for i in range(len(likelihood_or_Y_list))] + 
self.names = ["{}".format(i) for i in range(len(likelihood_or_Y_list))] # sort out the kernels if kernels is None: @@ -281,12 +281,23 @@ class MRD(Model): self.Z = Z return Z - def _handle_plotting(self, fignum, axes, plotf): + def _handle_plotting(self, fignum, axes, plotf, sharex=False, sharey=False): if axes is None: fig = pylab.figure(num=fignum) + sharex_ax = None + sharey_ax = None for i, g in enumerate(self.bgplvms): + try: + if sharex: + sharex_ax = ax # @UndefinedVariable + sharex = False # dont set twice + if sharey: + sharey_ax = ax # @UndefinedVariable + sharey = False # dont set twice + except: + pass if axes is None: - ax = fig.add_subplot(1, len(self.bgplvms), i + 1) + ax = fig.add_subplot(1, len(self.bgplvms), i + 1, sharex=sharex_ax, sharey=sharey_ax) elif isinstance(axes, (tuple, list)): ax = axes[i] else: @@ -306,16 +317,27 @@ class MRD(Model): fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X)) return fig - def plot_predict(self, fignum=None, ax=None, **kwargs): - fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g. predict(g.X)[0], **kwargs)) + def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs): + fig = self._handle_plotting(fignum, + ax, + lambda i, g, ax: ax.imshow(g. 
predict(g.X)[0], **kwargs), + sharex=sharex, sharey=sharey) return fig - def plot_scales(self, fignum=None, ax=None, *args, **kwargs): - fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.kern.plot_ARD(ax=ax, title=r'$Y_{}$'.format(i), *args, **kwargs)) + def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs): + """ + :param:`titles` : + titles for axes of datasets + """ + if titles is None: + titles = [r'${}$'.format(name) for name in self.names] + def plotf(i, g, ax): + g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) + fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey) return fig def plot_latent(self, fignum=None, ax=None, *args, **kwargs): - fig = self.gref.plot_latent(*args, **kwargs) # self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs)) + fig = self.gref.plot_latent(fignum=fignum, ax=ax, *args, **kwargs) # self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs)) return fig def _debug_plot(self): @@ -331,13 +353,4 @@ class MRD(Model): pylab.draw() fig.tight_layout() - def _debug_optimize(self, opt='scg', maxiters=5000, itersteps=10): - iters = 0 - optstep = lambda: self.optimize(opt, messages=1, max_f_eval=itersteps) - self._debug_plot() - raw_input("enter to start debug") - while iters < maxiters: - optstep() - self._debug_plot() - iters += itersteps From c0d514b6c094e282d33b43f1b936e9704921263e Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 28 Jun 2013 11:01:31 +0100 Subject: [PATCH 16/20] optional plotting of inducing inputs added --- GPy/util/plot_latent.py | 59 +++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/GPy/util/plot_latent.py b/GPy/util/plot_latent.py index 9c832769..d89d90d8 100644 --- a/GPy/util/plot_latent.py +++ b/GPy/util/plot_latent.py @@ -2,7 +2,7 @@ import pylab as pb import numpy as np from .. 
import util -def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40, fignum=None): +def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40, fignum=None, plot_inducing=False, legend=True): """ :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc) :param resolution: the resolution of the grid on which to evaluate the predictive variance @@ -15,11 +15,11 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, if labels is None: labels = np.ones(model.num_data) if which_indices is None: - if model.input_dim==1: + if model.input_dim == 1: input_1 = 0 input_2 = None - if model.input_dim==2: - input_1, input_2 = 0,1 + if model.input_dim == 2: + input_1, input_2 = 0, 1 else: try: input_1, input_2 = np.argsort(model.input_sensitivity())[:2] @@ -28,14 +28,14 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, else: input_1, input_2 = which_indices - #first, plot the output variance as a function of the latent space - Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(model.X[:,[input_1, input_2]],resolution=resolution) + # first, plot the output variance as a function of the latent space + Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution) Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) Xtest_full[:, :2] = Xtest mu, var, low, up = model.predict(Xtest_full) var = var[:, :1] ax.imshow(var.reshape(resolution, resolution).T, - extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower') + extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary, interpolation='bilinear', origin='lower') # make sure labels are in order of input: ulabels = [] @@ -47,46 +47,35 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, if type(ul) is np.string_: this_label = 
ul elif type(ul) is np.int64: - this_label = 'class %i'%ul + this_label = 'class %i' % ul else: - this_label = 'class %i'%i + this_label = 'class %i' % i if len(marker) == len(ulabels): m = marker[i] else: m = marker - index = np.nonzero(labels==ul)[0] - if model.input_dim==1: - x = model.X[index,input_1] + index = np.nonzero(labels == ul)[0] + if model.input_dim == 1: + x = model.X[index, input_1] y = np.zeros(index.size) else: - x = model.X[index,input_1] - y = model.X[index,input_2] + x = model.X[index, input_1] + y = model.X[index, input_2] ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label) - ax.set_xlabel('latent dimension %i'%input_1) - ax.set_ylabel('latent dimension %i'%input_2) + ax.set_xlabel('latent dimension %i' % input_1) + ax.set_ylabel('latent dimension %i' % input_2) - if not np.all(labels==1.): - ax.legend(loc=0,numpoints=1) + if not np.all(labels == 1.) and legend: + ax.legend(loc=0, numpoints=1) - ax.set_xlim(xmin[0],xmax[0]) - ax.set_ylim(xmin[1],xmax[1]) + ax.set_xlim(xmin[0], xmax[0]) + ax.set_ylim(xmin[1], xmax[1]) ax.grid(b=False) # remove the grid if present, it doesn't look good ax.set_aspect('auto') # set a nice aspect ratio - return ax - - -def plot_latent_indices(Model, which_indices=None, *args, **kwargs): - - if which_indices is None: - try: - input_1, input_2 = np.argsort(Model.input_sensitivity())[:2] - except: - raise ValueError, "cannot Automatically determine which dimensions to plot, please pass 'which_indices'" - else: - input_1, input_2 = which_indices - ax = plot_latent(Model, which_indices=[input_1, input_2], *args, **kwargs) - # TODO: Here test if there are inducing points... 
- ax.plot(Model.Z[:, input_1], Model.Z[:, input_2], '^w') + + if plot_inducing: + ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w') + return ax From 12640a2d1710a24798b4e51a1865e6e37202bc47 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 28 Jun 2013 11:02:15 +0100 Subject: [PATCH 17/20] plot ard ticks improved --- GPy/kern/kern.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 4bb4752f..5cd90749 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -82,9 +82,10 @@ class kern(Parameterized): else: ard_params = 1. / p.lengthscale - ax.bar(np.arange(len(ard_params)) - 0.4, ard_params) - ax.set_xticks(np.arange(len(ard_params))) - ax.set_xticklabels([r"${}$".format(i) for i in range(len(ard_params))]) + x = np.arange(len(ard_params)) + ax.bar(x - 0.4, ard_params) + ax.set_xticks(x) + ax.set_xticklabels([r"${}$".format(i) for i in x]) return ax def _transform_gradients(self, g): From 29746cd8bed28197be8255e2742aa1bd05e3e7c0 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 28 Jun 2013 11:02:59 +0100 Subject: [PATCH 18/20] added value restoring if unpickling objects --- GPy/core/parameterized.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/GPy/core/parameterized.py b/GPy/core/parameterized.py index a2f2bf25..f4d6afa1 100644 --- a/GPy/core/parameterized.py +++ b/GPy/core/parameterized.py @@ -45,7 +45,9 @@ class Parameterized(object): def __setstate__(self, state): if self._has_get_set_state(): - return self.setstate(state) + self.setstate(state) # set state + self._set_params(self._get_params()) # restore all values + return self.__dict__ = state def _has_get_set_state(self): From 96829862ccd2b1053f58eecc25c60989c8201a93 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 28 Jun 2013 11:13:12 +0100 Subject: [PATCH 19/20] minor printing improvements --- GPy/inference/scg.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git 
a/GPy/inference/scg.py b/GPy/inference/scg.py index 680874e4..ba72bf60 100644 --- a/GPy/inference/scg.py +++ b/GPy/inference/scg.py @@ -129,12 +129,17 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto iteration += 1 if display: - n_exps = exponents(fnow, current_grad) - if iteration - p_iter >= 6 and ((iteration >= p_iter * 2.78) or np.any(n_exps < exps)): - exps = n_exps - p_iter = iteration - print '' print_out(len_maxiters, fnow, current_grad, beta, iteration) + n_exps = exponents(fnow, current_grad) + if iteration - p_iter >= 6: + a = iteration >= p_iter * 2.78 + b = np.any(n_exps < exps) + if a or b: + print '' + if a: + p_iter = iteration + if b: + exps = n_exps if success: # Test for termination From a8eb7eb5f7a70faaaef58643e128963082866a33 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Fri, 28 Jun 2013 11:13:20 +0100 Subject: [PATCH 20/20] minor adjustments --- GPy/examples/dimensionality_reduction.py | 7 ++++--- GPy/testing/cgd_tests.py | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 8f6fdbe7..810098fe 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -254,7 +254,7 @@ def bgplvm_simulation(optimize='scg', Y = Ylist[0] k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q) - m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, _debug=True) + m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) # m.constrain('variance|noise', logexp_clipped()) m['noise'] = Y.var() / 100. 
@@ -279,11 +279,12 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): reload(mrd); reload(kern) - k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) + k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) m = mrd.MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw) + m.ensure_default_constraints() for i, Y in enumerate(Ylist): - m['{}_noise'.format(i + 1)] = Y.var() / 100. + m['{}_noise'.format(i)] = Y.var() / 100. # DEBUG diff --git a/GPy/testing/cgd_tests.py b/GPy/testing/cgd_tests.py index d999c6fc..82041c9f 100644 --- a/GPy/testing/cgd_tests.py +++ b/GPy/testing/cgd_tests.py @@ -7,7 +7,6 @@ import unittest import numpy from GPy.inference.conjugate_gradient_descent import CGD, RUNNING import pylab -import time from scipy.optimize.optimize import rosen, rosen_der from GPy.inference.gradient_descent_update_rules import PolakRibiere