diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py
index e9e049b0..8b040984 100644
--- a/GPy/core/__init__.py
+++ b/GPy/core/__init__.py
@@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 from model import *
-from parameterised import *
+from parameterized import *
 import priors
 from gp import GP
 from sparse_gp import SparseGP
diff --git a/GPy/core/domains.py b/GPy/core/domains.py
index dfc880f6..cefac6c2 100644
--- a/GPy/core/domains.py
+++ b/GPy/core/domains.py
@@ -2,6 +2,22 @@
 Created on 4 Jun 2013
 
 @author: maxz
+
+(Hyper-)Parameter domains defined for :py:mod:`~GPy.core.priors` and :py:mod:`~GPy.kern`.
+These domains specify the legitimate range of values a parameter may take.
+
+:const:`~GPy.core.domains.REAL`:
+    real domain, all real values are allowed
+
+:const:`~GPy.core.domains.POSITIVE`:
+    positive domain, only positive real values are allowed
+
+:const:`~GPy.core.domains.NEGATIVE`:
+    analogous to :const:`~GPy.core.domains.POSITIVE`, but only negative real values are allowed
+
+:const:`~GPy.core.domains.BOUNDED`:
+    only values within a bounded range are allowed;
+    the bounds are specified within the object carrying the bounded range
 '''
 REAL = 'real'
diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 5172d9e7..a0e60bcc 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -6,7 +6,7 @@
 import numpy as np
 import pylab as pb
 from .. import kern
 from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs
-#from ..util.plot import gpplot, Tango
+# from ..util.plot import gpplot, Tango
 from ..likelihoods import EP
 from gp_base import GPBase
 
@@ -31,6 +31,13 @@
         GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
         self._set_params(self._get_params())
 
+    def getstate(self):
+        return GPBase.getstate(self)
+
+    def setstate(self, state):
+        GPBase.setstate(self, state)
+        self._set_params(self._get_params())
+
     def _set_params(self, p):
         self.kern._set_params_transformed(p[:self.kern.num_params_transformed()])
         self.likelihood._set_params(p[self.kern.num_params_transformed():])
@@ -42,12 +49,12 @@
 
         # the gradient of the likelihood wrt the covariance matrix
         if self.likelihood.YYT is None:
-            #alpha = np.dot(self.Ki, self.likelihood.Y)
-            alpha,_ = dpotrs(self.L, self.likelihood.Y,lower=1)
+            # alpha = np.dot(self.Ki, self.likelihood.Y)
+            alpha, _ = dpotrs(self.L, self.likelihood.Y, lower=1)
             self.dL_dK = 0.5 * (tdot(alpha) - self.output_dim * self.Ki)
         else:
-            #tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
+            # tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
             tmp, _ = dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
             tmp, _ = dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
             self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki)
@@ -68,7 +75,7 @@
         """
         self.likelihood.restart()
         self.likelihood.fit_full(self.kern.K(self.X))
-        self._set_params(self._get_params()) # update the GP
+        self._set_params(self._get_params())  # update the GP
 
     def _model_fit_term(self):
         """
@@ -77,7 +84,7 @@
         if self.likelihood.YYT is None:
             tmp, _ = dtrtrs(self.L, np.asfortranarray(self.likelihood.Y), lower=1)
             return -0.5 * np.sum(np.square(tmp))
-            #return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
+            # return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
         else:
             return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))
 
@@ -100,13 +107,13 @@
         """
         return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X),
self.likelihood._gradients(partial=np.diag(self.dL_dK)))) - def _raw_predict(self, _Xnew, which_parts='all', full_cov=False,stop=False): + def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False): """ Internal helper function for making predictions, does not account for normalization or likelihood """ - Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T - #KiKx = np.dot(self.Ki, Kx) + Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T + # KiKx = np.dot(self.Ki, Kx) KiKx, _ = dpotrs(self.L, np.asfortranarray(Kx), lower=1) mu = np.dot(KiKx.T, self.likelihood.Y) if full_cov: diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index 63568968..750788e9 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -29,10 +29,35 @@ class GPBase(Model): self._Xscale = np.ones((1, self.input_dim)) super(GPBase, self).__init__() - #Model.__init__(self) + # Model.__init__(self) # All leaf nodes should call self._set_params(self._get_params()) at # the end + def getstate(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return Model.getstate(self) + [self.X, + self.num_data, + self.input_dim, + self.kern, + self.likelihood, + self.output_dim, + self._Xoffset, + self._Xscale] + + def setstate(self, state): + self._Xscale = state.pop() + self._Xoffset = state.pop() + self.output_dim = state.pop() + self.likelihood = state.pop() + self.kern = state.pop() + self.input_dim = state.pop() + self.num_data = state.pop() + self.X = state.pop() + Model.setstate(self, state) + def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None): """ Plot the GP's view of the world, where the data is normalized and the diff --git a/GPy/core/model.py b/GPy/core/model.py index 05375b2a..5de114c5 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -6,32 +6,50 @@ from .. import likelihoods from ..inference import optimization from ..util.linalg import jitchol from GPy.util.misc import opt_wrapper -from parameterised import Parameterised +from parameterized import Parameterized import multiprocessing as mp import numpy as np from GPy.core.domains import POSITIVE, REAL from numpy.linalg.linalg import LinAlgError # import numdifftools as ndt -class Model(Parameterised): +class Model(Parameterized): _fail_count = 0 # Count of failed optimization steps (see objective) _allowed_failures = 10 # number of allowed failures def __init__(self): - Parameterised.__init__(self) + Parameterized.__init__(self) self.priors = None self.optimization_runs = [] self.sampling_runs = [] self.preferred_optimizer = 'scg' # self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes - def _get_params(self): - raise NotImplementedError, "this needs to be implemented to use the Model class" - def _set_params(self, x): - raise NotImplementedError, "this needs to be implemented to use the Model class" def log_likelihood(self): raise NotImplementedError, "this needs to be implemented to use the Model class" def _log_likelihood_gradients(self): raise NotImplementedError, "this needs to be implemented to use the Model class" + def getstate(self): + """ + Get the current state of the class. 
+ + Inherited from Parameterized, so add those parameters to the state + """ + return Parameterized.getstate(self) + \ + [self.priors, self.optimization_runs, + self.sampling_runs, self.preferred_optimizer] + + def setstate(self, state): + """ + set state from previous call to getstate + + call Parameterized with the rest of the state + """ + self.preferred_optimizer = state.pop() + self.sampling_runs = state.pop() + self.optimization_runs = state.pop() + self.priors = state.pop() + Parameterized.setstate(self, state) + def set_prior(self, regexp, what): """ Sets priors on the Model parameters. @@ -254,12 +272,13 @@ class Model(Parameterised): """ try: self._set_params_transformed(x) + obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) self._fail_count = 0 except (LinAlgError, ZeroDivisionError, ValueError) as e: if self._fail_count >= self._allowed_failures: raise e self._fail_count += 1 - obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) + obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100) return obj_grads def objective_and_gradients(self, x): @@ -267,12 +286,13 @@ class Model(Parameterised): self._set_params_transformed(x) obj_f = -self.log_likelihood() - self.log_prior() self._fail_count = 0 + obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) except (LinAlgError, ZeroDivisionError, ValueError) as e: if self._fail_count >= self._allowed_failures: raise e self._fail_count += 1 obj_f = np.inf - obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) + obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100) return obj_f, obj_grads def optimize(self, optimizer=None, start=None, **kwargs): @@ -293,7 +313,9 @@ class Model(Parameterised): optimizer = optimization.get_optimizer(optimizer) opt = optimizer(start, model=self, **kwargs) + opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients) + self.optimization_runs.append(opt) self._set_params_transformed(opt.x_opt) @@ -336,7 +358,7 @@ class Model(Parameterised): return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld def __str__(self): - s = Parameterised.__str__(self).split('\n') + s = Parameterized.__str__(self).split('\n') # add priors to the string if self.priors is not None: strs = [str(p) if p is not None else '' for p in self.priors] diff --git a/GPy/core/parameterised.py b/GPy/core/parameterized.py similarity index 86% rename from GPy/core/parameterised.py rename to GPy/core/parameterized.py index b3a5712a..f4d6afa1 100644 --- a/GPy/core/parameterised.py +++ b/GPy/core/parameterized.py @@ -9,7 +9,7 @@ import cPickle import warnings import transformations -class Parameterised(object): +class Parameterized(object): def __init__(self): """ This is the base class for model and kernel. 
Mostly just handles tieing and constraining of parameters
@@ -20,55 +20,60 @@
         self.constrained_indices = []
         self.constraints = []
 
-    def pickle(self, filename, protocol= -1):
-        f = file(filename, 'w')
-        cPickle.dump(self, f, protocol)
-        f.close()
+    def _get_params(self):
+        raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
+    def _set_params(self, x):
+        raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
+
+    def pickle(self, filename, protocol=None):
+        if protocol is None:
+            if self._has_get_set_state():
+                protocol = 0
+            else:
+                protocol = -1
+        with open(filename, 'w') as f:
+            cPickle.dump(self, f, protocol)
 
     def copy(self):
         """Returns a (deep) copy of the current model
        """
         return copy.deepcopy(self)
 
-    @property
-    def params(self):
+    def __getstate__(self):
+        if self._has_get_set_state():
+            return self.getstate()
+        return self.__dict__
+
+    def __setstate__(self, state):
+        if self._has_get_set_state():
+            self.setstate(state)  # set state
+            self._set_params(self._get_params())  # restore all values
+            return
+        self.__dict__ = state
+
+    def _has_get_set_state(self):
+        return 'getstate' in vars(self.__class__) and 'setstate' in vars(self.__class__)
+
+    def getstate(self):
         """
-        Returns a **copy** of parameters in non transformed space
-
-        :see_also: :py:func:`GPy.core.Parameterised.params_transformed`
+        Get the current state of the class,
+        here just all the indices, rest can get recomputed
+
+        When inheriting from Parameterized:
+        always append the state of the inherited object
+        and call down to the inherited object in setstate!!
         """
-        return self._get_params()
+        return [self.tied_indices,
+                self.fixed_indices,
+                self.fixed_values,
+                self.constrained_indices,
+                self.constraints]
 
-    @params.setter
-    def params(self, params):
-        self._set_params(params)
-
-    @property
-    def params_transformed(self):
-        """
-        Returns a **copy** of parameters in transformed space
-
-        :see_also: :py:func:`GPy.core.Parameterised.params`
-        """
-        return self._get_params_transformed()
-
-    @params_transformed.setter
-    def params_transformed(self, params):
-        self._set_params_transformed(params)
-
-    _get_set_deprecation = """get and set methods wont be available at next minor release
-    in the next releases you will get and set with following syntax:
-    Assume m is a model class:
-    print m['var'] # > prints all parameters matching 'var'
-    m['var'] = 2. # > sets all parameters matching 'var' to 2.
-    m['var'] = # > sets parameters matching 'var' to
-    """
-    def get(self, regexp):
-        warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
-        return self[regexp]
-
-    def set(self, regexp, val):
-        warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
-        self[regexp] = val
+    def setstate(self, state):
+        self.constraints = state.pop()
+        self.constrained_indices = state.pop()
+        self.fixed_values = state.pop()
+        self.fixed_indices = state.pop()
+        self.tied_indices = state.pop()
 
     def __getitem__(self, regexp, return_names=False):
         """
@@ -95,13 +100,16 @@
         if len(matches):
             val = np.array(val)
             assert (val.size == 1) or val.size == len(matches), "Shape mismatch: {}:({},)".format(val.size, len(matches))
-            x = self.params
+            x = self._get_params()
             x[matches] = val
-            self.params = x
+            self._set_params(x)
         else:
             raise AttributeError, "no parameter matches %s" % name
 
     def tie_params(self, regexp):
+        """
+        Tie (all!) parameters matching the regular expression `regexp`.
+ """ matches = self.grep_param_names(regexp) assert matches.size > 0, "need at least something to tie together" if len(self.tied_indices): @@ -181,7 +189,7 @@ class Parameterised(object): def constrain_negative(self, regexp): """ Set negative constraints. """ - self.constrain(regexp, transformations.negative_exponent()) + self.constrain(regexp, transformations.negative_logexp()) def constrain_positive(self, regexp): """ Set positive constraints. """ diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 04119071..93ba5d7d 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -33,10 +33,11 @@ class SparseGP(GPBase): self.Z = Z self.num_inducing = Z.shape[0] - self.likelihood = likelihood +# self.likelihood = likelihood if X_variance is None: self.has_uncertain_inputs = False + self.X_variance = None else: assert X_variance.shape == X.shape self.has_uncertain_inputs = True @@ -49,6 +50,23 @@ class SparseGP(GPBase): if self.has_uncertain_inputs: self.X_variance /= np.square(self._Xscale) + def getstate(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return GPBase.getstate(self) + [self.Z, + self.num_inducing, + self.has_uncertain_inputs, + self.X_variance] + + def setstate(self, state): + self.X_variance = state.pop() + self.has_uncertain_inputs = state.pop() + self.num_inducing = state.pop() + self.Z = state.pop() + GPBase.setstate(self, state) + def _compute_kernel_matrices(self): # kernel computations, using BGPLVM notation self.Kmm = self.kern.K(self.Z) @@ -79,7 +97,7 @@ class SparseGP(GPBase): tmp = tmp.T else: if self.likelihood.is_heteroscedastic: - tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(self.num_data,1))) + tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(self.num_data, 1))) else: tmp = self.psi1 * (np.sqrt(self.likelihood.precision)) tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1) @@ -166,7 +184,7 @@ class SparseGP(GPBase): return np.hstack([self.Z.flatten(), self.kern._get_params_transformed(), self.likelihood._get_params()]) def _get_param_names(self): - return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[])\ + return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], [])\ + self.kern._get_param_names_transformed() + self.likelihood._get_param_names() def update_likelihood_approximation(self): @@ -223,7 +241,7 @@ class SparseGP(GPBase): def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False): """Internal helper function for making predictions, does not account for normalization""" - Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work! + Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work! 
symmetrify(Bi) Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi) @@ -304,7 +322,7 @@ class SparseGP(GPBase): GPBase.plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax) if self.X.shape[1] == 1: if self.has_uncertain_inputs: - Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now + Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0], xerr=2 * np.sqrt(self.X_variance[which_data, 0]), ecolor='k', fmt=None, elinewidth=.5, alpha=.5) diff --git a/GPy/core/svigp.py b/GPy/core/svigp.py index 1db0e26f..5d6bcd8b 100644 --- a/GPy/core/svigp.py +++ b/GPy/core/svigp.py @@ -91,6 +91,14 @@ class SVIGP(GPBase): self._param_steplength_trace = [] self._vb_steplength_trace = [] + def getstate(self): + return GPBase.getstate(self) + + + def setstate(self, state): + return GPBase.setstate(self, state) + + def _compute_kernel_matrices(self): # kernel computations, using BGPLVM notation self.Kmm = self.kern.K(self.Z) diff --git a/GPy/core/transformations.py b/GPy/core/transformations.py index 2520a33b..5db835a9 100644 --- a/GPy/core/transformations.py +++ b/GPy/core/transformations.py @@ -36,6 +36,20 @@ class logexp(transformation): def __str__(self): return '(+ve)' +class negative_logexp(transformation): + domain = NEGATIVE + def f(self, x): + return -np.log(1. + np.exp(x)) + def finv(self, f): + return np.log(np.exp(-f) - 1.) + def gradfactor(self, f): + ef = np.exp(-f) + return -(ef - 1.) / ef + def initialize(self, f): + return -np.abs(f) + def __str__(self): + return '(-ve)' + class logexp_clipped(transformation): max_bound = 1e100 min_bound = 1e-10 diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 8f6fdbe7..810098fe 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -254,7 +254,7 @@ def bgplvm_simulation(optimize='scg', Y = Ylist[0] k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q) - m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, _debug=True) + m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) # m.constrain('variance|noise', logexp_clipped()) m['noise'] = Y.var() / 100. @@ -279,11 +279,12 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): reload(mrd); reload(kern) - k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) + k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) m = mrd.MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw) + m.ensure_default_constraints() for i, Y in enumerate(Ylist): - m['{}_noise'.format(i + 1)] = Y.var() / 100. + m['{}_noise'.format(i)] = Y.var() / 100. 
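# --- Review note (not part of the patch) --------------------------------------
# The algebra of the new `negative_logexp` transformation introduced in
# GPy/core/transformations.py above (and now used by `constrain_negative`) can
# be sanity-checked numerically. A minimal standalone sketch, NumPy only; the
# functions `f`, `finv` and `gradfactor` are copied out of the class body, and
# `gradfactor(f(x))` should reproduce df/dx via the chain rule.
import numpy as np

def f(x):
    return -np.log(1. + np.exp(x))      # maps the real line onto (-inf, 0)

def finv(fv):
    return np.log(np.exp(-fv) - 1.)     # inverse: recovers x from f(x)

def gradfactor(fv):
    ef = np.exp(-fv)
    return -(ef - 1.) / ef              # df/dx expressed as a function of f

x = np.linspace(-3., 3., 7)
fx = f(x)
assert np.all(fx < 0)                   # values lie in the NEGATIVE domain
assert np.allclose(finv(fx), x)         # finv inverts f
eps = 1e-6
num_grad = (f(x + eps) - f(x - eps)) / (2. * eps)
assert np.allclose(gradfactor(fx), num_grad, atol=1e-5)
# -------------------------------------------------------------------------------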
# DEBUG diff --git a/GPy/inference/scg.py b/GPy/inference/scg.py index 5753be7f..ba72bf60 100644 --- a/GPy/inference/scg.py +++ b/GPy/inference/scg.py @@ -26,11 +26,14 @@ import numpy as np import sys -def print_out(len_maxiters, display, fnow, current_grad, beta, iteration): - if display: - print '\r', - print '{0:>0{mi}g} {1:> 12e} {2:> 12e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', - sys.stdout.flush() +def print_out(len_maxiters, fnow, current_grad, beta, iteration): + print '\r', + print '{0:>0{mi}g} {1:> 12e} {2:> 12e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', + sys.stdout.flush() + +def exponents(fnow, current_grad): + exps = [np.abs(fnow), current_grad] + return np.sign(exps) * np.log10(exps).astype(int) def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=None, ftol=None, gtol=None): """ @@ -52,6 +55,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto ftol = 1e-6 if gtol is None: gtol = 1e-5 + sigma0 = 1.0e-8 fold = f(x, *optargs) # Initial function value. function_eval = 1 @@ -74,6 +78,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto len_maxiters = len(str(maxiters)) if display: print ' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters) + exps = exponents(fnow, current_grad) + p_iter = iteration # Main optimization loop. while iteration < maxiters: @@ -104,7 +110,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto function_eval += 1 if function_eval >= max_f_eval: - status = "Maximum number of function evaluations exceeded" + status = "maximum number of function evaluations exceeded" break # return x, flog, function_eval, status @@ -122,15 +128,29 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto flog.append(fnow) # Current function value iteration += 1 - print_out(len_maxiters, display, fnow, current_grad, beta, iteration) + if display: + print_out(len_maxiters, fnow, current_grad, beta, iteration) + n_exps = exponents(fnow, current_grad) + if iteration - p_iter >= 6: + a = iteration >= p_iter * 2.78 + b = np.any(n_exps < exps) + if a or b: + print '' + if a: + p_iter = iteration + if b: + exps = n_exps if success: # Test for termination - if (np.max(np.abs(alpha * d)) < xtol) or (np.abs(fnew - fold) < ftol): - status = 'converged' + + if (np.abs(fnew - fold) < ftol): + status = 'converged - relative reduction in objective' break # return x, flog, function_eval, status - + elif (np.max(np.abs(alpha * d)) < xtol): + status = 'converged - relative stepsize' + break else: # Update variables for new position gradnew = gradf(x, *optargs) @@ -139,7 +159,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto fold = fnew # If the gradient is zero then we are done. 
if current_grad <= gtol: - status = 'converged' + status = 'converged - relative reduction in gradient' break # return x, flog, function_eval, status @@ -164,6 +184,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto status = "maxiter exceeded" if display: - print_out(len_maxiters, display, fnow, current_grad, beta, iteration) print "" + print_out(len_maxiters, fnow, current_grad, beta, iteration) + print "" + print status return x, flog, function_eval, status diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index ad3ed3f9..5cd90749 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -3,12 +3,12 @@ import numpy as np import pylab as pb -from ..core.parameterised import Parameterised +from ..core.parameterized import Parameterized from parts.kernpart import Kernpart import itertools from parts.prod import Prod as prod -class kern(Parameterised): +class kern(Parameterized): def __init__(self, input_dim, parts=[], input_slices=None): """ This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where. @@ -41,26 +41,51 @@ class kern(Parameterised): self.compute_param_slices() - Parameterised.__init__(self) + Parameterized.__init__(self) + + def getstate(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return Parameterized.getstate(self) + [self.parts, + self.Nparts, + self.num_params, + self.input_dim, + self.input_slices, + self.param_slices + ] + + def setstate(self, state): + self.param_slices = state.pop() + self.input_slices = state.pop() + self.input_dim = state.pop() + self.num_params = state.pop() + self.Nparts = state.pop() + self.parts = state.pop() + Parameterized.setstate(self, state) - def plot_ARD(self, fignum=None, ax=None): + def plot_ARD(self, fignum=None, ax=None, title=None): """If an ARD kernel is present, it bar-plots the ARD parameters""" if ax is None: fig = pb.figure(fignum) ax = fig.add_subplot(111) for p in self.parts: if hasattr(p, 'ARD') and p.ARD: - ax.set_title('ARD parameters, %s kernel' % p.name) - + if title is None: + ax.set_title('ARD parameters, %s kernel' % p.name) + else: + ax.set_title(title) if p.name == 'linear': ard_params = p.variances else: ard_params = 1. 
/ p.lengthscale - ax.bar(np.arange(len(ard_params)) - 0.4, ard_params) - ax.set_xticks(np.arange(len(ard_params))) - ax.set_xticklabels([r"${}$".format(i) for i in range(len(ard_params))]) + x = np.arange(len(ard_params)) + ax.bar(x - 0.4, ard_params) + ax.set_xticks(x) + ax.set_xticklabels([r"${}$".format(i) for i in x]) return ax def _transform_gradients(self, g): diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index c401f788..68b0a133 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -24,8 +24,7 @@ class BayesianGPLVM(SparseGP, GPLVM): """ def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, - Z=None, kernel=None, oldpsave=10, _debug=False, - **kwargs): + Z=None, kernel=None, **kwargs): if type(likelihood_or_Y) is np.ndarray: likelihood = Gaussian(likelihood_or_Y) else: @@ -45,32 +44,19 @@ class BayesianGPLVM(SparseGP, GPLVM): if kernel is None: kernel = kern.rbf(input_dim) + kern.white(input_dim) - self.oldpsave = oldpsave - self._oldps = [] - self._debug = _debug - - if self._debug: - self.f_call = 0 - self._count = itertools.count() - self._savedklll = [] - self._savedparams = [] - self._savedgradients = [] - self._savederrors = [] - self._savedpsiKmm = [] - self._savedABCD = [] - SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs) self.ensure_default_constraints() - @property - def oldps(self): - return self._oldps - @oldps.setter - def oldps(self, p): - if len(self._oldps) == (self.oldpsave + 1): - self._oldps.pop() - # if len(self._oldps) == 0 or not np.any([np.any(np.abs(p - op) > 1e-5) for op in self._oldps]): - self._oldps.insert(0, p.copy()) + def getstate(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return SparseGP.getstate(self) + [self.init] + + def setstate(self, state): + self.init = state.pop() + SparseGP.setstate(self, state) def _get_param_names(self): X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) @@ -90,24 +76,11 @@ class BayesianGPLVM(SparseGP, GPLVM): x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self))) return x - def _clipped(self, x): - return x # np.clip(x, -1e300, 1e300) - def _set_params(self, x, save_old=True, save_count=0): -# try: - x = self._clipped(x) - N, input_dim = self.num_data, self.input_dim - self.X = x[:self.X.size].reshape(N, input_dim).copy() - self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy() - SparseGP._set_params(self, x[(2 * N * input_dim):]) -# self.oldps = x -# except (LinAlgError, FloatingPointError, ZeroDivisionError): -# print "\rWARNING: Caught LinAlgError, continueing without setting " -# if self._debug: -# self._savederrors.append(self.f_call) -# if save_count > 10: -# raise -# self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1) + N, input_dim = self.num_data, self.input_dim + self.X = x[:self.X.size].reshape(N, input_dim).copy() + self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy() + SparseGP._set_params(self, x[(2 * N * input_dim):]) def dKL_dmuS(self): dKL_dS = (1. - (1. 
/ (self.X_variance))) * 0.5 @@ -131,56 +104,19 @@ class BayesianGPLVM(SparseGP, GPLVM): def log_likelihood(self): ll = SparseGP.log_likelihood(self) kl = self.KL_divergence() - -# if ll < -2E4: -# ll = -2E4 + np.random.randn() -# if kl > 5E4: -# kl = 5E4 + np.random.randn() - - if self._debug: - self.f_call = self._count.next() - if self.f_call % 1 == 0: - self._savedklll.append([self.f_call, ll, kl]) - self._savedparams.append([self.f_call, self._get_params()]) - self._savedgradients.append([self.f_call, self._log_likelihood_gradients()]) - self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]]) -# sf2 = self.scale_factor ** 2 - if self.likelihood.is_heteroscedastic: - A = -0.5 * self.num_data * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y) -# B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2) - B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) - else: - A = -0.5 * self.num_data * self.input_dim * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT -# B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2) - B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) - C = -self.input_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2)) - D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) - self._savedABCD.append([self.f_call, A, B, C, D]) - - # print "\nkl:", kl, "ll:", ll return ll - kl def _log_likelihood_gradients(self): dKL_dmu, dKL_dS = self.dKL_dmuS() dL_dmu, dL_dS = self.dL_dmuS() - # TODO: find way to make faster - d_dmu = (dL_dmu - dKL_dmu).flatten() d_dS = (dL_dS - dKL_dS).flatten() - # TEST KL: ==================== - # d_dmu = (dKL_dmu).flatten() - # d_dS = (dKL_dS).flatten() - # ======================== - # TEST L: ==================== -# d_dmu = (dL_dmu).flatten() -# d_dS = (dL_dS).flatten() - # ======================== self.dbound_dmuS = np.hstack((d_dmu, d_dS)) self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self) - return self._clipped(np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))) + return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta)) def plot_latent(self, *args, **kwargs): - return plot_latent.plot_latent_indices(self, *args, **kwargs) + return plot_latent.plot_latent(self, *args, **kwargs) def do_test_latents(self, Y): """ @@ -256,275 +192,6 @@ class BayesianGPLVM(SparseGP, GPLVM): fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95)) return fig - def __getstate__(self): - return (self.likelihood, self.input_dim, self.X, self.X_variance, - self.init, self.num_inducing, self.Z, self.kern, - self.oldpsave, self._debug) - - def __setstate__(self, state): - self.__init__(*state) - - def _debug_filter_params(self, x): - start, end = 0, self.X.size, - X = x[start:end].reshape(self.num_data, self.input_dim) - start, end = end, end + self.X_variance.size - X_v = x[start:end].reshape(self.num_data, self.input_dim) - start, end = end, end + (self.num_inducing * self.input_dim) - Z = x[start:end].reshape(self.num_inducing, self.input_dim) - start, end = end, end + self.input_dim - theta = x[start:] - return X, X_v, Z, theta - - - def _debug_get_axis(self, figs): - if figs[-1].axes: - ax1 = figs[-1].axes[0] - ax1.cla() - else: - ax1 = figs[-1].add_subplot(111) - return ax1 - 
- def _debug_plot(self): - assert self._debug, "must enable _debug, to debug-plot" - import pylab -# from mpl_toolkits.mplot3d import Axes3D - figs = [pylab.figure('BGPLVM DEBUG', figsize=(12, 4))] -# fig.clf() - - # log like -# splotshape = (6, 4) -# ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4) - ax1 = self._debug_get_axis(figs) - ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes, - ha='center', va='center') - kllls = np.array(self._savedklll) - LL, = ax1.plot(kllls[:, 0], kllls[:, 1] - kllls[:, 2], '-', label=r'$\log p(\mathbf{Y})$', mew=1.5) - KL, = ax1.plot(kllls[:, 0], kllls[:, 2], '-', label=r'$\mathcal{KL}(p||q)$', mew=1.5) - L, = ax1.plot(kllls[:, 0], kllls[:, 1], '-', label=r'$L$', mew=1.5) # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}] - - param_dict = dict(self._savedparams) - gradient_dict = dict(self._savedgradients) -# kmm_dict = dict(self._savedpsiKmm) - iters = np.array(param_dict.keys()) - ABCD_dict = np.array(self._savedABCD) - self.showing = 0 - -# ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4) - figs.append(pylab.figure("BGPLVM DEBUG X", figsize=(12, 4))) - ax2 = self._debug_get_axis(figs) - ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes, - ha='center', va='center') - figs[-1].canvas.draw() - figs[-1].tight_layout(rect=(0, 0, 1, .86)) -# ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2) - figs.append(pylab.figure("BGPLVM DEBUG S", figsize=(12, 4))) - ax3 = self._debug_get_axis(figs) - ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes, - ha='center', va='center') - figs[-1].canvas.draw() - figs[-1].tight_layout(rect=(0, 0, 1, .86)) -# ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2) - figs.append(pylab.figure("BGPLVM DEBUG Z", figsize=(6, 4))) - ax4 = self._debug_get_axis(figs) - ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes, - ha='center', va='center') - figs[-1].canvas.draw() - figs[-1].tight_layout(rect=(0, 0, 1, .86)) -# ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2) - figs.append(pylab.figure("BGPLVM DEBUG theta", figsize=(6, 4))) - ax5 = self._debug_get_axis(figs) - ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes, - ha='center', va='center') - figs[-1].canvas.draw() - figs[-1].tight_layout(rect=(.15, 0, 1, .86)) -# figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6))) -# fig = figs[-1] -# ax6 = fig.add_subplot(121) -# ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, transform=ax6.transAxes, -# ha='center', va='center') -# ax7 = fig.add_subplot(122) -# ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes, -# ha='center', va='center') - figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6))) - fig = figs[-1] - ax8 = fig.add_subplot(121) - ax8.text(.5, .5, r"${\mathbf{A,B,C,input_dim}}$", color='k', alpha=.5, transform=ax8.transAxes, - ha='center', va='center') - ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 1], label='A') - ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 2], label='B') - ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 3], label='C') - ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='input_dim') - ax8.legend() - figs[-1].canvas.draw() - figs[-1].tight_layout(rect=(.15, 0, 1, .86)) - - X, S, Z, theta = self._debug_filter_params(param_dict[self.showing]) - Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing]) -# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag - - quiver_units = 'xy' - quiver_scale = 1 - quiver_scale_units = 
'xy' - Xlatentplts = ax2.plot(X, ls="-", marker="x") - colors = colorConverter.to_rgba_array([p.get_color() for p in Xlatentplts], .4) - Ulatent = np.zeros_like(X) - xlatent = np.tile(np.arange(0, X.shape[0])[:, None], X.shape[1]) - Xlatentgrads = ax2.quiver(xlatent, X, Ulatent, Xg, color=colors, - units=quiver_units, scale_units=quiver_scale_units, - scale=quiver_scale) - - Slatentplts = ax3.plot(S, ls="-", marker="x") - Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors, - units=quiver_units, scale_units=quiver_scale_units, - scale=quiver_scale) - ax3.set_ylim(0, 1.) - - xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1]) - UZ = np.zeros_like(Z) - Zplts = ax4.plot(Z, ls="-", marker="x") - Zgrads = ax4.quiver(xZ, Z, UZ, Zg, color=colors, - units=quiver_units, scale_units=quiver_scale_units, - scale=quiver_scale) - - xtheta = np.arange(len(theta)) - Utheta = np.zeros_like(theta) - thetaplts = ax5.bar(xtheta - .4, theta, color=colors) - thetagrads = ax5.quiver(xtheta, theta, Utheta, thetag, color=colors, - units=quiver_units, scale_units=quiver_scale_units, - scale=quiver_scale, - edgecolors=('k',), linewidths=[1]) - pylab.setp(thetaplts, zorder=0) - pylab.setp(thetagrads, zorder=10) - ax5.set_xticks(np.arange(len(theta))) - ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17) - -# imkmm = ax6.imshow(kmm_dict[self.showing][0]) -# from mpl_toolkits.axes_grid1 import make_axes_locatable -# divider = make_axes_locatable(ax6) -# caxkmm = divider.append_axes("right", "5%", pad="1%") -# cbarkmm = pylab.colorbar(imkmm, cax=caxkmm) -# -# imkmmdl = ax7.imshow(kmm_dict[self.showing][1]) -# divider = make_axes_locatable(ax7) -# caxkmmdl = divider.append_axes("right", "5%", pad="1%") -# cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl) - -# input_dimleg = ax1.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], -# loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.15, 1, 1.15), -# borderaxespad=0, mode="expand") - ax2.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], - loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1), - borderaxespad=0, mode="expand") - ax3.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], - loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1), - borderaxespad=0, mode="expand") - ax4.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], - loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1), - borderaxespad=0, mode="expand") - ax5.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], - loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1), - borderaxespad=0, mode="expand") - Lleg = ax1.legend() - Lleg.draggable() -# ax1.add_artist(input_dimleg) - - indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color()) - indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color()) - indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color()) -# for err in self._savederrors: -# if err < kllls.shape[0]: -# ax1.scatter(kllls[err, 0], kllls[err, 2], s=50, marker=(5, 2), c=KL.get_color()) -# ax1.scatter(kllls[err, 0], kllls[err, 1] - kllls[err, 2], s=50, marker=(5, 2), c=LL.get_color()) -# ax1.scatter(kllls[err, 0], kllls[err, 1], s=50, marker=(5, 2), c=L.get_color()) - -# try: -# for f in figs: -# f.canvas.draw() -# f.tight_layout(box=(0, .15, 
1, .9)) -# # pylab.draw() -# # pylab.tight_layout(box=(0, .1, 1, .9)) -# except: -# pass - - # parameter changes - # ax2 = pylab.subplot2grid((4, 1), (1, 0), 3, 1, projection='3d') - button_options = [0, 0] # [0]: clicked -- [1]: dragged - - def update_plots(event): - if button_options[0] and not button_options[1]: -# event.button, event.x, event.y, event.xdata, event.ydata) - tmp = np.abs(iters - event.xdata) - closest_hit = iters[tmp == tmp.min()][0] - - if closest_hit != self.showing: - self.showing = closest_hit - # print closest_hit, iters, event.xdata - - indicatorLL.set_data(self.showing, kllls[self.showing, 1] - kllls[self.showing, 2]) - indicatorKL.set_data(self.showing, kllls[self.showing, 2]) - indicatorL.set_data(self.showing, kllls[self.showing, 1]) - - X, S, Z, theta = self._debug_filter_params(param_dict[self.showing]) - Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing]) -# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag - - for i, Xlatent in enumerate(Xlatentplts): - Xlatent.set_ydata(X[:, i]) - Xlatentgrads.set_offsets(np.array([xlatent.ravel(), X.ravel()]).T) - Xlatentgrads.set_UVC(Ulatent, Xg) - - for i, Slatent in enumerate(Slatentplts): - Slatent.set_ydata(S[:, i]) - Slatentgrads.set_offsets(np.array([xlatent.ravel(), S.ravel()]).T) - Slatentgrads.set_UVC(Ulatent, Sg) - - for i, Zlatent in enumerate(Zplts): - Zlatent.set_ydata(Z[:, i]) - Zgrads.set_offsets(np.array([xZ.ravel(), Z.ravel()]).T) - Zgrads.set_UVC(UZ, Zg) - - for p, t in zip(thetaplts, theta): - p.set_height(t) - thetagrads.set_offsets(np.array([xtheta.ravel(), theta.ravel()]).T) - thetagrads.set_UVC(Utheta, thetag) - -# imkmm.set_data(kmm_dict[self.showing][0]) -# imkmm.autoscale() -# cbarkmm.update_normal(imkmm) -# -# imkmmdl.set_data(kmm_dict[self.showing][1]) -# imkmmdl.autoscale() -# cbarkmmdl.update_normal(imkmmdl) - - ax2.relim() - # ax3.relim() - ax4.relim() - ax5.relim() - ax2.autoscale() - # ax3.autoscale() - ax4.autoscale() - ax5.autoscale() - - [fig.canvas.draw() for fig in figs] - button_options[0] = 0 - button_options[1] = 0 - - def onclick(event): - if event.inaxes is ax1 and event.button == 1: - button_options[0] = 1 - def motion(event): - if button_options[0]: - button_options[1] = 1 - - cidr = figs[0].canvas.mpl_connect('button_release_event', update_plots) - cidp = figs[0].canvas.mpl_connect('button_press_event', onclick) - cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion) - - return ax1, ax2, ax3, ax4, ax5 # , ax6, ax7 - - - - def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): """ objective function for fitting the latent variables for test points diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py index db5d21b2..615e6618 100644 --- a/GPy/models/gp_regression.py +++ b/GPy/models/gp_regression.py @@ -25,11 +25,20 @@ class GPRegression(GP): """ - def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False): + def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False): if kernel is None: kernel = kern.rbf(X.shape[1]) - likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y) + likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y) GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) self.ensure_default_constraints() + + def getstate(self): + return GP.getstate(self) + + + def setstate(self, state): + return GP.setstate(self, state) + + pass diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index 44a9d2ce..305ad120 100644 --- a/GPy/models/gplvm.py +++ 
b/GPy/models/gplvm.py @@ -1,4 +1,4 @@ -### Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# ## Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) @@ -26,11 +26,11 @@ class GPLVM(GP): :type init: 'PCA'|'random' """ - def __init__(self, Y, input_dim, init='PCA', X = None, kernel=None, normalize_Y=False): + def __init__(self, Y, input_dim, init='PCA', X=None, kernel=None, normalize_Y=False): if X is None: X = self.initialise_latent(init, input_dim, Y) if kernel is None: - kernel = kern.rbf(input_dim, ARD=input_dim>1) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2)) + kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2)) likelihood = Gaussian(Y, normalize=normalize_Y) GP.__init__(self, X, likelihood, kernel, normalize_X=False) self.ensure_default_constraints() @@ -42,26 +42,26 @@ class GPLVM(GP): return np.random.randn(Y.shape[0], input_dim) def _get_param_names(self): - return sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.num_data)],[]) + GP._get_param_names(self) + return sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + GP._get_param_names(self) def _get_params(self): return np.hstack((self.X.flatten(), GP._get_params(self))) - def _set_params(self,x): - self.X = x[:self.num_data*self.input_dim].reshape(self.num_data,self.input_dim).copy() + def _set_params(self, x): + self.X = x[:self.num_data * self.input_dim].reshape(self.num_data, self.input_dim).copy() GP._set_params(self, x[self.X.size:]) def _log_likelihood_gradients(self): - dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X) + dL_dX = 2.*self.kern.dK_dX(self.dL_dK, self.X) - return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self))) + return np.hstack((dL_dX.flatten(), GP._log_likelihood_gradients(self))) def plot(self): - assert self.likelihood.Y.shape[1]==2 - pb.scatter(self.likelihood.Y[:,0],self.likelihood.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet) - Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None] + assert self.likelihood.Y.shape[1] == 2 + pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet) + Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None] mu, var, upper, lower = self.predict(Xnew) - pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5) + pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5) def plot_latent(self, *args, **kwargs): return util.plot_latent.plot_latent(self, *args, **kwargs) diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 1d521e5d..327c198f 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -48,7 +48,7 @@ class MRD(Model): kernels=None, initx='PCA', initz='permute', _debug=False, **kw): if names is None: - self.names = ["{}".format(i + 1) for i in range(len(likelihood_or_Y_list))] + self.names = ["{}".format(i) for i in range(len(likelihood_or_Y_list))] # sort out the kernels if kernels is None: @@ -61,12 +61,14 @@ class MRD(Model): assert not ('kernel' in kw), "pass kernels through `kernels` argument" self.input_dim = input_dim - self.num_inducing = num_inducing self._debug = _debug + self.num_inducing = num_inducing self._init = True X = self._init_X(initx, likelihood_or_Y_list) Z = self._init_Z(initz, X) + self.num_inducing = Z.shape[0] # ensure M==N if M>N + self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, num_inducing=self.num_inducing, **kw) for l, k in 
zip(likelihood_or_Y_list, kernels)] del self._init @@ -75,12 +77,36 @@ class MRD(Model): self.nparams = nparams.cumsum() self.num_data = self.gref.num_data + self.NQ = self.num_data * self.input_dim self.MQ = self.num_inducing * self.input_dim Model.__init__(self) self.ensure_default_constraints() + def getstate(self): + return Model.getstate(self) + [self.names, + self.bgplvms, + self.gref, + self.nparams, + self.input_dim, + self.num_inducing, + self.num_data, + self.NQ, + self.MQ] + + def setstate(self, state): + self.MQ = state.pop() + self.NQ = state.pop() + self.num_data = state.pop() + self.num_inducing = state.pop() + self.input_dim = state.pop() + self.nparams = state.pop() + self.gref = state.pop() + self.bgplvms = state.pop() + self.names = state.pop() + Model.setstate(self, state) + @property def X(self): return self.gref.X @@ -255,12 +281,23 @@ class MRD(Model): self.Z = Z return Z - def _handle_plotting(self, fignum, axes, plotf): + def _handle_plotting(self, fignum, axes, plotf, sharex=False, sharey=False): if axes is None: - fig = pylab.figure(num=fignum, figsize=(4 * len(self.bgplvms), 3)) + fig = pylab.figure(num=fignum) + sharex_ax = None + sharey_ax = None for i, g in enumerate(self.bgplvms): + try: + if sharex: + sharex_ax = ax # @UndefinedVariable + sharex = False # dont set twice + if sharey: + sharey_ax = ax # @UndefinedVariable + sharey = False # dont set twice + except: + pass if axes is None: - ax = fig.add_subplot(1, len(self.bgplvms), i + 1) + ax = fig.add_subplot(1, len(self.bgplvms), i + 1, sharex=sharex_ax, sharey=sharey_ax) elif isinstance(axes, (tuple, list)): ax = axes[i] else: @@ -280,16 +317,27 @@ class MRD(Model): fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X)) return fig - def plot_predict(self, fignum=None, ax=None, **kwargs): - fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g. predict(g.X)[0], **kwargs)) + def plot_predict(self, fignum=None, ax=None, sharex=False, sharey=False, **kwargs): + fig = self._handle_plotting(fignum, + ax, + lambda i, g, ax: ax.imshow(g. 
predict(g.X)[0], **kwargs), + sharex=sharex, sharey=sharey) return fig - def plot_scales(self, fignum=None, ax=None, *args, **kwargs): - fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.kern.plot_ARD(ax=ax, *args, **kwargs)) + def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs): + """ + :param:`titles` : + titles for axes of datasets + """ + if titles is None: + titles = [r'${}$'.format(name) for name in self.names] + def plotf(i, g, ax): + g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) + fig = self._handle_plotting(fignum, ax, plotf, sharex=sharex, sharey=sharey) return fig def plot_latent(self, fignum=None, ax=None, *args, **kwargs): - fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs)) + fig = self.gref.plot_latent(fignum=fignum, ax=ax, *args, **kwargs) # self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs)) return fig def _debug_plot(self): @@ -305,13 +353,4 @@ class MRD(Model): pylab.draw() fig.tight_layout() - def _debug_optimize(self, opt='scg', maxiters=5000, itersteps=10): - iters = 0 - optstep = lambda: self.optimize(opt, messages=1, max_f_eval=itersteps) - self._debug_plot() - raw_input("enter to start debug") - while iters < maxiters: - optstep() - self._debug_plot() - iters += itersteps diff --git a/GPy/models/sparse_gp_classification.py b/GPy/models/sparse_gp_classification.py index 9228fb89..5f36ebe1 100644 --- a/GPy/models/sparse_gp_classification.py +++ b/GPy/models/sparse_gp_classification.py @@ -28,7 +28,7 @@ class SparseGPClassification(SparseGP): def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10): if kernel is None: - kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) + kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1], 1e-3) if likelihood is None: distribution = likelihoods.likelihood_functions.Binomial() @@ -41,7 +41,16 @@ class SparseGPClassification(SparseGP): i = np.random.permutation(X.shape[0])[:num_inducing] Z = X[i].copy() else: - assert Z.shape[1]==X.shape[1] + assert Z.shape[1] == X.shape[1] SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X) self.ensure_default_constraints() + + def getstate(self): + return SparseGP.getstate(self) + + + def setstate(self, state): + return SparseGP.setstate(self, state) + + pass diff --git a/GPy/models/sparse_gp_regression.py b/GPy/models/sparse_gp_regression.py index 0dcef3e0..d5fcc7d7 100644 --- a/GPy/models/sparse_gp_regression.py +++ b/GPy/models/sparse_gp_regression.py @@ -43,3 +43,13 @@ class SparseGPRegression(SparseGP): SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance) self.ensure_default_constraints() + pass + + def getstate(self): + return SparseGP.getstate(self) + + + def setstate(self, state): + return SparseGP.setstate(self, state) + + pass diff --git a/GPy/models/sparse_gplvm.py b/GPy/models/sparse_gplvm.py index d6f4adb9..6e7e40b1 100644 --- a/GPy/models/sparse_gplvm.py +++ b/GPy/models/sparse_gplvm.py @@ -28,6 +28,14 @@ class SparseGPLVM(SparseGPRegression, GPLVM): SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing) self.ensure_default_constraints() + def getstate(self): + return SparseGPRegression.getstate(self) + + + def setstate(self, state): + return SparseGPRegression.setstate(self, state) + + def _get_param_names(self): return (sum([['X_%i_%i' % (n, q) for q in 
range(self.input_dim)] for n in range(self.num_data)], []) + SparseGPRegression._get_param_names(self)) diff --git a/GPy/models/svigp_regression.py b/GPy/models/svigp_regression.py index 8448bf37..4d22c619 100644 --- a/GPy/models/svigp_regression.py +++ b/GPy/models/svigp_regression.py @@ -42,3 +42,11 @@ class SVIGPRegression(SVIGP): SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize) self.load_batch() + + def getstate(self): + return GPBase.getstate(self) + + + def setstate(self, state): + return GPBase.setstate(self, state) + diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py index fcef66c6..dc6eeb46 100644 --- a/GPy/models/warped_gp.py +++ b/GPy/models/warped_gp.py @@ -28,6 +28,14 @@ class WarpedGP(GP): GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) self._set_params(self._get_params()) + def getstate(self): + return GP.getstate(self) + + + def setstate(self, state): + return GP.setstate(self, state) + + def _scale_data(self, Y): self._Ymax = Y.max() self._Ymin = Y.min() diff --git a/GPy/testing/cgd_tests.py b/GPy/testing/cgd_tests.py index d999c6fc..82041c9f 100644 --- a/GPy/testing/cgd_tests.py +++ b/GPy/testing/cgd_tests.py @@ -7,7 +7,6 @@ import unittest import numpy from GPy.inference.conjugate_gradient_descent import CGD, RUNNING import pylab -import time from scipy.optimize.optimize import rosen, rosen_der from GPy.inference.gradient_descent_update_rules import PolakRibiere diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py index ec030055..989251a7 100644 --- a/GPy/testing/examples_tests.py +++ b/GPy/testing/examples_tests.py @@ -19,14 +19,14 @@ class ExamplesTests(unittest.TestCase): self.assertTrue(isinstance(Model, GPy.models)) """ -def model_instance_generator(Model): +def model_instance_generator(model): def check_model_returned(self): - self._model_instance(Model) + self._model_instance(model) return check_model_returned -def checkgrads_generator(Model): +def checkgrads_generator(model): def model_checkgrads(self): - self._checkgrad(Model) + self._checkgrad(model) return model_checkgrads """ @@ -37,7 +37,7 @@ def model_checkgrads(model): def model_instance(model): #assert isinstance(model, GPy.core.model) - return isinstance(model, GPy.core.Model) + return isinstance(model, GPy.core.model) @nottest def test_models(): diff --git a/GPy/util/plot_latent.py b/GPy/util/plot_latent.py index c36c5e34..d89d90d8 100644 --- a/GPy/util/plot_latent.py +++ b/GPy/util/plot_latent.py @@ -2,23 +2,24 @@ import pylab as pb import numpy as np from .. 
import util
 
-def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40):
+def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40, fignum=None, plot_inducing=False, legend=True):
     """
     :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
     :param resolution: the resolution of the grid on which to evaluate the predictive variance
     """
     if ax is None:
-        ax = pb.gca()
+        fig = pb.figure(num=fignum)
+        ax = fig.add_subplot(111)
     util.plot.Tango.reset()
 
     if labels is None:
         labels = np.ones(model.num_data)
 
     if which_indices is None:
-        if model.input_dim==1:
+        if model.input_dim == 1:
             input_1 = 0
             input_2 = None
-        if model.input_dim==2:
-            input_1, input_2 = 0,1
+        elif model.input_dim == 2:
+            input_1, input_2 = 0, 1
         else:
             try:
                 input_1, input_2 = np.argsort(model.input_sensitivity())[:2]
             except:
@@ -27,14 +28,14 @@
     else:
         input_1, input_2 = which_indices
 
-    #first, plot the output variance as a function of the latent space
-    Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(model.X[:,[input_1, input_2]],resolution=resolution)
+    # first, plot the output variance as a function of the latent space
+    Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution)
     Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
     Xtest_full[:, :2] = Xtest
     mu, var, low, up = model.predict(Xtest_full)
     var = var[:, :1]
     ax.imshow(var.reshape(resolution, resolution).T,
-              extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower')
+              extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary, interpolation='bilinear', origin='lower')
 
     # make sure labels are in order of input:
     ulabels = []
@@ -46,46 +47,35 @@
         if type(ul) is np.string_:
             this_label = ul
         elif type(ul) is np.int64:
-            this_label = 'class %i'%ul
+            this_label = 'class %i' % ul
         else:
-            this_label = 'class %i'%i
+            this_label = 'class %i' % i
         if len(marker) == len(ulabels):
             m = marker[i]
         else:
             m = marker
-        index = np.nonzero(labels==ul)[0]
-        if model.input_dim==1:
-            x = model.X[index,input_1]
+        index = np.nonzero(labels == ul)[0]
+        if model.input_dim == 1:
+            x = model.X[index, input_1]
             y = np.zeros(index.size)
         else:
-            x = model.X[index,input_1]
-            y = model.X[index,input_2]
+            x = model.X[index, input_1]
+            y = model.X[index, input_2]
         ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label)
 
-    ax.set_xlabel('latent dimension %i'%input_1)
-    ax.set_ylabel('latent dimension %i'%input_2)
+    ax.set_xlabel('latent dimension %i' % input_1)
+    ax.set_ylabel('latent dimension %i' % input_2)
 
-    if not np.all(labels==1.):
-        ax.legend(loc=0,numpoints=1)
+    if not np.all(labels == 1.) and legend:
+        ax.legend(loc=0, numpoints=1)
 
-    ax.set_xlim(xmin[0],xmax[0])
-    ax.set_ylim(xmin[1],xmax[1])
+    ax.set_xlim(xmin[0], xmax[0])
+    ax.set_ylim(xmin[1], xmax[1])
     ax.grid(b=False) # remove the grid if present, it doesn't look good
     ax.set_aspect('auto') # set a nice aspect ratio
-    return ax
-
-
-def plot_latent_indices(Model, which_indices=None, *args, **kwargs):
-
-    if which_indices is None:
-        try:
-            input_1, input_2 = np.argsort(Model.input_sensitivity())[:2]
-        except:
-            raise ValueError, "cannot Automatically determine which dimensions to plot, please pass 'which_indices'"
-    else:
-        input_1, input_2 = which_indices
-    ax = plot_latent(Model, which_indices=[input_1, input_2], *args, **kwargs)
-    # TODO: Here test if there are inducing points...
-    ax.plot(Model.Z[:, input_1], Model.Z[:, input_2], '^w')
+
+    if plot_inducing:
+        ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w')
+    return ax
diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py
index 529f0eff..886e8486 100644
--- a/GPy/util/visualize.py
+++ b/GPy/util/visualize.py
@@ -103,7 +103,7 @@
         self.cid = latent_axes[0].figure.canvas.mpl_connect('axes_enter_event', self.on_enter)
 
         self.data_visualize = data_visualize
-        self.Model = model
+        self.model = model
         self.latent_axes = latent_axes
         self.sense_axes = sense_axes
         self.called = False
@@ -120,7 +120,7 @@
     def modify(self, vals):
         """When latent values are modified update the latent representation and ulso update the output visualization."""
         self.vals = vals.copy()
-        y = self.Model.predict(self.vals)[0]
+        y = self.model.predict(self.vals)[0]
         self.data_visualize.modify(y)
         self.latent_handle.set_data(self.vals[self.latent_index[0]], self.vals[self.latent_index[1]])
         self.axes.figure.canvas.draw()
@@ -148,15 +148,15 @@
         # A click in the bar chart axis for selection a dimension.
        if self.sense_axes != None:
             self.sense_axes.cla()
-            self.sense_axes.bar(np.arange(self.Model.input_dim),1./self.Model.input_sensitivity(),color='b')
+            self.sense_axes.bar(np.arange(self.model.input_dim),1./self.model.input_sensitivity(),color='b')
 
             if self.latent_index[1] == self.latent_index[0]:
-                self.sense_axes.bar(np.array(self.latent_index[0]),1./self.Model.input_sensitivity()[self.latent_index[0]],color='y')
-                self.sense_axes.bar(np.array(self.latent_index[1]),1./self.Model.input_sensitivity()[self.latent_index[1]],color='y')
+                self.sense_axes.bar(np.array(self.latent_index[0]),1./self.model.input_sensitivity()[self.latent_index[0]],color='y')
+                self.sense_axes.bar(np.array(self.latent_index[1]),1./self.model.input_sensitivity()[self.latent_index[1]],color='y')
             else:
-                self.sense_axes.bar(np.array(self.latent_index[0]),1./self.Model.input_sensitivity()[self.latent_index[0]],color='g')
-                self.sense_axes.bar(np.array(self.latent_index[1]),1./self.Model.input_sensitivity()[self.latent_index[1]],color='r')
+                self.sense_axes.bar(np.array(self.latent_index[0]),1./self.model.input_sensitivity()[self.latent_index[0]],color='g')
+                self.sense_axes.bar(np.array(self.latent_index[1]),1./self.model.input_sensitivity()[self.latent_index[1]],color='r')
 
             self.sense_axes.figure.canvas.draw()
@@ -193,7 +193,7 @@
     GPy.examples.dimensionality_reduction.BGPVLM_oil()
     """
-    def __init__(self, vals, Model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1], labels=None):
+    def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1], labels=None):
         if latent_axes==None and sense_axes==None:
             self.fig,(latent_axes,self.sense_axes) = plt.subplots(1,2)
         elif sense_axes==None:
@@ -202,7 +202,7 @@ class lvm_dimselect(lvm):
         else:
             self.sense_axes = sense_axes
         self.labels = labels
-        lvm.__init__(self,vals,Model,data_visualize,latent_axes,sense_axes,latent_index)
+        lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index)
         self.show_sensitivities()
         print "use left and right mouse buttons to select dimensions"
@@ -210,7 +210,7 @@
 
     def on_click(self, event):
         if event.inaxes==self.sense_axes:
-            new_index = max(0,min(int(np.round(event.xdata-0.5)),self.Model.input_dim-1))
+            new_index = max(0,min(int(np.round(event.xdata-0.5)),self.model.input_dim-1))
             if event.button == 1:
                 # Make it the y-axis (red=port=left) if it is a left button click
                 self.latent_index[1] = new_index
@@ -221,7 +221,7 @@
             self.show_sensitivities()
 
             self.latent_axes.cla()
-            self.Model.plot_latent(which_indices=self.latent_index,
+            self.model.plot_latent(which_indices=self.latent_index,
                                    ax=self.latent_axes, labels=self.labels)
             self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0]
             self.modify(self.latent_values)
@@ -235,7 +235,7 @@
 
     def on_leave(self,event):
         latent_values = self.latent_values.copy()
-        y = self.Model.predict(latent_values[None,:])[0]
+        y = self.model.predict(latent_values[None,:])[0]
         self.data_visualize.modify(y)
diff --git a/doc/GPy.core.rst b/doc/GPy.core.rst
index e02aaa2a..0590450f 100644
--- a/doc/GPy.core.rst
+++ b/doc/GPy.core.rst
@@ -9,6 +9,38 @@ core Package
     :undoc-members:
     :show-inheritance:
 
+:mod:`domains` Module
+---------------------
+
+.. automodule:: GPy.core.domains
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+:mod:`fitc` Module
+------------------
+
+.. 
automodule:: GPy.core.fitc + :members: + :undoc-members: + :show-inheritance: + +:mod:`gp` Module +---------------- + +.. automodule:: GPy.core.gp + :members: + :undoc-members: + :show-inheritance: + +:mod:`gp_base` Module +--------------------- + +.. automodule:: GPy.core.gp_base + :members: + :undoc-members: + :show-inheritance: + :mod:`model` Module ------------------- @@ -17,10 +49,10 @@ core Package :undoc-members: :show-inheritance: -:mod:`parameterised` Module +:mod:`parameterized` Module --------------------------- -.. automodule:: GPy.core.parameterised +.. automodule:: GPy.core.parameterized :members: :undoc-members: :show-inheritance: @@ -33,3 +65,27 @@ core Package :undoc-members: :show-inheritance: +:mod:`sparse_gp` Module +----------------------- + +.. automodule:: GPy.core.sparse_gp + :members: + :undoc-members: + :show-inheritance: + +:mod:`svigp` Module +------------------- + +.. automodule:: GPy.core.svigp + :members: + :undoc-members: + :show-inheritance: + +:mod:`transformations` Module +----------------------------- + +.. automodule:: GPy.core.transformations + :members: + :undoc-members: + :show-inheritance: + diff --git a/doc/GPy.examples.rst b/doc/GPy.examples.rst index f17cf826..fedfd4b9 100644 --- a/doc/GPy.examples.rst +++ b/doc/GPy.examples.rst @@ -25,14 +25,6 @@ examples Package :undoc-members: :show-inheritance: -:mod:`non_gaussian` Module --------------------------- - -.. automodule:: GPy.examples.non_gaussian - :members: - :undoc-members: - :show-inheritance: - :mod:`regression` Module ------------------------ @@ -41,6 +33,14 @@ examples Package :undoc-members: :show-inheritance: +:mod:`stochastic` Module +------------------------ + +.. automodule:: GPy.examples.stochastic + :members: + :undoc-members: + :show-inheritance: + :mod:`tutorials` Module ----------------------- diff --git a/doc/GPy.inference.rst b/doc/GPy.inference.rst index f30e7d25..6a1bef4a 100644 --- a/doc/GPy.inference.rst +++ b/doc/GPy.inference.rst @@ -1,10 +1,18 @@ inference Package ================= -:mod:`SGD` Module ------------------ +:mod:`conjugate_gradient_descent` Module +---------------------------------------- -.. automodule:: GPy.inference.SGD +.. automodule:: GPy.inference.conjugate_gradient_descent + :members: + :undoc-members: + :show-inheritance: + +:mod:`gradient_descent_update_rules` Module +------------------------------------------- + +.. automodule:: GPy.inference.gradient_descent_update_rules :members: :undoc-members: :show-inheritance: @@ -25,3 +33,19 @@ inference Package :undoc-members: :show-inheritance: +:mod:`scg` Module +----------------- + +.. automodule:: GPy.inference.scg + :members: + :undoc-members: + :show-inheritance: + +:mod:`sgd` Module +----------------- + +.. automodule:: GPy.inference.sgd + :members: + :undoc-members: + :show-inheritance: + diff --git a/doc/GPy.kern.rst b/doc/GPy.kern.rst index aef712dc..35d9ec00 100644 --- a/doc/GPy.kern.rst +++ b/doc/GPy.kern.rst @@ -9,38 +9,6 @@ kern Package :undoc-members: :show-inheritance: -:mod:`Brownian` Module ----------------------- - -.. automodule:: GPy.kern.Brownian - :members: - :undoc-members: - :show-inheritance: - -:mod:`Matern32` Module ----------------------- - -.. automodule:: GPy.kern.Matern32 - :members: - :undoc-members: - :show-inheritance: - -:mod:`Matern52` Module ----------------------- - -.. automodule:: GPy.kern.Matern52 - :members: - :undoc-members: - :show-inheritance: - -:mod:`bias` Module ------------------- - -.. 
automodule:: GPy.kern.bias - :members: - :undoc-members: - :show-inheritance: - :mod:`constructors` Module -------------------------- @@ -49,30 +17,6 @@ kern Package :undoc-members: :show-inheritance: -:mod:`coregionalise` Module ---------------------------- - -.. automodule:: GPy.kern.coregionalise - :members: - :undoc-members: - :show-inheritance: - -:mod:`exponential` Module -------------------------- - -.. automodule:: GPy.kern.exponential - :members: - :undoc-members: - :show-inheritance: - -:mod:`finite_dimensional` Module --------------------------------- - -.. automodule:: GPy.kern.finite_dimensional - :members: - :undoc-members: - :show-inheritance: - :mod:`kern` Module ------------------ @@ -81,107 +25,10 @@ kern Package :undoc-members: :show-inheritance: -:mod:`kernpart` Module ----------------------- +Subpackages +----------- -.. automodule:: GPy.kern.kernpart - :members: - :undoc-members: - :show-inheritance: +.. toctree:: -:mod:`linear` Module --------------------- - -.. automodule:: GPy.kern.linear - :members: - :undoc-members: - :show-inheritance: - -:mod:`periodic_Matern32` Module -------------------------------- - -.. automodule:: GPy.kern.periodic_Matern32 - :members: - :undoc-members: - :show-inheritance: - -:mod:`periodic_Matern52` Module -------------------------------- - -.. automodule:: GPy.kern.periodic_Matern52 - :members: - :undoc-members: - :show-inheritance: - -:mod:`periodic_exponential` Module ----------------------------------- - -.. automodule:: GPy.kern.periodic_exponential - :members: - :undoc-members: - :show-inheritance: - -:mod:`prod` Module ------------------- - -.. automodule:: GPy.kern.prod - :members: - :undoc-members: - :show-inheritance: - -:mod:`prod_orthogonal` Module ------------------------------ - -.. automodule:: GPy.kern.prod_orthogonal - :members: - :undoc-members: - :show-inheritance: - -:mod:`rational_quadratic` Module --------------------------------- - -.. automodule:: GPy.kern.rational_quadratic - :members: - :undoc-members: - :show-inheritance: - -:mod:`rbf` Module ------------------ - -.. automodule:: GPy.kern.rbf - :members: - :undoc-members: - :show-inheritance: - -:mod:`spline` Module --------------------- - -.. automodule:: GPy.kern.spline - :members: - :undoc-members: - :show-inheritance: - -:mod:`symmetric` Module ------------------------ - -.. automodule:: GPy.kern.symmetric - :members: - :undoc-members: - :show-inheritance: - -:mod:`sympykern` Module ------------------------ - -.. automodule:: GPy.kern.sympykern - :members: - :undoc-members: - :show-inheritance: - -:mod:`white` Module -------------------- - -.. automodule:: GPy.kern.white - :members: - :undoc-members: - :show-inheritance: + GPy.kern.parts diff --git a/doc/GPy.likelihoods.rst b/doc/GPy.likelihoods.rst index 03c15a82..9fec38f8 100644 --- a/doc/GPy.likelihoods.rst +++ b/doc/GPy.likelihoods.rst @@ -9,7 +9,7 @@ likelihoods Package :undoc-members: :show-inheritance: -:mod:`EP` Module +:mod:`ep` Module ---------------- .. automodule:: GPy.likelihoods.ep @@ -17,7 +17,7 @@ likelihoods Package :undoc-members: :show-inheritance: -:mod:`Gaussian` Module +:mod:`gaussian` Module ---------------------- .. automodule:: GPy.likelihoods.gaussian @@ -41,3 +41,11 @@ likelihoods Package :undoc-members: :show-inheritance: +:mod:`link_functions` Module +---------------------------- + +.. 
automodule:: GPy.likelihoods.link_functions + :members: + :undoc-members: + :show-inheritance: + diff --git a/doc/GPy.models.rst b/doc/GPy.models.rst index f4ae6a59..4d227642 100644 --- a/doc/GPy.models.rst +++ b/doc/GPy.models.rst @@ -9,7 +9,7 @@ models Package :undoc-members: :show-inheritance: -:mod:`Bayesian_GPLVM` Module +:mod:`bayesian_gplvm` Module ---------------------------- .. automodule:: GPy.models.bayesian_gplvm @@ -17,18 +17,18 @@ models Package :undoc-members: :show-inheritance: -:mod:`gp` Module ----------------- +:mod:`fitc_classification` Module +--------------------------------- -.. automodule:: GPy.models.gp +.. automodule:: GPy.models.fitc_classification :members: :undoc-members: :show-inheritance: -:mod:`gplvm` Module -------------------- +:mod:`gp_classification` Module +------------------------------- -.. automodule:: GPy.models.gplvm +.. automodule:: GPy.models.gp_classification :members: :undoc-members: :show-inheritance: @@ -41,18 +41,26 @@ models Package :undoc-members: :show-inheritance: -:mod:`sparse_gp` Module ------------------------ +:mod:`gplvm` Module +------------------- -.. automodule:: GPy.models.sparse_gp +.. automodule:: GPy.models.gplvm :members: :undoc-members: :show-inheritance: -:mod:`SparseGPLVM` Module --------------------------- +:mod:`mrd` Module +----------------- -.. automodule:: GPy.models.sparse_gplvm +.. automodule:: GPy.models.mrd + :members: + :undoc-members: + :show-inheritance: + +:mod:`sparse_gp_classification` Module +-------------------------------------- + +.. automodule:: GPy.models.sparse_gp_classification :members: :undoc-members: :show-inheritance: @@ -65,13 +73,21 @@ models Package :undoc-members: :show-inheritance: -.. :mod:`uncollapsed_sparse_GP` Module -.. ----------------------------------- +:mod:`sparse_gplvm` Module +-------------------------- -.. .. automodule:: GPy.models.uncollapsed_sparse_GP -.. :members: -.. :undoc-members: -.. :show-inheritance: +.. automodule:: GPy.models.sparse_gplvm + :members: + :undoc-members: + :show-inheritance: + +:mod:`svigp_regression` Module +------------------------------ + +.. automodule:: GPy.models.svigp_regression + :members: + :undoc-members: + :show-inheritance: :mod:`warped_gp` Module ----------------------- diff --git a/doc/GPy.testing.rst b/doc/GPy.testing.rst index 5b32558b..6c461177 100644 --- a/doc/GPy.testing.rst +++ b/doc/GPy.testing.rst @@ -1,6 +1,14 @@ testing Package =============== +:mod:`testing` Package +---------------------- + +.. automodule:: GPy.testing + :members: + :undoc-members: + :show-inheritance: + :mod:`bgplvm_tests` Module -------------------------- @@ -9,6 +17,22 @@ testing Package :undoc-members: :show-inheritance: +:mod:`cgd_tests` Module +----------------------- + +.. automodule:: GPy.testing.cgd_tests + :members: + :undoc-members: + :show-inheritance: + +:mod:`checkgrad` Module +----------------------- + +.. automodule:: GPy.testing.checkgrad + :members: + :undoc-members: + :show-inheritance: + :mod:`examples_tests` Module ---------------------------- @@ -33,6 +57,14 @@ testing Package :undoc-members: :show-inheritance: +:mod:`mrd_tests` Module +----------------------- + +.. automodule:: GPy.testing.mrd_tests + :members: + :undoc-members: + :show-inheritance: + :mod:`prior_tests` Module ------------------------- @@ -41,6 +73,22 @@ testing Package :undoc-members: :show-inheritance: +:mod:`psi_stat_expactation_tests` Module +---------------------------------------- + +.. 
automodule:: GPy.testing.psi_stat_expactation_tests + :members: + :undoc-members: + :show-inheritance: + +:mod:`psi_stat_gradient_tests` Module +------------------------------------- + +.. automodule:: GPy.testing.psi_stat_gradient_tests + :members: + :undoc-members: + :show-inheritance: + :mod:`sparse_gplvm_tests` Module -------------------------------- diff --git a/doc/GPy.util.rst b/doc/GPy.util.rst index 5bec990b..e66329b6 100644 --- a/doc/GPy.util.rst +++ b/doc/GPy.util.rst @@ -17,6 +17,14 @@ util Package :undoc-members: :show-inheritance: +:mod:`classification` Module +---------------------------- + +.. automodule:: GPy.util.classification + :members: + :undoc-members: + :show-inheritance: + :mod:`datasets` Module ---------------------- @@ -25,6 +33,14 @@ util Package :undoc-members: :show-inheritance: +:mod:`decorators` Module +------------------------ + +.. automodule:: GPy.util.decorators + :members: + :undoc-members: + :show-inheritance: + :mod:`linalg` Module -------------------- @@ -41,6 +57,22 @@ util Package :undoc-members: :show-inheritance: +:mod:`mocap` Module +------------------- + +.. automodule:: GPy.util.mocap + :members: + :undoc-members: + :show-inheritance: + +:mod:`pca` Module +----------------- + +.. automodule:: GPy.util.pca + :members: + :undoc-members: + :show-inheritance: + :mod:`plot` Module ------------------ @@ -49,6 +81,14 @@ util Package :undoc-members: :show-inheritance: +:mod:`plot_latent` Module +------------------------- + +.. automodule:: GPy.util.plot_latent + :members: + :undoc-members: + :show-inheritance: + :mod:`squashers` Module ----------------------- @@ -57,6 +97,22 @@ util Package :undoc-members: :show-inheritance: +:mod:`univariate_Gaussian` Module +--------------------------------- + +.. automodule:: GPy.util.univariate_Gaussian + :members: + :undoc-members: + :show-inheritance: + +:mod:`visualize` Module +----------------------- + +.. automodule:: GPy.util.visualize + :members: + :undoc-members: + :show-inheritance: + :mod:`warping_functions` Module ------------------------------- diff --git a/doc/conf.py b/doc/conf.py index 8a05f386..4c377cb6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -103,8 +103,10 @@ class Mock(object): #import mock print "Mocking" -MOCK_MODULES = ['pylab', 'sympy', 'sympy.utilities', 'sympy.utilities.codegen', 'sympy.core.cache', 'sympy.core', 'sympy.parsing', 'sympy.parsing.sympy_parser', 'matplotlib'] -#'matplotlib', 'matplotlib.color', 'matplotlib.pyplot', 'pylab' ] +MOCK_MODULES = ['sympy', + 'sympy.utilities', 'sympy.utilities.codegen', 'sympy.core.cache', + 'sympy.core', 'sympy.parsing', 'sympy.parsing.sympy_parser', + ] for mod_name in MOCK_MODULES: sys.modules[mod_name] = Mock() @@ -288,7 +290,7 @@ latex_elements = { #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. - #'preamble': '', + 'preamble': '\\usepackage{MnSymbol}', } # Grouping the document tree into LaTeX files. List of tuples diff --git a/doc/index.rst b/doc/index.rst index a7b68c16..29b4cf43 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -11,6 +11,7 @@ For a quick start, you can have a look at one of the tutorials: * `Interacting with models `_ * `A kernel overview `_ * `Writing new kernels `_ +* `Writing new models `_ You may also be interested by some examples in the GPy/examples folder. diff --git a/doc/tuto_creating_new_models.rst b/doc/tuto_creating_new_models.rst new file mode 100644 index 00000000..021b4950 --- /dev/null +++ b/doc/tuto_creating_new_models.rst @@ -0,0 +1,64 @@ +.. 
_creating_new_models:
+
+*******************
+Creating new Models
+*******************
+
+In GPy all models inherit from the base class :py:class:`~GPy.core.parameterized.Parameterized`. :py:class:`~GPy.core.parameterized.Parameterized` handles the parameterization of objects: it provides the functionality for tying, bounding and fixing parameters, as well as for searching and manipulating them by regular-expression syntax. See :py:class:`~GPy.core.parameterized.Parameterized` for more information.
+
+The :py:class:`~GPy.core.model.Model` class provides parameter introspection, the objective function and optimization.
+
+To make full use of :py:class:`~GPy.core.model.Model`, some methods need to be implemented or overridden. To explain what these methods do, we will wrap SciPy's Rosenbrock function ``rosen`` in a model which holds the input parameters :math:`\mathbf{X}`, where :math:`\mathbf{X}\in\mathbb{R}^{N\times 1}`.
+
+Obligatory methods
+==================
+
+:py:meth:`~GPy.core.model.Model.__init__` :
+    Initialize the model with the given parameters. In our example we have to store the shape information of :math:`\mathbf X` and the parameters themselves::
+
+        self.X = X
+        self.num_inputs = self.X.shape[0]
+        assert self.X.ndim == 1, "only vector inputs allowed"
+
+:py:meth:`~GPy.core.model.Model._get_params` :
+    Return the parameters of the model as a flattened numpy array-like. In our example we have to return the input parameters::
+
+        return self.X.flatten()
+
+:py:meth:`~GPy.core.model.Model._set_params` :
+    Set the parameters which have been fetched through :py:meth:`~GPy.core.model.Model._get_params`; in other words, "invert" the functionality of :py:meth:`~GPy.core.model.Model._get_params`. Since :math:`\mathbf X` is a vector here, this is simply::
+
+        self.X = params[:self.num_inputs]
+
+:py:meth:`~GPy.core.model.Model.log_likelihood` :
+    Return the log-likelihood of the model. GPy maximizes this quantity, so to find the minimum of the Rosenbrock function we return its negative::
+
+        return -scipy.optimize.rosen(self.X)
+
+:py:meth:`~GPy.core.model.Model._log_likelihood_gradients` :
+    Return the gradients of the log-likelihood with respect to all parameters::
+
+        return -scipy.optimize.rosen_der(self.X)
+
+
+Optional methods
+================
+
+If you want some special functionality please provide the following methods:
+
+Using the pickle functionality
+------------------------------
+
+To be able to use the pickle functionality ``m.pickle()``, the methods ``getstate(self)`` and ``setstate(self, state)`` have to be provided. The convention for a ``state`` in ``GPy`` is a list of all parameters which are needed to restore the model. All classes provided in ``GPy`` follow this convention, thus you can just append to the state of the inherited class and call the inherited class' ``setstate`` with the appropriate state.
+
+:py:meth:`~GPy.core.model.Model.getstate` :
+    This method returns the state of the model, following the memento pattern. As we are inheriting from :py:class:`~GPy.core.model.Model`, we have to return the state of Model as well. In our example we have ``X`` and ``num_inputs`` as state::
+
+        return Model.getstate(self) + [self.X, self.num_inputs]
+
+:py:meth:`~GPy.core.model.Model.setstate` :
+    This method restores the model from the given ``state``::
+
+        self.num_inputs = state.pop()
+        self.X = state.pop()
+        return Model.setstate(self, state)
\ No newline at end of file
diff --git a/doc/tuto_interacting_with_models.rst b/doc/tuto_interacting_with_models.rst
index 3cea7fb7..4a466bae 100644
--- a/doc/tuto_interacting_with_models.rst
+++ b/doc/tuto_interacting_with_models.rst
@@ -1,3 +1,5 @@
+.. _interacting_with_models:
+
 *************************************
 Interacting with models
 *************************************
@@ -210,6 +212,6 @@ white_variance and noise_variance are tied together.::
 Further Reading
 ===============
 
-All of the mechansiams for dealing with parameters are baked right into GPy.core.model, from which all of the classes in GPy.models inherrit. To learn how to construct your own model, you might want to read ??link?? creating_new_models.
+All of the mechanisms for dealing with parameters are baked right into GPy.core.model, from which all of the classes in GPy.models inherit. To learn how to construct your own model, you might want to read :ref:`creating_new_models`.
 
-By deafult, GPy uses the tnc optimizer (from scipy.optimize.tnc). To use other optimisers, and to control the setting of those optimisers, as well as other funky features like automated restarts and diagnostics, you can read the optimization tutorial ??link??.
+By default, GPy uses the scg optimizer. To use other optimisers, and to control the settings of those optimisers, as well as other funky features like automated restarts and diagnostics, you can read the optimization tutorial ??link??.
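For reference, the snippets from the new tutorial assemble into the following complete model. This is a minimal sketch under the tutorial's own assumptions (GPy's ``Model`` base class and SciPy's Rosenbrock helpers); the class name ``Rosen`` and the ``_get_param_names`` labels are illustrative additions, not part of the patch above::

    import numpy as np
    import scipy.optimize
    from GPy.core.model import Model

    class Rosen(Model):
        """Hypothetical wrapper treating the negated Rosenbrock function
        as a log-likelihood, so optimizing the model finds its minimum."""
        def __init__(self, X):
            self.X = np.asarray(X, dtype=float).flatten()
            self.num_inputs = self.X.shape[0]
            Model.__init__(self)
            # leaf classes initialize their parameter array themselves
            self._set_params(self._get_params())

        def _get_param_names(self):
            # one human-readable name per parameter, for printing
            return ['X_%i' % i for i in range(self.num_inputs)]

        def _get_params(self):
            return self.X.flatten()

        def _set_params(self, params):
            self.X = params[:self.num_inputs]

        def log_likelihood(self):
            # negated, since GPy maximizes the log-likelihood
            return -scipy.optimize.rosen(self.X)

        def _log_likelihood_gradients(self):
            return -scipy.optimize.rosen_der(self.X)

        # optional: pickle support via the state-list (memento) convention
        def getstate(self):
            return Model.getstate(self) + [self.X, self.num_inputs]

        def setstate(self, state):
            self.num_inputs = state.pop()
            self.X = state.pop()
            Model.setstate(self, state)

Assuming this sketch is correct, ``m = Rosen(np.zeros(5)); m.checkgrad(); m.optimize()`` should verify the gradients and drive :math:`\mathbf{X}` towards the Rosenbrock minimum at :math:`(1, \dots, 1)`.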