diff --git a/GPy/__init__.py b/GPy/__init__.py index b6bf81b7..819f54bf 100644 --- a/GPy/__init__.py +++ b/GPy/__init__.py @@ -5,6 +5,7 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) import core from core.parameterization import transformations, priors +constraints = transformations import models import mappings import inference @@ -17,6 +18,10 @@ from nose.tools import nottest import kern import plotting +# Direct imports for convenience: +from core import Model +from core.parameterization import Param, Parameterized, ObsAr + @nottest def tests(): Tester(testing).test(verbose=10) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index f3111e60..7b010e6c 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -15,6 +15,7 @@ from parameterization.variational import VariationalPosterior from scipy.sparse.base import issparse import logging +from GPy.util.normalizer import MeanNorm logger = logging.getLogger("GP") class GP(Model): @@ -27,12 +28,17 @@ class GP(Model): :param likelihood: a GPy likelihood :param :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference_method: The inference method to use for this GP :rtype: model object + :param Norm normalizer: + normalize the outputs Y. + Prediction will be un-normalized using this normalizer. + If normalizer is None, we will normalize using MeanNorm. + If normalizer is False, no normalization will be done. .. Note:: Multiple independent outputs are allowed using columns of Y """ - def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', Y_metadata=None): + def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', Y_metadata=None, normalizer=False): super(GP, self).__init__(name) assert X.ndim == 2 @@ -44,8 +50,22 @@ class GP(Model): assert Y.ndim == 2 logger.info("initializing Y") - if issparse(Y): self.Y = Y - else: self.Y = ObsAr(Y) + + if normalizer is None: + self.normalizer = MeanNorm() + elif normalizer is False: + self.normalizer = None + else: + self.normalizer = normalizer + + if self.normalizer is not None: + self.normalizer.scale_by(Y) + self.Y_normalized = ObsAr(self.normalizer.normalize(Y)) + self.Y = Y + else: + self.Y = ObsAr(Y) + self.Y_normalized = self.Y + assert Y.shape[0] == self.num_data _, self.output_dim = self.Y.shape @@ -70,11 +90,11 @@ class GP(Model): self.inference_method = inference_method logger.info("adding kernel and likelihood as parameters") - self.add_parameter(self.kern) - self.add_parameter(self.likelihood) + self.link_parameter(self.kern) + self.link_parameter(self.likelihood) def parameters_changed(self): - self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, self.Y_metadata) + self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata) self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X) @@ -139,6 +159,8 @@ class GP(Model): """ #predict the latent function values mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern) + if self.normalizer is not None: + mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var) # now push through likelihood mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata) @@ -146,6 +168,8 @@ class GP(Model): def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None): m, v = 
self._raw_predict(X, full_cov=False) + if self.normalizer is not None: + m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v) return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata) def predictive_gradients(self, Xnew): @@ -183,6 +207,8 @@ class GP(Model): :returns: Ysim: set of simulations, a Numpy array (N x samples). """ m, v = self._raw_predict(X, full_cov=full_cov) + if self.normalizer is not None: + m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v) v = v.reshape(m.size,-1) if len(v.shape)==3 else v if not full_cov: Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T diff --git a/GPy/core/model.py b/GPy/core/model.py index 7feb72b2..8c556da2 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -21,6 +21,10 @@ class Model(Parameterized): self.optimization_runs = [] self.sampling_runs = [] self.preferred_optimizer = 'bfgs' + from .parameterization.ties_and_remappings import Tie + self.tie = Tie() + self.link_parameter(self.tie, -1) + self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500) def log_likelihood(self): raise NotImplementedError, "this needs to be implemented to use the model class" @@ -287,7 +291,7 @@ class Model(Parameterized): # just check the global ratio dx = np.zeros(x.shape) dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.) - + # evaluate around the point x f1 = self._objective(x + dx) f2 = self._objective(x - dx) @@ -368,4 +372,13 @@ class Model(Parameterized): self.optimizer_array = x return ret + def __str__(self): + model_details = [['Name', self.name], + ['Log-likelihood', '{}'.format(float(self.log_likelihood()))], + ["Number of Parameters", '{}'.format(self.size)]] + max_len = reduce(lambda a, b: max(len(b[0]), a), model_details, 0) + to_print = [""] + ["{0:{l}} : {1}".format(name, detail, l=max_len) for name, detail in model_details] + ["Parameters:"] + to_print.append(super(Model, self).__str__()) + return "\n".join(to_print) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index d5822825..fb8adf4c 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -143,31 +143,6 @@ class Param(Parameterizable, ObsAr): return self._raveled_index() #=========================================================================== - # Index recreation - #=========================================================================== - def _expand_index(self, slice_index=None): - # this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes - # it basically translates slices to their respective index arrays and turns negative indices around - # it tells you in the second return argument if it has only seen arrays as indices - if slice_index is None: - slice_index = self._current_slice_ - def f(a): - a, b = a - if isinstance(a, numpy.ndarray) and a.dtype == bool: - raise ValueError, "Boolean indexing not implemented, use Param[np.where(index)] to index by boolean arrays!"
- if a not in (slice(None), Ellipsis): - if isinstance(a, slice): - start, stop, step = a.indices(b) - return numpy.r_[start:stop:step] - elif isinstance(a, (list, numpy.ndarray, tuple)): - a = numpy.asarray(a, dtype=int) - a[a < 0] = b + a[a < 0] - elif a < 0: - a = b + a - return numpy.r_[a] - return numpy.r_[:b] - return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size))) - #=========================================================================== + #=========================================================================== # Constrainable #=========================================================================== def _ensure_fixes(self): @@ -246,21 +221,14 @@ class Param(Parameterizable, ObsAr): # get a int-array containing all indices in the first axis. if slice_index is None: slice_index = self._current_slice_ - if isinstance(slice_index, (tuple, list)): - clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)] - for i in range(self._realndim_-len(clean_curr_slice)): - i+=1 - clean_curr_slice += [range(self._realshape_[i])] - if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice) - and len(set(map(len, clean_curr_slice))) <= 1): - return numpy.fromiter(itertools.izip(*clean_curr_slice), - dtype=[('', int)] * self._realndim_, count=len(clean_curr_slice[0])).view((int, self._realndim_)) try: - expanded_index = list(self._expand_index(slice_index)) - indices = numpy.fromiter(itertools.product(*expanded_index), - dtype=[('', int)] * self._realndim_, count=reduce(lambda a, b: a * b.size, expanded_index, 1)).view((int, self._realndim_)) + indices = np.indices(self._realshape_, dtype=int) + indices = indices[(slice(None),)+slice_index] + indices = np.rollaxis(indices, 0, indices.ndim).reshape(-1,self._realndim_) except: - print "Warning: extended indexing was used" indices = np.indices(self._realshape_, dtype=int) indices = indices[(slice(None),)+slice_index] indices = np.rollaxis(indices, 0, indices.ndim) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 16c382b8..cae999d9 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -13,7 +13,7 @@ Observable Pattern for patameterization """ -from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED +from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED import numpy as np import re import logging @@ -58,22 +58,50 @@ class Observable(object): @property def updates(self): - p = getattr(self, '_highest_parent_', None) - if p is not None: - self._updates = p._updates - return self._updates + raise DeprecationWarning("updates is now a function, see update_model(True|False|None)") @updates.setter def updates(self, ups): - assert isinstance(ups, bool), "updates are either on (True) or off (False)" + raise DeprecationWarning("updates is now a function, see update_model(True|False|None)") + + def update_model(self, updates=None): + """ + Get or set whether automatic updates are performed. When updates are + off, the model might be in a non-working state. To make the model work + turn updates on again.
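+
+        A minimal usage sketch (hypothetical model ``m``, e.g. a
+        GPy.models.GPRegression instance):
+
+            m.update_model(False)    # switch automatic recomputation off
+            m.kern.lengthscale = 2.  # make several cheap changes
+            m.update_model(True)     # recompute once; the model is consistent again
+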
+ + :param bool|None updates: + + bool: whether to do updates + None: get the current update state + """ + if updates is None: + p = getattr(self, '_highest_parent_', None) + if p is not None: + self._updates = p._updates + return self._updates + assert isinstance(updates, bool), "updates are either on (True) or off (False)" p = getattr(self, '_highest_parent_', None) if p is not None: - p._updates = ups + p._updates = updates else: - self._updates = ups - if ups: - self._trigger_params_changed() + self._updates = updates + self.trigger_update() + + def toggle_update(self): + self.update_model(not self.update_model()) + def trigger_update(self): + """ + Update the model from the current state. + Make sure that updates are on; otherwise this + method will do nothing. + """ + if not self.update_model(): + #print "Warning: updates are off, updating the model will do nothing" + return + self._trigger_params_changed() + def add_observer(self, observer, callble, priority=0): """ Add an observer `observer` with the callback `callble` @@ -110,7 +138,7 @@ class Observable(object): :param min_priority: only notify observers with priority > min_priority if min_priority is None, notify all observers in order """ - if not self.updates: + if not self.update_model(): return if which is None: which = self @@ -511,6 +539,22 @@ class Indexable(Nameable, Observable): [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()] return ret return 0. + + #=========================================================================== + # Tie parameters together + #=========================================================================== + + def _has_ties(self): + if self._highest_parent_.tie.tied_param is None: + return False + if self.has_parent(): + return self._highest_parent_.tie.label_buf[self._highest_parent_._raveled_index_for(self)].sum()>0 + return True + + def tie_together(self): + self._highest_parent_.tie.add_tied_parameter(self) + self._highest_parent_._set_fixed(self,self._raveled_index()) + self._trigger_params_changed() #=========================================================================== # Constrain operations -> done @@ -525,7 +569,8 @@ class Indexable(Nameable, Observable): Constrain the parameter to the given :py:class:`GPy.core.transformations.Transformation`. """ - self.param_array[...] = transform.initialize(self.param_array) + if isinstance(transform, Transformation): + self.param_array[...] = transform.initialize(self.param_array) reconstrained = self.unconstrain() added = self._add_to_index_operations(self.constraints, reconstrained, transform, warning) self.notify_observers(self, None if trigger_parent else -np.inf) @@ -601,13 +646,13 @@ class Indexable(Nameable, Observable): """ Helper preventing copy code. This adds the given what (transformation, prior etc) to parameter index operations which. - revonstrained are reconstrained indices. + reconstrained are reconstrained indices. warn when reconstraining parameters if warning is True. TODO: find out which parameters have changed specifically """ if warning and reconstrained.size > 0: # TODO: figure out which parameters have changed and only print those - print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name) + print "WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name) index = self._raveled_index() which.add(what, index) return index @@ -653,7 +698,7 @@ class OptimizationHandlable(Indexable): will be set accordingly.
It has to be set with an array, retrieved from this method, as e.g. fixing will resize the array. - The optimizer should only interfere with this array, such that transofrmations + The optimizer should only interfere with this array, such that transformations are secured. """ if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size: @@ -662,12 +707,13 @@ class OptimizationHandlable(Indexable): if not self._optimizer_copy_transformed: self._optimizer_copy_.flat = self.param_array.flat [np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] - if self.has_parent() and self.constraints[__fixed__].size != 0: + if self.has_parent() and (self.constraints[__fixed__].size != 0 or self._has_ties()): fixes = np.ones(self.size).astype(bool) fixes[self.constraints[__fixed__]] = FIXED - return self._optimizer_copy_[fixes] + return self._optimizer_copy_[np.logical_and(fixes, self._highest_parent_.tie.getTieFlag(self))] elif self._has_fixes(): - return self._optimizer_copy_[self._fixes_] + return self._optimizer_copy_[self._fixes_] + self._optimizer_copy_transformed = True return self._optimizer_copy_ @@ -694,6 +740,7 @@ class OptimizationHandlable(Indexable): self.param_array.flat[f] = p [np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]])) for c, ind in self.constraints.iteritems() if c != __fixed__] + self._highest_parent_.tie.propagate_val() self._optimizer_copy_transformed = False self._trigger_params_changed() @@ -726,6 +773,7 @@ class OptimizationHandlable(Indexable): Transform the gradients by multiplying the gradient factor for each constraint to it. """ + self._highest_parent_.tie.collate_gradient() [np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__] if self._has_fixes(): return g[self._fixes_] return g @@ -778,9 +826,15 @@ class OptimizationHandlable(Indexable): """ # first take care of all parameters (from N(0,1)) x = rand_gen(size=self._size_transformed(), *args, **kwargs) - # now draw from prior where possible - [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None] + self.update_model(False) # Switch off the updates self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...) + # now draw from prior where possible + x = self.param_array.copy() + [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None] + unfixlist = np.ones((self.size,),dtype=np.bool) + unfixlist[self.constraints[__fixed__]] = False + self.param_array[unfixlist] = x[unfixlist] + self.update_model(True) #=========================================================================== # For shared memory arrays. 
This does nothing in Param, but sets the memory diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index e036d680..7b5911a5 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -82,7 +82,7 @@ class Parameterized(Parameterizable): self._fixes_ = None self._param_slices_ = [] #self._connect_parameters() - self.add_parameters(*parameters) + self.link_parameters(*parameters) def build_pydot(self, G=None): import pydot # @UnresolvedImport @@ -110,7 +110,7 @@ class Parameterized(Parameterizable): #=========================================================================== # Add remove parameters: #=========================================================================== - def add_parameter(self, param, index=None, _ignore_added_names=False): + def link_parameter(self, param, index=None, _ignore_added_names=False): """ :param parameters: the parameters to add :type parameters: list of or one :py:class:`GPy.core.param.Param` @@ -122,8 +122,8 @@ class Parameterized(Parameterizable): at any given index using the :func:`list.insert` syntax """ if param in self.parameters and index is not None: - self.remove_parameter(param) - self.add_parameter(param, index) + self.unlink_parameter(param) + self.link_parameter(param, index) # elif param.has_parent(): # raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short()) elif param not in self.parameters: @@ -132,7 +132,7 @@ class Parameterized(Parameterizable): if parent is self: raise HierarchyError, "You cannot add a parameter twice into the hierarchy" param.traverse_parents(visit, self) - param._parent_.remove_parameter(param) + param._parent_.unlink_parameter(param) # make sure the size is set if index is None: start = sum(p.size for p in self.parameters) @@ -168,14 +168,14 @@ class Parameterized(Parameterizable): raise HierarchyError, """Parameter exists already, try making a copy""" - def add_parameters(self, *parameters): + def link_parameters(self, *parameters): """ convenience method for adding several parameters without gradient specification """ - [self.add_parameter(p) for p in parameters] + [self.link_parameter(p) for p in parameters] - def remove_parameter(self, param): + def unlink_parameter(self, param): """ :param param: param object to remove from being a parameter of this parameterized object. 
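+
+        A hypothetical sketch (assuming a Parameterized object ``m`` and
+        ``numpy`` imported as ``np``):
+
+            p = GPy.Param('theta', np.ones(3))
+            m.link_parameter(p)    # p becomes part of m's hierarchy
+            m.unlink_parameter(p)  # p is detached from m again
+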
""" @@ -206,6 +206,11 @@ class Parameterized(Parameterizable): self._highest_parent_._connect_fixes() self._highest_parent_._notify_parent_change() + def add_parameter(self, *args, **kwargs): + raise DeprecationWarning, "add_parameter was renamed to link_parameter to avoid confusion of setting variables" + def remove_parameter(self, *args, **kwargs): + raise DeprecationWarning, "remove_parameter was renamed to link_parameter to avoid confusion of setting variables" + def _connect_parameters(self, ignore_added_names=False): # connect parameterlist to this parameterized object # This just sets up the right connection for the params objects @@ -294,7 +299,9 @@ class Parameterized(Parameterizable): if hasattr(self, "parameters"): try: pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False) - if name in pnames: self.parameters[pnames.index(name)][:] = val; return + if name in pnames: + param = self.parameters[pnames.index(name)] + param[:] = val; return except AttributeError: pass object.__setattr__(self, name, val); @@ -350,7 +357,6 @@ class Parameterized(Parameterizable): def _ties_str(self): return [','.join(x._ties_str) for x in self.flattened_parameters] def __str__(self, header=True): - name = adjust_name_for_printing(self.name) + "." constrs = self._constraints_str; ts = self._ties_str @@ -365,11 +371,9 @@ class Parameterized(Parameterizable): to_print = [] for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs): to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p)) - # to_print = [format_spec.format(p=p, const=c, t=t) if isinstance(p, Param) else p.__str__(header=False) for p, c, t in itertools.izip(self.parameters, constrs, ts)] sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3) if header: header = " {{0:<{0}s}} | {{1:^{1}s}} | {{2:^{2}s}} | {{3:^{3}s}} | {{4:^{4}s}}".format(nl, sl, cl, pl, tl).format(name, "Value", "Constraint", "Prior", "Tied to") - # header += '\n' + sep to_print.insert(0, header) return '\n'.format(sep).join(to_print) pass diff --git a/GPy/core/parameterization/ties_and_remappings.py b/GPy/core/parameterization/ties_and_remappings.py index da46acaa..a81b8d61 100644 --- a/GPy/core/parameterization/ties_and_remappings.py +++ b/GPy/core/parameterization/ties_and_remappings.py @@ -31,59 +31,193 @@ class Fix(Remapping): -class Tie(Remapping): - def __init__(self, value, name): +class Tie(Parameterized): + """ + The new parameter tie framework. (under development) + + All the parameters tied together get a new parameter inside the *Tie* object. + Its value should always be equal to all the tied parameters, and its gradient + is the sum of all the tied parameters. + + =====Implementation Details===== + The *Tie* object should only exist on the top of param tree (the highest parent). + + self.label_buf: + It uses a label buffer that has the same length as all the parameters (self._highest_parent_.param_array). + The buffer keeps track of all the tied parameters. All the tied parameters have a label (an interger) higher + than 0, and the parameters that have the same label are tied together. + + self.buf_index: + An auxiliary index list for the global index of the tie parameter inside the *Tie* object. 
+ + ================================ + + TODO: + * EVERYTHING + + """ + def __init__(self, name='tie'): super(Tie, self).__init__(name) - self.tied_parameters = [] - self.value = Param('val', value) - self.add_parameter(self.value) + self.tied_param = None + # The buffer keeps track of tie status + self.label_buf = None + # The global indices of the 'tied' param + self.buf_idx = None + # A boolean array indicating non-tied parameters + self._tie_ = None + + def getTieFlag(self, p=None): + if self.tied_param is None: + if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size: + self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool) + if p is not None: + return self._tie_[p._highest_parent_._raveled_index_for(p)] + return self._tie_ + + def _init_labelBuf(self): + if self.label_buf is None: + self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int) + if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size: + self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool) + + def _updateTieFlag(self): + if self._tie_.size != self.label_buf.size: + self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool) + self._tie_[self.label_buf>0] = False + self._tie_[self.buf_idx] = True - def add_tied_parameter(self, p): - self.tied_parameters.append(p) - p.add_observer(self, self.callback) - self.parameters_changed() - - def callback(self, param=None, which=None): + def add_tied_parameter(self, p, p2=None): """ - This gets called whenever any of the tied parameters changes. we spend - considerable effort working out what has changed and to what value. - Then we store that value in self.value, and broadcast it everywhere - with parameters_changed. + Tie the list of parameters p together (p2==None) or + Tie the list of parameters p with the list of parameters p2 (p2!=None) """ - if which is self:return - index = self._highest_parent_.constraints[self] - if len(index)==0: - return # nothing to tie together, this tie exists without any tied parameters - self.collate_gradient() - vals = self._highest_parent_.param_array[index] - uvals = np.unique(vals) - if len(uvals)==1: - #all of the tied things are at the same value - if np.all(self.value==uvals[0]): - return # DO NOT DO ANY CHANGES IF THE TIED PART IS NOT CHANGED! - self.value[...] = uvals[0] - elif len(uvals)==2: - #only *one* of the tied things has changed. it must be different to self.value - newval = uvals[uvals != self.value*1] - self.value[...] = newval + self._init_labelBuf() + if p2 is None: + idx = self._highest_parent_._raveled_index_for(p) + val = self._sync_val_group(idx) + if np.all(self.label_buf[idx]==0): + # None of p has been tied before. + tie_idx = self._expandTieParam(1) + tie_id = self.label_buf.max()+1 + self.label_buf[tie_idx] = tie_id + else: + b = self.label_buf[idx] + ids = np.unique(b[b>0]) + tie_id, tie_idx = self._merge_tie_param(ids) + self._highest_parent_.param_array[tie_idx] = val + idx = self._highest_parent_._raveled_index_for(p) + self.label_buf[idx] = tie_id else: - #more than one of the tied things changed. panic.
- raise ValueError, "something is wrong with the tieing" + pass + self._updateTieFlag() + + def _merge_tie_param(self, ids): + """Merge the tie parameters with ids in the list.""" + if len(ids)==1: + id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==ids[0]][0] + return ids[0],id_final_idx + id_final = ids[0] + ids_rm = ids[1:] + label_buf_param = self.label_buf[self.buf_idx] + idx_param = [np.where(label_buf_param==i)[0][0] for i in ids_rm] + self._removeTieParam(idx_param) + [np.put(self.label_buf, np.where(self.label_buf==i), id_final) for i in ids_rm] + id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==id_final][0] + return id_final, id_final_idx + + def _sync_val_group(self, idx): + self._highest_parent_.param_array[idx] = self._highest_parent_.param_array[idx].mean() + return self._highest_parent_.param_array[idx][0] + + def _expandTieParam(self, num): + """Expand the tie param with the number of *num* parameters""" + if self.tied_param is None: + new_buf = np.empty((num,)) + else: + new_buf = np.empty((self.tied_param.size+num,)) + new_buf[:self.tied_param.size] = self.tied_param.param_array.copy() + self.remove_parameter(self.tied_param) + self.tied_param = Param('tied',new_buf) + self.add_parameter(self.tied_param) + buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param) + self._expand_label_buf(self.buf_idx, buf_idx_new) + self.buf_idx = buf_idx_new + return self.buf_idx[-num:] + + def _removeTieParam(self, idx): + """idx within tied_param""" + new_buf = np.empty((self.tied_param.size-len(idx),)) + bool_list = np.ones((self.tied_param.size,),dtype=np.bool) + bool_list[idx] = False + new_buf[:] = self.tied_param.param_array[bool_list] + self.remove_parameter(self.tied_param) + self.tied_param = Param('tied',new_buf) + self.add_parameter(self.tied_param) + buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param) + self._shrink_label_buf(self.buf_idx, buf_idx_new, bool_list) + self.buf_idx = buf_idx_new + + def _expand_label_buf(self, idx_old, idx_new): + """Expand label buffer accordingly""" + if idx_old is None: + self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int) + else: + bool_old = np.zeros((self.label_buf.size,),dtype=np.bool) + bool_old[idx_old] = True + bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool) + bool_new[idx_new] = True + label_buf_new = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int) + label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)] + label_buf_new[idx_new[:len(idx_old)]] = self.label_buf[idx_old] + self.label_buf = label_buf_new + + def _shrink_label_buf(self, idx_old, idx_new, bool_list): + bool_old = np.zeros((self.label_buf.size,),dtype=np.bool) + bool_old[idx_old] = True + bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool) + bool_new[idx_new] = True + label_buf_new = np.empty(self._highest_parent_.param_array.shape, dtype=np.int) + label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)] + label_buf_new[idx_new] = self.label_buf[idx_old[bool_list]] + self.label_buf = label_buf_new + + def _check_change(self): + changed = False + if self.tied_param is not None: + for i in xrange(self.tied_param.size): + b0 = self.label_buf==self.label_buf[self.buf_idx[i]] + b = self._highest_parent_.param_array[b0]!=self.tied_param[i] + if b.sum()==0: + print 'XXX' + continue + elif b.sum()==1: + print '!!!' 
+ val = self._highest_parent_.param_array[b0][b][0] + self._highest_parent_.param_array[b0] = val + else: + self._highest_parent_.param_array[b0] = self.tied_param[i] + changed = True + return changed + def parameters_changed(self): #ensure all out parameters have the correct value, as specified by our mapping - index = self._highest_parent_.constraints[self] - if np.all(self._highest_parent_.param_array[index]==self.value): - return # STOP TRIGGER THE UPDATE LOOP MULTIPLE TIMES!!! - self._highest_parent_.param_array[index] = self.mapping() - [p.notify_observers(which=self) for p in self.tied_parameters] + changed = self._check_change() + if changed: + self._highest_parent_._trigger_params_changed() self.collate_gradient() - def mapping(self): - return self.value - def collate_gradient(self): - index = self._highest_parent_.constraints[self] - self.value.gradient = np.sum(self._highest_parent_.gradient[index]) + if self.tied_param is not None: + self.tied_param.gradient = 0. + [np.put(self.tied_param.gradient, i, self._highest_parent_.gradient[self.label_buf==self.label_buf[self.buf_idx[i]]].sum()) + for i in xrange(self.tied_param.size)] + + def propagate_val(self): + if self.tied_param is not None: + for i in xrange(self.tied_param.size): + self._highest_parent_.param_array[self.label_buf==self.label_buf[self.buf_idx[i]]] = self.tied_param[i] diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index 9e0127a2..251ec7db 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -7,7 +7,7 @@ Created on 6 Nov 2013 import numpy as np from parameterized import Parameterized from param import Param -from transformations import Logexp, Logistic +from transformations import Logexp, Logistic, __fixed__ class VariationalPrior(Parameterized): def __init__(self, name='latent space', **kw): @@ -35,28 +35,42 @@ class NormalPrior(VariationalPrior): class SpikeAndSlabPrior(VariationalPrior): def __init__(self, pi=None, learnPi=False, variance = 1.0, name='SpikeAndSlabPrior', **kw): - super(VariationalPrior, self).__init__(name=name, **kw) - self.pi = Param('pi', pi, Logistic(1e-10,1.-1e-10)) + super(SpikeAndSlabPrior, self).__init__(name=name, **kw) self.variance = Param('variance',variance) self.learnPi = learnPi if learnPi: - self.add_parameters(self.pi) + self.pi = Param('Pi', pi, Logistic(1e-10,1.-1e-10)) + else: + self.pi = Param('Pi', pi, __fixed__) + self.link_parameter(self.pi) + def KL_divergence(self, variational_posterior): mu = variational_posterior.mean S = variational_posterior.variance gamma = variational_posterior.binary_prob + if len(self.pi.shape)==2: + idx = np.unique(gamma._raveled_index()/gamma.shape[-1]) + pi = self.pi[idx] + else: + pi = self.pi + var_mean = np.square(mu)/self.variance var_S = (S/self.variance - np.log(S)) - var_gamma = (gamma*np.log(gamma/self.pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-self.pi))).sum() + var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum() return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
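+
+        # As a reading aid: with var_mean = mu**2/variance and
+        # var_S = S/variance - log(S), the value returned above is
+        #   KL = sum[ gamma*log(gamma/pi) + (1-gamma)*log((1-gamma)/(1-pi)) ]
+        #      + 0.5*sum[ gamma*(log(variance) - 1 + mu**2/variance + S/variance - log(S)) ]
+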
def update_gradients_KL(self, variational_posterior): mu = variational_posterior.mean S = variational_posterior.variance gamma = variational_posterior.binary_prob + if len(self.pi.shape)==2: + idx = np.unique(gamma._raveled_index()/gamma.shape[-1]) + pi = self.pi[idx] + else: + pi = self.pi - gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2. + gamma.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2. mu.gradient -= gamma*mu/self.variance S.gradient -= (1./self.variance - 1./S) * gamma /2. if self.learnPi: @@ -65,7 +79,7 @@ class SpikeAndSlabPrior(VariationalPrior): elif len(self.pi.shape)==1: self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0) else: - self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)) + self.pi[idx].gradient = (gamma/self.pi[idx] - (1.-gamma)/(1.-self.pi[idx])) class VariationalPosterior(Parameterized): def __init__(self, means=None, variances=None, name='latent space', *a, **kw): @@ -75,7 +89,7 @@ class VariationalPosterior(Parameterized): self.ndim = self.mean.ndim self.shape = self.mean.shape self.num_data, self.input_dim = self.mean.shape - self.add_parameters(self.mean, self.variance) + self.link_parameters(self.mean, self.variance) self.num_data, self.input_dim = self.mean.shape if self.has_uncertain_inputs(): assert self.variance.shape == self.mean.shape, "need one variance per sample and dimenion" @@ -142,7 +156,7 @@ class SpikeAndSlabPosterior(VariationalPosterior): """ super(SpikeAndSlabPosterior, self).__init__(means, variances, name) self.gamma = Param("binary_prob",binary_prob, Logistic(1e-10,1.-1e-10)) - self.add_parameter(self.gamma) + self.link_parameter(self.gamma) def __getitem__(self, s): if isinstance(s, (int, slice, tuple, list, np.ndarray)): diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index cdf39a9b..6b923609 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -34,7 +34,7 @@ class SparseGP(GP): """ - def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None): + def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None, normalizer=False): #pick a sensible inference method if inference_method is None: @@ -48,15 +48,15 @@ class SparseGP(GP): self.Z = Param('inducing inputs', Z) self.num_inducing = Z.shape[0] - GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata) + GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) logger.info("Adding Z as parameter") - self.add_parameter(self.Z, index=0) + self.link_parameter(self.Z, index=0) def has_uncertain_inputs(self): return isinstance(self.X, VariationalPosterior) def parameters_changed(self): - self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata) + self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata) self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) if isinstance(self.X, VariationalPosterior): #gradients wrt kernel diff --git a/GPy/core/sparse_gp_mpi.py b/GPy/core/sparse_gp_mpi.py new file mode 100644 index 00000000..cecbe667 
--- /dev/null +++ b/GPy/core/sparse_gp_mpi.py @@ -0,0 +1,119 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +from sparse_gp import SparseGP +from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch + +import logging +logger = logging.getLogger("sparse gp mpi") + +class SparseGP_MPI(SparseGP): + """ + A general purpose Sparse GP model with MPI parallelization support + + This model allows (approximate) inference using variational DTC or FITC + (Gaussian likelihoods) as well as non-conjugate sparse methods based on + these. + + :param X: inputs + :type X: np.ndarray (num_data x input_dim) + :param Y: observed outputs + :type Y: np.ndarray (num_data x output_dim) + :param Z: inducing inputs + :type Z: np.ndarray (num_inducing x input_dim) + :param kernel: the kernel (covariance function). See link kernels + :type kernel: a GPy.kern.kern instance + :param likelihood: a likelihood instance, containing the observed data + :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) + :param mpi_comm: The communication group of MPI, e.g. mpi4py.MPI.COMM_WORLD + :type mpi_comm: mpi4py.MPI.Intracomm + + """ + + def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False): + self._IN_OPTIMIZATION_ = False + if mpi_comm != None: + if inference_method is None: + inference_method = VarDTC_minibatch(mpi_comm=mpi_comm) + else: + assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!'
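+            # VarDTC_minibatch partitions the data across the MPI ranks (see
+            # the divide_data call below); other inference methods assume the
+            # full dataset on a single node.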
+ + super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) + self.update_model(False) + self.link_parameter(self.X, index=0) + if variational_prior is not None: + self.link_parameter(variational_prior) +# self.X.fix() + + self.mpi_comm = mpi_comm + # Manage the data (Y) division + if mpi_comm != None: + from ..util.mpi import divide_data + N_start, N_end, N_list = divide_data(Y.shape[0], mpi_comm) + self.N_range = (N_start, N_end) + self.N_list = np.array(N_list) + self.Y_local = self.Y[N_start:N_end] + print 'MPI RANK '+str(self.mpi_comm.rank)+' with the data range '+str(self.N_range) + mpi_comm.Bcast(self.param_array, root=0) + self.update_model(True) + + + def __getstate__(self): + dc = super(SparseGP_MPI, self).__getstate__() + dc['mpi_comm'] = None + if self.mpi_comm != None: + del dc['N_range'] + del dc['N_list'] + del dc['Y_local'] + if 'normalizer' not in dc: + dc['normalizer'] = None + dc['Y_normalized'] = dc['Y'] + return dc + + #===================================================== + # The MPI parallelization + # - can move to model at some point + #===================================================== + + @SparseGP.optimizer_array.setter + def optimizer_array(self, p): + if self.mpi_comm != None: + if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0: + self.mpi_comm.Bcast(np.int32(1),root=0) + self.mpi_comm.Bcast(p, root=0) + + from ..util.debug import checkFinite + checkFinite(p, 'optimizer_array') + + SparseGP.optimizer_array.fset(self,p) + + def optimize(self, optimizer=None, start=None, **kwargs): + self._IN_OPTIMIZATION_ = True + if self.mpi_comm==None: + super(SparseGP_MPI, self).optimize(optimizer,start,**kwargs) + elif self.mpi_comm.rank==0: + super(SparseGP_MPI, self).optimize(optimizer,start,**kwargs) + self.mpi_comm.Bcast(np.int32(-1),root=0) + elif self.mpi_comm.rank>0: + x = self.optimizer_array.copy() + flag = np.empty(1,dtype=np.int32) + while True: + self.mpi_comm.Bcast(flag,root=0) + if flag==1: + self.optimizer_array = x + elif flag==-1: + break + else: + self._IN_OPTIMIZATION_ = False + raise Exception("Unrecognizable flag for synchronization!") + self._IN_OPTIMIZATION_ = False + + def parameters_changed(self): + if isinstance(self.inference_method,VarDTC_minibatch): + update_gradients(self, mpi_comm=self.mpi_comm) + else: + super(SparseGP_MPI,self).parameters_changed() + diff --git a/GPy/core/symbolic.py b/GPy/core/symbolic.py index a2d61911..c3e1a52c 100644 --- a/GPy/core/symbolic.py +++ b/GPy/core/symbolic.py @@ -127,7 +127,7 @@ class Symbolic_core(): val = parameters[theta.name] # Add parameter. 
- self.add_parameters(Param(theta.name, val, None)) + self.link_parameters(Param(theta.name, val, None)) #self._set_attribute(theta.name, ) def eval_parameters_changed(self): diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 2a4b91b3..c4465061 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -387,7 +387,7 @@ def silhouette(max_iters=100, optimize=True, plot=True): print m return m -def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True): +def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=True): """Run a 1D example of a sparse GP regression.""" # sample inputs and outputs X = np.random.uniform(-3., 3., (num_samples, 1)) @@ -396,7 +396,9 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti rbf = GPy.kern.RBF(1) # create simple GP Model m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing) - m.checkgrad(verbose=1) + + if checkgrad: + m.checkgrad(verbose=1) if optimize: m.optimize('tnc', messages=1, max_iters=max_iters) diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index 19fc0fa8..a7e2a800 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) from posterior import Posterior -from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs +from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri from ...util import diag from ...core.parameterization.variational import VariationalPosterior import numpy as np @@ -166,12 +166,20 @@ class VarDTC_minibatch(LatentFunctionInference): # Compute Common Components #====================================================================== + from ...util.debug import checkFullRank + Kmm = kern.K(Z).copy() diag.add(Kmm, self.const_jitter) + r1 = checkFullRank(Kmm,name='Kmm') Lm = jitchol(Kmm) + LmInv = dtrtri(Lm) + #LmInvPsi2LmInvT = LmInv.dot(psi2_full).dot(LmInv.T) LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT + r2 = checkFullRank(Lambda,name='Lambda') + if (not r1) or (not r2): + raise RuntimeError("Kmm or Lambda is not full rank") LL = jitchol(Lambda) LL = np.dot(Lm,LL) b,_ = dtrtrs(LL, psi1Y_full.T) @@ -335,7 +343,13 @@ def update_gradients(model, mpi_comm=None): Y = model.Y_local X = model.X[model.N_range[0]:model.N_range[1]] - model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y) + try: + model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y) + except Exception: + if model.mpi_comm is None or model.mpi_comm.rank==0: + import time + model.pickle('model_'+str(int(time.time()))+'.pickle') + raise het_noise = model.likelihood.variance.size > 1 @@ -379,7 +393,7 @@ def update_gradients(model, mpi_comm=None): # Gather the gradients from multiple MPI nodes if mpi_comm != None: if het_noise: - assert False, "Not implemented!" + raise NotImplementedError("het_noise is not implemented!")
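+        # Allreduce (default op: SUM) adds up the per-rank gradient
+        # contributions, so every rank ends up holding the full-data
+        # gradients before the optimizer step.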
kern_grad_all = kern_grad.copy() Z_grad_all = model.Z.gradient.copy() mpi_comm.Allreduce([kern_grad, MPI.DOUBLE], [kern_grad_all, MPI.DOUBLE]) @@ -404,10 +418,10 @@ def update_gradients(model, mpi_comm=None): mpi_comm.Allreduce([np.float64(KL_div), MPI.DOUBLE], [KL_div_all, MPI.DOUBLE]) KL_div = KL_div_all [mpi_comm.Allgatherv([pp.copy(), MPI.DOUBLE], [pa, (model.N_list*pa.shape[-1], None), MPI.DOUBLE]) for pp,pa in zip(model.get_X_gradients(X),model.get_X_gradients(model.X))] - from ...models import SSGPLVM - if isinstance(model, SSGPLVM): - grad_pi = np.array(model.variational_prior.pi.gradient) - mpi_comm.Allreduce([grad_pi.copy(), MPI.DOUBLE], [model.variational_prior.pi.gradient, MPI.DOUBLE]) +# from ...models import SSGPLVM +# if isinstance(model, SSGPLVM): +# grad_pi = np.array(model.variational_prior.pi.gradient) +# mpi_comm.Allreduce([grad_pi.copy(), MPI.DOUBLE], [model.variational_prior.pi.gradient, MPI.DOUBLE]) model._log_marginal_likelihood -= KL_div # dL_dthetaL diff --git a/GPy/kern/_src/ODE_UY.py b/GPy/kern/_src/ODE_UY.py index 510b4f7c..b4a2b42d 100644 --- a/GPy/kern/_src/ODE_UY.py +++ b/GPy/kern/_src/ODE_UY.py @@ -17,7 +17,7 @@ class ODE_UY(Kern): self.lengthscale_Y = Param('lengthscale_Y', lengthscale_Y, Logexp()) self.lengthscale_U = Param('lengthscale_U', lengthscale_Y, Logexp()) - self.add_parameters(self.variance_Y, self.variance_U, self.lengthscale_Y, self.lengthscale_U) + self.link_parameters(self.variance_Y, self.variance_U, self.lengthscale_Y, self.lengthscale_U) def K(self, X, X2=None): # model : a * dy/dt + b * y = U diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 27f8ebd1..4c72a254 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -18,7 +18,7 @@ class Add(CombinationKernel): if isinstance(kern, Add): del subkerns[i] for part in kern.parts[::-1]: - kern.remove_parameter(part) + kern.unlink_parameter(part) subkerns.insert(i, part) super(Add, self).__init__(subkerns, name) @@ -171,10 +171,10 @@ class Add(CombinationKernel): if isinstance(other, Add): other_params = other.parameters[:] for p in other_params: - other.remove_parameter(p) - self.add_parameters(*other_params) + other.unlink_parameter(p) + self.link_parameters(*other_params) else: - self.add_parameter(other) + self.link_parameter(other) self.input_dim, self.active_dims = self.get_input_dim_active_dims(self.parts) return self diff --git a/GPy/kern/_src/brownian.py b/GPy/kern/_src/brownian.py index aeb11fa3..fd79973c 100644 --- a/GPy/kern/_src/brownian.py +++ b/GPy/kern/_src/brownian.py @@ -22,7 +22,7 @@ class Brownian(Kern): super(Brownian, self).__init__(input_dim, active_dims, name) self.variance = Param('variance', variance, Logexp()) - self.add_parameters(self.variance) + self.link_parameters(self.variance) def K(self,X,X2=None): if X2 is None: diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py index 7eccff3d..fc4a2f33 100644 --- a/GPy/kern/_src/coregionalize.py +++ b/GPy/kern/_src/coregionalize.py @@ -50,7 +50,7 @@ class Coregionalize(Kern): else: assert kappa.shape==(self.output_dim, ) self.kappa = Param('kappa', kappa, Logexp()) - self.add_parameters(self.W, self.kappa) + self.link_parameters(self.W, self.kappa) def parameters_changed(self): self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index d8377ffc..4fcbf31f 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -221,7 +221,7 @@ class CombinationKernel(Kern): # initialize the kernel with the full input_dim 
super(CombinationKernel, self).__init__(input_dim, active_dims, name) self.extra_dims = extra_dims - self.add_parameters(*kernels) + self.link_parameters(*kernels) @property def parts(self): diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 9fdacdbb..9d1a956b 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -49,7 +49,7 @@ class Linear(Kern): variances = np.ones(self.input_dim) self.variances = Param('variances', variances, Logexp()) - self.add_parameter(self.variances) + self.link_parameter(self.variances) self.psicomp = PSICOMP_Linear() @Cache_this(limit=2) @@ -103,7 +103,7 @@ class Linear(Kern): def gradients_X_diag(self, dL_dKdiag, X): return 2.*self.variances*dL_dKdiag[:,None]*X - def input_sensitivity(self): + def input_sensitivity(self, summarize=True): return np.ones(self.input_dim) * self.variances #---------------------------------------# @@ -144,7 +144,7 @@ class LinearFull(Kern): self.W = Param('W', W) self.kappa = Param('kappa', kappa, Logexp()) - self.add_parameters(self.W, self.kappa) + self.link_parameters(self.W, self.kappa) def K(self, X, X2=None): P = np.dot(self.W, self.W.T) + np.diag(self.kappa) diff --git a/GPy/kern/_src/mlp.py b/GPy/kern/_src/mlp.py index 0b561d4b..badbd60d 100644 --- a/GPy/kern/_src/mlp.py +++ b/GPy/kern/_src/mlp.py @@ -36,7 +36,7 @@ class MLP(Kern): self.variance = Param('variance', variance, Logexp()) self.weight_variance = Param('weight_variance', weight_variance, Logexp()) self.bias_variance = Param('bias_variance', bias_variance, Logexp()) - self.add_parameters(self.variance, self.weight_variance, self.bias_variance) + self.link_parameters(self.variance, self.weight_variance, self.bias_variance) def K(self, X, X2=None): diff --git a/GPy/kern/_src/periodic.py b/GPy/kern/_src/periodic.py index 9f232ab0..e8e16506 100644 --- a/GPy/kern/_src/periodic.py +++ b/GPy/kern/_src/periodic.py @@ -33,7 +33,7 @@ class Periodic(Kern): self.variance = Param('variance', np.float64(variance), Logexp()) self.lengthscale = Param('lengthscale', np.float64(lengthscale), Logexp()) self.period = Param('period', np.float64(period), Logexp()) - self.add_parameters(self.variance, self.lengthscale, self.period) + self.link_parameters(self.variance, self.lengthscale, self.period) def _cos(self, alpha, omega, phase): def f(x): diff --git a/GPy/kern/_src/poly.py b/GPy/kern/_src/poly.py index d40f805c..4c5f0e93 100644 --- a/GPy/kern/_src/poly.py +++ b/GPy/kern/_src/poly.py @@ -14,7 +14,7 @@ class Poly(Kern): def __init__(self, input_dim, variance=1., order=3., active_dims=None, name='poly'): super(Poly, self).__init__(input_dim, active_dims, name) self.variance = Param('variance', variance, Logexp()) - self.add_parameter(self.variance) + self.link_parameter(self.variance) self.order=order def K(self, X, X2=None): diff --git a/GPy/kern/_src/psi_comp/__init__.py b/GPy/kern/_src/psi_comp/__init__.py index 7a5851fb..eced8392 100644 --- a/GPy/kern/_src/psi_comp/__init__.py +++ b/GPy/kern/_src/psi_comp/__init__.py @@ -29,6 +29,9 @@ class PSICOMP_RBF(Pickleable): else: raise ValueError, "unknown distriubtion received for psi-statistics" + def _setup_observers(self): + pass + class PSICOMP_Linear(Pickleable): @Cache_this(limit=2, ignore_args=(0,)) diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py index 7820c634..f4223bf4 100644 --- a/GPy/kern/_src/static.py +++ b/GPy/kern/_src/static.py @@ -11,7 +11,7 @@ class Static(Kern): def __init__(self, input_dim, variance, active_dims, name): super(Static, self).__init__(input_dim, 
active_dims, name) self.variance = Param('variance', variance, Logexp()) - self.add_parameters(self.variance) + self.link_parameters(self.variance) def Kdiag(self, X): ret = np.empty((X.shape[0],), dtype=np.float64) diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index f7993e82..04427c2c 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -61,7 +61,7 @@ class Stationary(Kern): self.lengthscale = Param('lengthscale', lengthscale, Logexp()) self.variance = Param('variance', variance, Logexp()) assert self.variance.size==1 - self.add_parameters(self.variance, self.lengthscale) + self.link_parameters(self.variance, self.lengthscale) def K_of_r(self, r): raise NotImplementedError, "implement the covariance function as a fn of r to use this class" @@ -343,7 +343,7 @@ class RatQuad(Stationary): def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, active_dims=None, name='RatQuad'): super(RatQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) self.power = Param('power', power, Logexp()) - self.add_parameters(self.power) + self.link_parameters(self.power) def K_of_r(self, r): r2 = np.power(r, 2.) diff --git a/GPy/likelihoods/gamma.py b/GPy/likelihoods/gamma.py index a6436616..ae85c113 100644 --- a/GPy/likelihoods/gamma.py +++ b/GPy/likelihoods/gamma.py @@ -25,7 +25,7 @@ class Gamma(Likelihood): super(Gamma, self).__init__(gp_link, 'Gamma') self.beta = Param('beta', beta) - self.add_parameter(self.beta) + self.link_parameter(self.beta) self.beta.fix()#TODO: gradients! def pdf_link(self, link_f, y, Y_metadata=None): diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 6f08b4b4..4e10d3ef 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -40,7 +40,7 @@ class Gaussian(Likelihood): super(Gaussian, self).__init__(gp_link, name=name) self.variance = Param('variance', variance, Logexp()) - self.add_parameter(self.variance) + self.link_parameter(self.variance) if isinstance(gp_link, link_functions.Identity): self.log_concave = True diff --git a/GPy/likelihoods/mixed_noise.py b/GPy/likelihoods/mixed_noise.py index c2435508..613f069d 100644 --- a/GPy/likelihoods/mixed_noise.py +++ b/GPy/likelihoods/mixed_noise.py @@ -14,7 +14,7 @@ class MixedNoise(Likelihood): #NOTE at the moment this likelihood only works for using a list of gaussians super(Likelihood, self).__init__(name=name) - self.add_parameters(*likelihoods_list) + self.link_parameters(*likelihoods_list) self.likelihoods_list = likelihoods_list self.log_concave = False diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py index c057e789..3aeb43e0 100644 --- a/GPy/likelihoods/student_t.py +++ b/GPy/likelihoods/student_t.py @@ -29,8 +29,8 @@ class StudentT(Likelihood): # sigma2 is not a noise parameter, it is a squared scale. 
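+        # (for deg_free v > 2 the marginal variance of the Student-t is
+        #  sigma2 * v / (v - 2), so sigma2 sets the squared scale rather
+        #  than the noise variance directly)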
self.sigma2 = Param('t_scale2', float(sigma2), Logexp()) self.v = Param('deg_free', float(deg_free)) - self.add_parameter(self.sigma2) - self.add_parameter(self.v) + self.link_parameter(self.sigma2) + self.link_parameter(self.v) self.v.constrain_fixed() self.log_concave = False diff --git a/GPy/mappings/additive.py b/GPy/mappings/additive.py index fe352a83..5297982b 100644 --- a/GPy/mappings/additive.py +++ b/GPy/mappings/additive.py @@ -39,7 +39,7 @@ class Additive(Mapping): return self.mapping1._get_param_names + self.mapping2._get_param_names def _get_params(self): - return np.hstack((self.mapping1._get_params() self.mapping2._get_params())) + return np.hstack((self.mapping1._get_params(), self.mapping2._get_params())) def _set_params(self, x): self.mapping1._set_params(x[:self.mapping1.num_params]) diff --git a/GPy/mappings/linear.py b/GPy/mappings/linear.py index 24a45511..315dfc0e 100644 --- a/GPy/mappings/linear.py +++ b/GPy/mappings/linear.py @@ -24,7 +24,7 @@ class Linear(Bijective_mapping): Bijective_mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name) self.W = Param('W',np.array((self.input_dim, self.output_dim))) self.bias = Param('bias',np.array(self.output_dim)) - self.add_parameters(self.W, self.bias) + self.link_parameters(self.W, self.bias) def f(self, X): return np.dot(X,self.W) + self.bias diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 22a17f84..a4227119 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -25,9 +25,10 @@ class BayesianGPLVM(SparseGP): """ def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, - Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', mpi_comm=None, **kwargs): + Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', mpi_comm=None, normalizer=None): self.mpi_comm = mpi_comm self.__IN_OPTIMIZATION__ = False + self.logger = logging.getLogger(self.__class__.__name__) if X == None: from ..util.initialization import initialize_latent @@ -49,7 +50,7 @@ class BayesianGPLVM(SparseGP): if kernel is None: self.logger.info("initializing kernel RBF") - kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) # + kern.white(input_dim) + kernel = kern.RBF(input_dim, lengthscale=1./fracs, ARD=True) #+ kern.Bias(input_dim) + kern.White(input_dim) if likelihood is None: likelihood = Gaussian() @@ -71,13 +72,13 @@ class BayesianGPLVM(SparseGP): inference_method = VarDTC() if isinstance(inference_method,VarDTC_minibatch): inference_method.mpi_comm = mpi_comm - + if kernel.useGPU and isinstance(inference_method, VarDTC_GPU): kernel.psicomp.GPU_direct = True - SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) + SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, normalizer=normalizer) self.logger.info("Adding X as parameter") - self.add_parameter(self.X, index=0) + self.link_parameter(self.X, index=0) if mpi_comm != None: from ..util.mpi import divide_data @@ -91,7 +92,7 @@ class BayesianGPLVM(SparseGP): def set_X_gradients(self, X, X_grad): """Set the gradients of the posterior distribution of X in its specific form.""" X.mean.gradient, X.variance.gradient = X_grad - + def get_X_gradients(self, X): """Get the gradients of the posterior distribution of X in its specific form.""" return X.mean.gradient, X.variance.gradient @@ -218,22 +219,22 @@ class BayesianGPLVM(SparseGP): del dc['N_list'] del dc['Y_local'] return dc - + def 
__setstate__(self, state):
        return super(BayesianGPLVM, self).__setstate__(state)
-
+
     #=====================================================
-    # The MPI parallelization
+    # The MPI parallelization
     # - can move to model at some point
     #=====================================================
-
+
     def _set_params_transformed(self, p):
         if self.mpi_comm != None:
             if self.__IN_OPTIMIZATION__ and self.mpi_comm.rank==0:
                 self.mpi_comm.Bcast(np.int32(1),root=0)
             self.mpi_comm.Bcast(p, root=0)
         super(BayesianGPLVM, self)._set_params_transformed(p)
-
+
     def optimize(self, optimizer=None, start=None, **kwargs):
         self.__IN_OPTIMIZATION__ = True
         if self.mpi_comm==None:
diff --git a/GPy/models/gp_kronecker_gaussian_regression.py b/GPy/models/gp_kronecker_gaussian_regression.py
index 0e8dab81..434661d2 100644
--- a/GPy/models/gp_kronecker_gaussian_regression.py
+++ b/GPy/models/gp_kronecker_gaussian_regression.py
@@ -35,12 +35,12 @@ class GPKroneckerGaussianRegression(Model):
         self.X2 = ObsAr(X2)
         self.Y = Y
         self.kern1, self.kern2 = kern1, kern2
-        self.add_parameter(self.kern1)
-        self.add_parameter(self.kern2)
+        self.link_parameter(self.kern1)
+        self.link_parameter(self.kern2)
         self.likelihood = likelihoods.Gaussian()
         self.likelihood.variance = noise_var
-        self.add_parameter(self.likelihood)
+        self.link_parameter(self.likelihood)
         self.num_data1, self.input_dim1 = self.X1.shape
         self.num_data2, self.input_dim2 = self.X2.shape
diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py
index d56e72b9..7b8fb63f 100644
--- a/GPy/models/gp_regression.py
+++ b/GPy/models/gp_regression.py
@@ -15,17 +15,22 @@ class GPRegression(GP):
     :param X: input observations
     :param Y: observed values
     :param kernel: a GPy kernel, defaults to rbf
+    :param Norm normalizer: [None]
+
+        Normalize Y with the norm given.
+        If normalizer is False, no normalization will be done.
+        If it is None, we normalize with MeanNorm (subtract the mean of Y).

     .. Note:: Multiple independent outputs are allowed using columns of Y

     """
-    def __init__(self, X, Y, kernel=None, Y_metadata=None):
+    def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None):
         if kernel is None:
             kernel = kern.RBF(X.shape[1])

         likelihood = likelihoods.Gaussian()

-        super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata)
+        super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer)
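A note on the new ``normalizer`` keyword above: it threads straight through to ``GP.__init__``. As a minimal sketch (not part of the patch; the toy data and variable names are illustrative), the intended usage is::

    import numpy as np
    import GPy

    X = np.random.rand(20, 1)
    Y = 100. + np.sin(X) + np.random.randn(20, 1) * 0.05  # Y with a large offset

    # normalizer=None (the default): Y is mean-centered internally (MeanNorm),
    # and predictions are mapped back to the original scale of Y.
    m = GPy.models.GPRegression(X, Y, normalizer=None)

    # normalizer=False: fit on the raw Y, no normalization at all.
    m_raw = GPy.models.GPRegression(X, Y, normalizer=False)

    m.optimize()
    mu, var = m.predict(X)  # mu is un-normalized, directly comparable to Y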
diff --git a/GPy/models/gp_var_gauss.py b/GPy/models/gp_var_gauss.py
index 68b62443..cd688360 100644
--- a/GPy/models/gp_var_gauss.py
+++ b/GPy/models/gp_var_gauss.py
@@ -32,13 +32,13 @@ class GPVariationalGaussianApproximation(Model):
         if kernel is None:
             kernel = kern.RBF(X.shape[1]) + kern.White(X.shape[1], 0.01)
         self.kern = kernel
-        self.add_parameter(self.kern)
+        self.link_parameter(self.kern)
         self.num_data, self.input_dim = self.X.shape
         self.alpha = Param('alpha', np.zeros(self.num_data))
         self.beta = Param('beta', np.ones(self.num_data))
-        self.add_parameter(self.alpha)
-        self.add_parameter(self.beta)
+        self.link_parameter(self.alpha)
+        self.link_parameter(self.beta)
         self.gh_x, self.gh_w = np.polynomial.hermite.hermgauss(20)
         self.Ysign = np.where(Y==1, 1, -1).flatten()
diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py
index 8f5432ba..79128270 100644
--- a/GPy/models/gplvm.py
+++ b/GPy/models/gplvm.py
@@ -38,7 +38,7 @@ class GPLVM(GP):
         super(GPLVM, self).__init__(X, Y, kernel, likelihood, name='GPLVM')
         self.X = Param('latent_mean', X)
-        self.add_parameter(self.X, index=0)
+        self.link_parameter(self.X, index=0)

     def parameters_changed(self):
         super(GPLVM, self).parameters_changed()
diff --git a/GPy/models/gradient_checker.py b/GPy/models/gradient_checker.py
index b7c78449..74026f8e 100644
--- a/GPy/models/gradient_checker.py
+++ b/GPy/models/gradient_checker.py
@@ -76,7 +76,7 @@ class GradientChecker(Model):
         for name, xi in zip(self.names, at_least_one_element(x0)):
             self.__setattr__(name, Param(name, xi))
-            self.add_parameter(self.__getattribute__(name))
+            self.link_parameter(self.__getattribute__(name))
#         self._param_names = []
#         for name, shape in zip(self.names, self.shapes):
#             self._param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index 3acc7c6e..015df7bd 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -129,7 +129,7 @@ class MRD(SparseGP):
         else:
             likelihoods = likelihoods
         self.logger.info("adding X and Z")
-        self.add_parameters(self.X, self.Z)
+        self.link_parameters(self.X, self.Z)

         self.bgplvms = []
         self.num_data = Ylist[0].shape[0]
@@ -137,11 +137,11 @@
         for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist):
             assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
             p = Parameterized(name=n)
-            p.add_parameter(k)
+            p.link_parameter(k)
             p.kern = k
-            p.add_parameter(l)
+            p.link_parameter(l)
             p.likelihood = l
-            self.add_parameter(p)
+            self.link_parameter(p)
             self.bgplvms.append(p)

         self.posterior = None
diff --git a/GPy/models/sparse_gp_regression.py b/GPy/models/sparse_gp_regression.py
index f4d5513e..744de6e7 100644
--- a/GPy/models/sparse_gp_regression.py
+++ b/GPy/models/sparse_gp_regression.py
@@ -30,7 +30,7
@@ class SparseGPRegression(SparseGP): """ - def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None): + def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None): num_data, input_dim = X.shape # kern defaults to rbf (plus white for stability) @@ -49,7 +49,7 @@ class SparseGPRegression(SparseGP): if not (X_variance is None): X = NormalPosterior(X,X_variance) - SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC()) + SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC(), normalizer=normalizer) class SparseGPRegressionUncertainInput(SparseGP): """ @@ -59,7 +59,7 @@ class SparseGPRegressionUncertainInput(SparseGP): """ - def __init__(self, X, X_variance, Y, kernel=None, Z=None, num_inducing=10): + def __init__(self, X, X_variance, Y, kernel=None, Z=None, num_inducing=10, normalizer=None): """ :param X: input observations :type X: np.ndarray (num_data x input_dim) @@ -91,5 +91,5 @@ class SparseGPRegressionUncertainInput(SparseGP): likelihood = likelihoods.Gaussian() - SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance, inference_method=VarDTC()) + SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance, inference_method=VarDTC(), normalizer=normalizer) self.ensure_default_constraints() diff --git a/GPy/models/ss_gplvm.py b/GPy/models/ss_gplvm.py index ba793fc2..4ea4f297 100644 --- a/GPy/models/ss_gplvm.py +++ b/GPy/models/ss_gplvm.py @@ -3,7 +3,7 @@ import numpy as np -from ..core.sparse_gp import SparseGP +from ..core.sparse_gp_mpi import SparseGP_MPI from .. import kern from ..likelihoods import Gaussian from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior @@ -11,7 +11,7 @@ from ..inference.latent_function_inference.var_dtc_parallel import update_gradie from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU from ..kern._src.psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF_GPU -class SSGPLVM(SparseGP): +class SSGPLVM(SparseGP_MPI): """ Spike-and-Slab Gaussian Process Latent Variable Model @@ -24,10 +24,8 @@ class SSGPLVM(SparseGP): """ def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10, - Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, mpi_comm=None, pi=None, learnPi=True, **kwargs): + Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, mpi_comm=None, pi=None, learnPi=True,normalizer=False, **kwargs): - self.mpi_comm = mpi_comm - self.__IN_OPTIMIZATION__ = False self.group_spike = group_spike if X == None: @@ -70,20 +68,11 @@ class SSGPLVM(SparseGP): self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi) # the prior probability of the latent binary variable b X = SpikeAndSlabPosterior(X, X_variance, gamma) - - SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) - self.add_parameter(self.X, index=0) - self.add_parameter(self.variational_prior) - if mpi_comm != None: - from ..util.mpi import divide_data - N_start, N_end, N_list = divide_data(Y.shape[0], mpi_comm) - self.N_range = (N_start, N_end) - self.N_list = np.array(N_list) - self.Y_local = self.Y[N_start:N_end] - print 'MPI RANK: '+str(self.mpi_comm.rank)+' with datasize: '+str(self.N_range) - mpi_comm.Bcast(self.param_array, root=0) - + super(SSGPLVM,self).__init__(X, Y, Z, kernel, likelihood, 
variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
+#        self.X.unfix()
+#        self.X.variance.constrain_positive()
+
         if self.group_spike: [self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in xrange(self.X.gamma.shape[1])] # Tie columns together
@@ -96,11 +85,10 @@ class SSGPLVM(SparseGP):
         return X.mean.gradient, X.variance.gradient, X.binary_prob.gradient

     def parameters_changed(self):
-        if isinstance(self.inference_method, VarDTC_GPU) or isinstance(self.inference_method, VarDTC_minibatch):
-            update_gradients(self, mpi_comm=self.mpi_comm)
+        super(SSGPLVM,self).parameters_changed()
+        if isinstance(self.inference_method, VarDTC_minibatch):
             return
-        super(SSGPLVM, self).parameters_changed()
         self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)

         self.X.mean.gradient, self.X.variance.gradient, self.X.binary_prob.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
@@ -121,47 +109,3 @@ class SSGPLVM(SparseGP):
         return dim_reduction_plots.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs)

-    def __getstate__(self):
-        dc = super(SSGPLVM, self).__getstate__()
-        dc['mpi_comm'] = None
-        if self.mpi_comm != None:
-            del dc['N_range']
-            del dc['N_list']
-            del dc['Y_local']
-        return dc
-
-    def __setstate__(self, state):
-        return super(SSGPLVM, self).__setstate__(state)
-
-    #=====================================================
-    # The MPI parallelization
-    # - can move to model at some point
-    #=====================================================
-
-    def _set_params_transformed(self, p):
-        if self.mpi_comm != None:
-            if self.__IN_OPTIMIZATION__ and self.mpi_comm.rank==0:
-                self.mpi_comm.Bcast(np.int32(1),root=0)
-            self.mpi_comm.Bcast(p, root=0)
-        super(SSGPLVM, self)._set_params_transformed(p)
-
-    def optimize(self, optimizer=None, start=None, **kwargs):
-        self.__IN_OPTIMIZATION__ = True
-        if self.mpi_comm==None:
-            super(SSGPLVM, self).optimize(optimizer,start,**kwargs)
-        elif self.mpi_comm.rank==0:
-            super(SSGPLVM, self).optimize(optimizer,start,**kwargs)
-            self.mpi_comm.Bcast(np.int32(-1),root=0)
-        elif self.mpi_comm.rank>0:
-            x = self._get_params_transformed().copy()
-            flag = np.empty(1,dtype=np.int32)
-            while True:
-                self.mpi_comm.Bcast(flag,root=0)
-                if flag==1:
-                    self._set_params_transformed(x)
-                elif flag==-1:
-                    break
-                else:
-                    self.__IN_OPTIMIZATION__ = False
-                    raise Exception("Unrecognizable flag for synchronization!")
-        self.__IN_OPTIMIZATION__ = False
diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py
index bac3dee0..1d5fdd61 100644
--- a/GPy/plotting/matplot_dep/dim_reduction_plots.py
+++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py
@@ -56,6 +56,57 @@ def plot_latent(model, labels=None, which_indices=None,
     X = param_to_array(X)

+    if X.shape[0] > 1000:
+        print "Warning: subsampling X, as it has more samples than 1000. 
X.shape={!s}".format(X.shape) + subsample = np.random.choice(X.shape[0], size=1000, replace=False) + X = X[subsample] + labels = labels[subsample] + #======================================================================= + # <<>> + # <<>> + # plt.close('all') + # fig, ax = plt.subplots(1,1) + # from GPy.plotting.matplot_dep.dim_reduction_plots import most_significant_input_dimensions + # import matplotlib.patches as mpatches + # i1, i2 = most_significant_input_dimensions(m, None) + # xmin, xmax = 100, -100 + # ymin, ymax = 100, -100 + # legend_handles = [] + # + # X = m.X.mean[:, [i1, i2]] + # X = m.X.variance[:, [i1, i2]] + # + # xmin = X[:,0].min(); xmax = X[:,0].max() + # ymin = X[:,1].min(); ymax = X[:,1].max() + # range_ = [[xmin, xmax], [ymin, ymax]] + # ul = np.unique(labels) + # + # for i, l in enumerate(ul): + # #cdict = dict(red =[(0., colors[i][0], colors[i][0]), (1., colors[i][0], colors[i][0])], + # # green=[(0., colors[i][0], colors[i][1]), (1., colors[i][1], colors[i][1])], + # # blue =[(0., colors[i][0], colors[i][2]), (1., colors[i][2], colors[i][2])], + # # alpha=[(0., 0., .0), (.5, .5, .5), (1., .5, .5)]) + # #cmap = LinearSegmentedColormap('{}'.format(l), cdict) + # cmap = LinearSegmentedColormap.from_list('cmap_{}'.format(str(l)), [colors[i], colors[i]], 255) + # cmap._init() + # #alphas = .5*(1+scipy.special.erf(np.linspace(-2,2, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3)) + # alphas = (scipy.special.erf(np.linspace(0,2.4, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3)) + # cmap._lut[:, -1] = alphas + # print l + # x, y = X[labels==l].T + # + # heatmap, xedges, yedges = np.histogram2d(x, y, bins=300, range=range_) + # #heatmap, xedges, yedges = np.histogram2d(x, y, bins=100) + # + # im = ax.imshow(heatmap, extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], cmap=cmap, aspect='auto', interpolation='nearest', label=str(l)) + # legend_handles.append(mpatches.Patch(color=colors[i], label=l)) + # ax.set_xlim(xmin, xmax) + # ax.set_ylim(ymin, ymax) + # plt.legend(legend_handles, [l.get_label() for l in legend_handles]) + # plt.draw() + # plt.show() + #======================================================================= + # create a function which computes the shading of latent space according to the output variance def plot_function(x): Xtest_full = np.zeros((x.shape[0], model.X.shape[1])) diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py index 60b5fa7c..f2082db0 100644 --- a/GPy/plotting/matplot_dep/kernel_plots.py +++ b/GPy/plotting/matplot_dep/kernel_plots.py @@ -68,21 +68,24 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False, filtering=Non ard_params = np.atleast_2d(kernel.input_sensitivity(summarize=False)) bottom = 0 + last_bottom = bottom + x = np.arange(kernel.input_dim) - if order is None: - order = kernel.parameter_names(recursive=False) + if filtering is None: + filtering = kernel.parameter_names(recursive=False) for i in range(ard_params.shape[0]): - if kernel.parameters[i].name in order: + if kernel.parameters[i].name in filtering: c = Tango.nextMedium() bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel.parameters[i].name, bottom=bottom)) - bottom += ard_params[i,:] + last_bottom = ard_params[i,:] + bottom += last_bottom else: print "filtering out {}".format(kernel.parameters[i].name) ax.set_xlim(-.5, kernel.input_dim - .5) - add_bar_labels(fig, ax, [bars[-1]], bottom=bottom-ard_params[i,:]) + add_bar_labels(fig, ax, [bars[-1]], 
bottom=bottom-last_bottom) if legend: if title is '': diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index a942dc49..83e1085c 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -51,7 +51,7 @@ class Kern_check_dK_dtheta(Kern_check_model): """ def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) - self.add_parameter(self.kernel) + self.link_parameter(self.kernel) def parameters_changed(self): return self.kernel.update_gradients_full(self.dL_dK, self.X, self.X2) @@ -64,7 +64,7 @@ class Kern_check_dKdiag_dtheta(Kern_check_model): """ def __init__(self, kernel=None, dL_dK=None, X=None): Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) - self.add_parameter(self.kernel) + self.link_parameter(self.kernel) def log_likelihood(self): return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum() @@ -77,7 +77,7 @@ class Kern_check_dK_dX(Kern_check_model): def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) self.X = Param('X',X) - self.add_parameter(self.X) + self.link_parameter(self.X) def parameters_changed(self): self.X.gradient[:] = self.kernel.gradients_X(self.dL_dK, self.X, self.X2) diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 451ab757..42f82121 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -19,9 +19,10 @@ class MiscTests(unittest.TestCase): k = GPy.kern.RBF(1) m = GPy.models.GPRegression(self.X, self.Y, kernel=k) m.randomize() - Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N)*m.Gaussian_noise.variance) + m.likelihood.variance = .5 + Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N)*m.likelihood.variance) K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new)) - mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(self.Y) + mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(m.Y_normalized) mu, covar = m._raw_predict(self.X_new, full_cov=True) self.assertEquals(mu.shape, (self.N_new, self.D)) @@ -64,28 +65,28 @@ class MiscTests(unittest.TestCase): np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) m.randomize() m2[:] = m[''].values() - np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) + np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood()) m.randomize() m2[''] = m[:] - np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) + np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood()) m.randomize() m2[:] = m[:] - np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) + np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood()) m.randomize() m2[''] = m[''] - np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) + np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood()) m.kern.lengthscale.randomize() m2[:] = m[:] - np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) + np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood()) m.Gaussian_noise.randomize() m2[:] = m[:] - np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) + np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood()) m['.*var'] = 2 m2['.*var'] = m['.*var'] - np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) + np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood()) def test_likelihood_set(self): @@ -431,6 +432,8 @@ class 
GradientTests(np.testing.TestCase): k1 = GPy.kern.RBF(1) # + GPy.kern.White(1) k2 = GPy.kern.RBF(1) # + GPy.kern.White(1) Y = np.random.randn(N1, N2) + Y = Y-Y.mean(0) + Y = Y/Y.std(0) m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2) # build the model the dumb way diff --git a/GPy/testing/observable_tests.py b/GPy/testing/observable_tests.py index 05794dc3..fb9112f8 100644 --- a/GPy/testing/observable_tests.py +++ b/GPy/testing/observable_tests.py @@ -30,15 +30,15 @@ class Test(unittest.TestCase): self.par2 = ParameterizedTest('test model 2') self.p = Param('test parameter', numpy.random.normal(1,2,(10,3))) - self.par.add_parameter(self.p) - self.par.add_parameter(Param('test1', numpy.random.normal(0,1,(1,)))) - self.par.add_parameter(Param('test2', numpy.random.normal(0,1,(1,)))) + self.par.link_parameter(self.p) + self.par.link_parameter(Param('test1', numpy.random.normal(0,1,(1,)))) + self.par.link_parameter(Param('test2', numpy.random.normal(0,1,(1,)))) - self.par2.add_parameter(Param('par2 test1', numpy.random.normal(0,1,(1,)))) - self.par2.add_parameter(Param('par2 test2', numpy.random.normal(0,1,(1,)))) + self.par2.link_parameter(Param('par2 test1', numpy.random.normal(0,1,(1,)))) + self.par2.link_parameter(Param('par2 test2', numpy.random.normal(0,1,(1,)))) - self.parent.add_parameter(self.par) - self.parent.add_parameter(self.par2) + self.parent.link_parameter(self.par) + self.parent.link_parameter(self.par2) self._observer_triggered = None self._trigger_count = 0 diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index c647c6eb..a51d9e09 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -37,8 +37,8 @@ class ParameterizedTest(unittest.TestCase): self.test1 = GPy.core.Parameterized("test model") self.test1.param = self.param self.test1.kern = self.rbf+self.white - self.test1.add_parameter(self.test1.kern) - self.test1.add_parameter(self.param, 0) + self.test1.link_parameter(self.test1.kern) + self.test1.link_parameter(self.param, 0) # print self.test1: #============================================================================= # test_model. 
| Value | Constraint | Prior | Tied to @@ -67,11 +67,11 @@ class ParameterizedTest(unittest.TestCase): def test_fixes(self): self.white.fix(warning=False) - self.test1.remove_parameter(self.param) + self.test1.unlink_parameter(self.param) self.assertTrue(self.test1._has_fixes()) from GPy.core.parameterization.transformations import FIXED, UNFIXED self.assertListEqual(self.test1._fixes_.tolist(),[UNFIXED,UNFIXED,FIXED]) - self.test1.kern.add_parameter(self.white, 0) + self.test1.kern.link_parameter(self.white, 0) self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED]) self.test1.kern.rbf.fix() self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3) @@ -82,7 +82,7 @@ class ParameterizedTest(unittest.TestCase): def test_remove_parameter(self): from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp self.white.fix() - self.test1.kern.remove_parameter(self.white) + self.test1.kern.unlink_parameter(self.white) self.assertIs(self.test1._fixes_,None) self.assertListEqual(self.white._fixes_.tolist(), [FIXED]) @@ -90,7 +90,7 @@ class ParameterizedTest(unittest.TestCase): self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops) - self.test1.add_parameter(self.white, 0) + self.test1.link_parameter(self.white, 0) self.assertIs(self.test1.constraints, self.white.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops) @@ -98,7 +98,7 @@ class ParameterizedTest(unittest.TestCase): self.assertIs(self.white._fixes_,None) self.assertListEqual(self.test1._fixes_.tolist(),[FIXED] + [UNFIXED] * 52) - self.test1.remove_parameter(self.white) + self.test1.unlink_parameter(self.white) self.assertIs(self.test1._fixes_,None) self.assertListEqual(self.white._fixes_.tolist(), [FIXED]) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) @@ -107,11 +107,11 @@ class ParameterizedTest(unittest.TestCase): def test_remove_parameter_param_array_grad_array(self): val = self.test1.kern.param_array.copy() - self.test1.kern.remove_parameter(self.white) + self.test1.kern.unlink_parameter(self.white) self.assertListEqual(self.test1.kern.param_array.tolist(), val[:2].tolist()) def test_add_parameter_already_in_hirarchy(self): - self.assertRaises(HierarchyError, self.test1.add_parameter, self.white.parameters[0]) + self.assertRaises(HierarchyError, self.test1.link_parameter, self.white.parameters[0]) def test_default_constraints(self): self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops) @@ -119,7 +119,7 @@ class ParameterizedTest(unittest.TestCase): self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2)) from GPy.core.parameterization.transformations import Logexp kern = self.test1.kern - self.test1.remove_parameter(kern) + self.test1.unlink_parameter(kern) self.assertListEqual(kern.constraints[Logexp()].tolist(), range(3)) def test_constraints(self): @@ -127,7 +127,7 @@ class ParameterizedTest(unittest.TestCase): self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(self.param.size, self.param.size+self.rbf.size)) self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [self.param.size+self.rbf.size]) - self.test1.kern.remove_parameter(self.rbf) + self.test1.kern.unlink_parameter(self.rbf) 
self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), [])

     def test_constraints_views(self):
@@ -152,6 +152,12 @@ class ParameterizedTest(unittest.TestCase):
         self.test1.kern.randomize()
         self.assertEqual(val, self.rbf.variance)

+    def test_updates(self):
+        self.test1.update_model(False)
+        val = float(self.rbf.variance)
+        self.test1.kern.randomize()
+        self.assertEqual(val, self.rbf.variance)
+
     def test_fixing_optimize(self):
         self.testmodel.kern.lengthscale.fix()
         val = float(self.testmodel.kern.lengthscale)
@@ -160,7 +166,7 @@
     def test_add_parameter_in_hierarchy(self):
         from GPy.core import Param
-        self.test1.kern.rbf.add_parameter(Param("NEW", np.random.rand(2), NegativeLogexp()), 1)
+        self.test1.kern.rbf.link_parameter(Param("NEW", np.random.rand(2), NegativeLogexp()), 1)
         self.assertListEqual(self.test1.constraints[NegativeLogexp()].tolist(), range(self.param.size+1, self.param.size+1 + 2))
         self.assertListEqual(self.test1.constraints[GPy.transformations.Logistic(0,1)].tolist(), range(self.param.size))
         self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp(0,1)].tolist(), np.r_[50, 53:55].tolist())
diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py
index d51352fe..dfabe54e 100644
--- a/GPy/testing/pickle_tests.py
+++ b/GPy/testing/pickle_tests.py
@@ -108,7 +108,7 @@ class Test(ListDictTestCase):
         par = toy_rbf_1d_50(optimize=0, plot=0)
         pcopy = par.copy()
         self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
-        self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
+        np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
         self.assertSequenceEqual(str(par), str(pcopy))
         self.assertIsNot(par.param_array, pcopy.param_array)
         self.assertIsNot(par.gradient_full, pcopy.gradient_full)
@@ -141,7 +141,7 @@ class Test(ListDictTestCase):
         f.seek(0)
         pcopy = pickle.load(f)
         np.testing.assert_allclose(par.param_array, pcopy.param_array)
-        np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
+        np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full, atol=1e-6)
         self.assertSequenceEqual(str(par), str(pcopy))
         self.assert_(pcopy.checkgrad())
diff --git a/GPy/util/debug.py b/GPy/util/debug.py
new file mode 100644
index 00000000..b676d028
--- /dev/null
+++ b/GPy/util/debug.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+"""
+The module for some general debug tools
+"""
+
+import numpy as np
+
+def checkFinite(arr, name=None):
+    if name is None:
+        name = 'Array with ID['+str(id(arr))+']'
+
+    if np.any(np.logical_not(np.isfinite(arr))):
+        idx = np.where(np.logical_not(np.isfinite(arr)))[0]
+        print name+' at indices '+str(idx)+' has non-finite values: '+str(arr[idx])+'!'
+        return False
+    return True
+
+def checkFullRank(m, tol=1e-10, name=None, force_check=False):
+    if name is None:
+        name = 'Matrix with ID['+str(id(m))+']'
+    assert len(m.shape)==2 and m.shape[0]==m.shape[1], 'The input of checkFullRank has to be a square matrix!'
+
+    if not force_check and m.shape[0]>=10000:
+        print 'The size of '+name+' is too big to check (>=10000)!'
+        return True
+
+    s = np.real(np.linalg.eigvals(m))
+
+    if s.min()/s.max() < tol:
+        return False
+    return True
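Since ``GPy/util/debug.py`` is new in this patch, here is a quick usage sketch (illustrative only, not from the patch; it follows the module's Python 2 print style)::

    import numpy as np
    from GPy.util.debug import checkFinite, checkFullRank

    A = np.random.randn(50, 50)
    K = A.dot(A.T)               # a positive semi-definite Gram matrix
    checkFinite(K, name='K')     # True: all entries are finite
    checkFullRank(K, name='K')   # True if the min/max eigenvalue ratio exceeds tol

    K[0, 0] = np.inf
    checkFinite(K, name='K')     # False, and prints the offending indices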
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ ... @@ def jitchol(A, maxtries=5):
-        while maxtries>0 and np.isfinite(jitter):
-            print 'Warning: adding jitter of {:.10e}'.format(jitter)
-            try:
-                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
-            except:
-                jitter *= 10
-            finally:
-                maxtries -= 1
-        raise linalg.LinAlgError, "not positive definite, even with jitter."
-
diff --git a/GPy/util/normalizer.py b/GPy/util/normalizer.py
new file mode 100644
index 00000000..854726c4
--- /dev/null
+++ b/GPy/util/normalizer.py
@@ -0,0 +1,45 @@
+'''
+Created on Aug 27, 2014
+
+@author: t-mazwie
+'''
+import logging
+import numpy as np
+
+class Norm(object):
+    def __init__(self):
+        pass
+    def scale_by(self, Y):
+        """
+        Use data matrix Y as normalization space to work in.
+        """
+        raise NotImplementedError
+    def normalize(self, Y):
+        """
+        Project Y into normalized space
+        """
+        raise NotImplementedError
+    def inverse_mean(self, X):
+        """
+        Project the normalized object X into space of Y
+        """
+        raise NotImplementedError
+    def inverse_variance(self, var):
+        return var
+    def scaled(self):
+        """
+        Whether this Norm object has been initialized.
+        """
+        raise NotImplementedError
+
+class MeanNorm(Norm):
+    def __init__(self):
+        self.mean = None
+    def scale_by(self, Y):
+        Y = np.ma.masked_invalid(Y, copy=False)
+        self.mean = Y.mean(0).view(np.ndarray)
+    def normalize(self, Y):
+        return Y-self.mean
+    def inverse_mean(self, X):
+        return X+self.mean
+    def scaled(self):
+        return self.mean is not None
diff --git a/doc/GPy.core.parameterization.rst b/doc/GPy.core.parameterization.rst
new file mode 100644
index 00000000..4877a06d
--- /dev/null
+++ b/doc/GPy.core.parameterization.rst
@@ -0,0 +1,102 @@
+GPy.core.parameterization package
+=================================
+
+Submodules
+----------
+
+GPy.core.parameterization.domains module
+----------------------------------------
+
+.. automodule:: GPy.core.parameterization.domains
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+GPy.core.parameterization.index_operations module
+-------------------------------------------------
+
+.. automodule:: GPy.core.parameterization.index_operations
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+GPy.core.parameterization.lists_and_dicts module
+------------------------------------------------
+
+.. automodule:: GPy.core.parameterization.lists_and_dicts
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+GPy.core.parameterization.observable_array module
+-------------------------------------------------
+
+.. automodule:: GPy.core.parameterization.observable_array
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+GPy.core.parameterization.param module
+--------------------------------------
+
+.. automodule:: GPy.core.parameterization.param
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+GPy.core.parameterization.parameter_core module
+-----------------------------------------------
+
+.. automodule:: GPy.core.parameterization.parameter_core
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+GPy.core.parameterization.parameterized module
+----------------------------------------------
+
+.. automodule:: GPy.core.parameterization.parameterized
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+GPy.core.parameterization.priors module
+---------------------------------------
+
+.. automodule:: GPy.core.parameterization.priors
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+GPy.core.parameterization.ties_and_remappings module
+----------------------------------------------------
+
+.. 
automodule:: GPy.core.parameterization.ties_and_remappings + :members: + :undoc-members: + :show-inheritance: + +GPy.core.parameterization.transformations module +------------------------------------------------ + +.. automodule:: GPy.core.parameterization.transformations + :members: + :undoc-members: + :show-inheritance: + +GPy.core.parameterization.variational module +-------------------------------------------- + +.. automodule:: GPy.core.parameterization.variational + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: GPy.core.parameterization + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/GPy.core.rst b/doc/GPy.core.rst index c4f1849d..3c236612 100644 --- a/doc/GPy.core.rst +++ b/doc/GPy.core.rst @@ -1,25 +1,16 @@ GPy.core package ================ +Subpackages +----------- + +.. toctree:: + + GPy.core.parameterization + Submodules ---------- -GPy.core.domains module ------------------------ - -.. automodule:: GPy.core.domains - :members: - :undoc-members: - :show-inheritance: - -GPy.core.fitc module --------------------- - -.. automodule:: GPy.core.fitc - :members: - :undoc-members: - :show-inheritance: - GPy.core.gp module ------------------ @@ -28,14 +19,6 @@ GPy.core.gp module :undoc-members: :show-inheritance: -GPy.core.gp_base module ------------------------ - -.. automodule:: GPy.core.gp_base - :members: - :undoc-members: - :show-inheritance: - GPy.core.mapping module ----------------------- @@ -52,22 +35,6 @@ GPy.core.model module :undoc-members: :show-inheritance: -GPy.core.parameterized module ------------------------------ - -.. automodule:: GPy.core.parameterized - :members: - :undoc-members: - :show-inheritance: - -GPy.core.priors module ----------------------- - -.. automodule:: GPy.core.priors - :members: - :undoc-members: - :show-inheritance: - GPy.core.sparse_gp module ------------------------- @@ -76,6 +43,14 @@ GPy.core.sparse_gp module :undoc-members: :show-inheritance: +GPy.core.sparse_gp_mpi module +----------------------------- + +.. automodule:: GPy.core.sparse_gp_mpi + :members: + :undoc-members: + :show-inheritance: + GPy.core.svigp module --------------------- @@ -84,10 +59,10 @@ GPy.core.svigp module :undoc-members: :show-inheritance: -GPy.core.transformations module -------------------------------- +GPy.core.symbolic module +------------------------ -.. automodule:: GPy.core.transformations +.. automodule:: GPy.core.symbolic :members: :undoc-members: :show-inheritance: diff --git a/doc/GPy.examples.rst b/doc/GPy.examples.rst index 4fd3528f..7fc8a123 100644 --- a/doc/GPy.examples.rst +++ b/doc/GPy.examples.rst @@ -12,6 +12,14 @@ GPy.examples.classification module :undoc-members: :show-inheritance: +GPy.examples.coreg_example module +--------------------------------- + +.. automodule:: GPy.examples.coreg_example + :members: + :undoc-members: + :show-inheritance: + GPy.examples.dimensionality_reduction module -------------------------------------------- @@ -20,6 +28,14 @@ GPy.examples.dimensionality_reduction module :undoc-members: :show-inheritance: +GPy.examples.non_gaussian module +-------------------------------- + +.. 
automodule:: GPy.examples.non_gaussian + :members: + :undoc-members: + :show-inheritance: + GPy.examples.regression module ------------------------------ diff --git a/doc/GPy.inference.latent_function_inference.rst b/doc/GPy.inference.latent_function_inference.rst new file mode 100644 index 00000000..c47da33a --- /dev/null +++ b/doc/GPy.inference.latent_function_inference.rst @@ -0,0 +1,94 @@ +GPy.inference.latent_function_inference package +=============================================== + +Submodules +---------- + +GPy.inference.latent_function_inference.dtc module +-------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.dtc + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.exact_gaussian_inference module +----------------------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.exact_gaussian_inference + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.expectation_propagation module +---------------------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.expectation_propagation + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.expectation_propagation_dtc module +-------------------------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.expectation_propagation_dtc + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.fitc module +--------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.fitc + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.laplace module +------------------------------------------------------ + +.. automodule:: GPy.inference.latent_function_inference.laplace + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.posterior module +-------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.posterior + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.var_dtc module +------------------------------------------------------ + +.. automodule:: GPy.inference.latent_function_inference.var_dtc + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.var_dtc_gpu module +---------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.var_dtc_gpu + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.latent_function_inference.var_dtc_parallel module +--------------------------------------------------------------- + +.. automodule:: GPy.inference.latent_function_inference.var_dtc_parallel + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. 
automodule:: GPy.inference.latent_function_inference + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/GPy.inference.optimization.rst b/doc/GPy.inference.optimization.rst new file mode 100644 index 00000000..83339202 --- /dev/null +++ b/doc/GPy.inference.optimization.rst @@ -0,0 +1,78 @@ +GPy.inference.optimization package +================================== + +Submodules +---------- + +GPy.inference.optimization.BayesOpt module +------------------------------------------ + +.. automodule:: GPy.inference.optimization.BayesOpt + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.optimization.conjugate_gradient_descent module +------------------------------------------------------------ + +.. automodule:: GPy.inference.optimization.conjugate_gradient_descent + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.optimization.gradient_descent_update_rules module +--------------------------------------------------------------- + +.. automodule:: GPy.inference.optimization.gradient_descent_update_rules + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.optimization.hmc module +------------------------------------- + +.. automodule:: GPy.inference.optimization.hmc + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.optimization.optimization module +---------------------------------------------- + +.. automodule:: GPy.inference.optimization.optimization + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.optimization.samplers module +------------------------------------------ + +.. automodule:: GPy.inference.optimization.samplers + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.optimization.scg module +------------------------------------- + +.. automodule:: GPy.inference.optimization.scg + :members: + :undoc-members: + :show-inheritance: + +GPy.inference.optimization.sgd module +------------------------------------- + +.. automodule:: GPy.inference.optimization.sgd + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: GPy.inference.optimization + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/GPy.inference.rst b/doc/GPy.inference.rst index 28f42994..2aa7839f 100644 --- a/doc/GPy.inference.rst +++ b/doc/GPy.inference.rst @@ -1,57 +1,13 @@ GPy.inference package ===================== -Submodules ----------- +Subpackages +----------- -GPy.inference.conjugate_gradient_descent module ------------------------------------------------ - -.. automodule:: GPy.inference.conjugate_gradient_descent - :members: - :undoc-members: - :show-inheritance: - -GPy.inference.gradient_descent_update_rules module --------------------------------------------------- - -.. automodule:: GPy.inference.gradient_descent_update_rules - :members: - :undoc-members: - :show-inheritance: - -GPy.inference.optimization module ---------------------------------- - -.. automodule:: GPy.inference.optimization - :members: - :undoc-members: - :show-inheritance: - -GPy.inference.samplers module ------------------------------ - -.. automodule:: GPy.inference.samplers - :members: - :undoc-members: - :show-inheritance: - -GPy.inference.scg module ------------------------- - -.. automodule:: GPy.inference.scg - :members: - :undoc-members: - :show-inheritance: - -GPy.inference.sgd module ------------------------- - -.. automodule:: GPy.inference.sgd - :members: - :undoc-members: - :show-inheritance: +.. 
toctree:: + GPy.inference.latent_function_inference + GPy.inference.optimization Module contents --------------- diff --git a/doc/GPy.kern.rst b/doc/GPy.kern.rst index b4b9d9aa..9ee59ed6 100644 --- a/doc/GPy.kern.rst +++ b/doc/GPy.kern.rst @@ -1,33 +1,6 @@ GPy.kern package ================ -Subpackages ------------ - -.. toctree:: - - GPy.kern.parts - -Submodules ----------- - -GPy.kern.constructors module ----------------------------- - -.. automodule:: GPy.kern.constructors - :members: - :undoc-members: - :show-inheritance: - -GPy.kern.kern module --------------------- - -.. automodule:: GPy.kern.kern - :members: - :undoc-members: - :show-inheritance: - - Module contents --------------- diff --git a/doc/GPy.likelihoods.rst b/doc/GPy.likelihoods.rst index c3da2650..70679454 100644 --- a/doc/GPy.likelihoods.rst +++ b/doc/GPy.likelihoods.rst @@ -1,28 +1,29 @@ GPy.likelihoods package ======================= -Subpackages ------------ - -.. toctree:: - - GPy.likelihoods.noise_models - Submodules ---------- -GPy.likelihoods.ep module -------------------------- +GPy.likelihoods.bernoulli module +-------------------------------- -.. automodule:: GPy.likelihoods.ep +.. automodule:: GPy.likelihoods.bernoulli :members: :undoc-members: :show-inheritance: -GPy.likelihoods.ep_mixed_noise module -------------------------------------- +GPy.likelihoods.exponential module +---------------------------------- -.. automodule:: GPy.likelihoods.ep_mixed_noise +.. automodule:: GPy.likelihoods.exponential + :members: + :undoc-members: + :show-inheritance: + +GPy.likelihoods.gamma module +---------------------------- + +.. automodule:: GPy.likelihoods.gamma :members: :undoc-members: :show-inheritance: @@ -35,14 +36,6 @@ GPy.likelihoods.gaussian module :undoc-members: :show-inheritance: -GPy.likelihoods.gaussian_mixed_noise module -------------------------------------------- - -.. automodule:: GPy.likelihoods.gaussian_mixed_noise - :members: - :undoc-members: - :show-inheritance: - GPy.likelihoods.likelihood module --------------------------------- @@ -51,10 +44,82 @@ GPy.likelihoods.likelihood module :undoc-members: :show-inheritance: -GPy.likelihoods.noise_model_constructors module ------------------------------------------------ +GPy.likelihoods.link_functions module +------------------------------------- -.. automodule:: GPy.likelihoods.noise_model_constructors +.. automodule:: GPy.likelihoods.link_functions + :members: + :undoc-members: + :show-inheritance: + +GPy.likelihoods.mixed_noise module +---------------------------------- + +.. automodule:: GPy.likelihoods.mixed_noise + :members: + :undoc-members: + :show-inheritance: + +GPy.likelihoods.negative_binomial module +---------------------------------------- + +.. automodule:: GPy.likelihoods.negative_binomial + :members: + :undoc-members: + :show-inheritance: + +GPy.likelihoods.ordinal module +------------------------------ + +.. automodule:: GPy.likelihoods.ordinal + :members: + :undoc-members: + :show-inheritance: + +GPy.likelihoods.poisson module +------------------------------ + +.. automodule:: GPy.likelihoods.poisson + :members: + :undoc-members: + :show-inheritance: + +GPy.likelihoods.skew_exponential module +--------------------------------------- + +.. automodule:: GPy.likelihoods.skew_exponential + :members: + :undoc-members: + :show-inheritance: + +GPy.likelihoods.skew_normal module +---------------------------------- + +.. 
automodule:: GPy.likelihoods.skew_normal + :members: + :undoc-members: + :show-inheritance: + +GPy.likelihoods.sstudent_t module +--------------------------------- + +.. automodule:: GPy.likelihoods.sstudent_t + :members: + :undoc-members: + :show-inheritance: + +GPy.likelihoods.student_t module +-------------------------------- + +.. automodule:: GPy.likelihoods.student_t + :members: + :undoc-members: + :show-inheritance: + +GPy.likelihoods.symbolic module +------------------------------- + +.. automodule:: GPy.likelihoods.symbolic :members: :undoc-members: :show-inheritance: diff --git a/doc/GPy.mappings.rst b/doc/GPy.mappings.rst index c48cb06e..e5d89872 100644 --- a/doc/GPy.mappings.rst +++ b/doc/GPy.mappings.rst @@ -4,6 +4,14 @@ GPy.mappings package Submodules ---------- +GPy.mappings.additive module +---------------------------- + +.. automodule:: GPy.mappings.additive + :members: + :undoc-members: + :show-inheritance: + GPy.mappings.kernel module -------------------------- @@ -28,6 +36,14 @@ GPy.mappings.mlp module :undoc-members: :show-inheritance: +GPy.mappings.symbolic module +---------------------------- + +.. automodule:: GPy.mappings.symbolic + :members: + :undoc-members: + :show-inheritance: + Module contents --------------- diff --git a/doc/GPy.models.rst b/doc/GPy.models.rst index 4440513e..5ee7e3a9 100644 --- a/doc/GPy.models.rst +++ b/doc/GPy.models.rst @@ -20,14 +20,6 @@ GPy.models.bcgplvm module :undoc-members: :show-inheritance: -GPy.models.fitc_classification module -------------------------------------- - -.. automodule:: GPy.models.fitc_classification - :members: - :undoc-members: - :show-inheritance: - GPy.models.gp_classification module ----------------------------------- @@ -36,6 +28,30 @@ GPy.models.gp_classification module :undoc-members: :show-inheritance: +GPy.models.gp_coregionalized_regression module +---------------------------------------------- + +.. automodule:: GPy.models.gp_coregionalized_regression + :members: + :undoc-members: + :show-inheritance: + +GPy.models.gp_heteroscedastic_regression module +----------------------------------------------- + +.. automodule:: GPy.models.gp_heteroscedastic_regression + :members: + :undoc-members: + :show-inheritance: + +GPy.models.gp_kronecker_gaussian_regression module +-------------------------------------------------- + +.. automodule:: GPy.models.gp_kronecker_gaussian_regression + :members: + :undoc-members: + :show-inheritance: + GPy.models.gp_multioutput_regression module ------------------------------------------- @@ -52,6 +68,14 @@ GPy.models.gp_regression module :undoc-members: :show-inheritance: +GPy.models.gp_var_gauss module +------------------------------ + +.. automodule:: GPy.models.gp_var_gauss + :members: + :undoc-members: + :show-inheritance: + GPy.models.gplvm module ----------------------- @@ -84,6 +108,14 @@ GPy.models.sparse_gp_classification module :undoc-members: :show-inheritance: +GPy.models.sparse_gp_coregionalized_regression module +----------------------------------------------------- + +.. automodule:: GPy.models.sparse_gp_coregionalized_regression + :members: + :undoc-members: + :show-inheritance: + GPy.models.sparse_gp_multioutput_regression module -------------------------------------------------- @@ -108,6 +140,22 @@ GPy.models.sparse_gplvm module :undoc-members: :show-inheritance: +GPy.models.ss_gplvm module +-------------------------- + +.. 
automodule:: GPy.models.ss_gplvm + :members: + :undoc-members: + :show-inheritance: + +GPy.models.ss_mrd module +------------------------ + +.. automodule:: GPy.models.ss_mrd + :members: + :undoc-members: + :show-inheritance: + GPy.models.svigp_regression module ---------------------------------- diff --git a/doc/GPy.plotting.matplot_dep.latent_space_visualizations.controllers.rst b/doc/GPy.plotting.matplot_dep.latent_space_visualizations.controllers.rst new file mode 100644 index 00000000..71826ed6 --- /dev/null +++ b/doc/GPy.plotting.matplot_dep.latent_space_visualizations.controllers.rst @@ -0,0 +1,30 @@ +GPy.plotting.matplot_dep.latent_space_visualizations.controllers package +======================================================================== + +Submodules +---------- + +GPy.plotting.matplot_dep.latent_space_visualizations.controllers.axis_event_controller module +--------------------------------------------------------------------------------------------- + +.. automodule:: GPy.plotting.matplot_dep.latent_space_visualizations.controllers.axis_event_controller + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.latent_space_visualizations.controllers.imshow_controller module +----------------------------------------------------------------------------------------- + +.. automodule:: GPy.plotting.matplot_dep.latent_space_visualizations.controllers.imshow_controller + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: GPy.plotting.matplot_dep.latent_space_visualizations.controllers + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/GPy.plotting.matplot_dep.latent_space_visualizations.rst b/doc/GPy.plotting.matplot_dep.latent_space_visualizations.rst new file mode 100644 index 00000000..6e5cf4bd --- /dev/null +++ b/doc/GPy.plotting.matplot_dep.latent_space_visualizations.rst @@ -0,0 +1,17 @@ +GPy.plotting.matplot_dep.latent_space_visualizations package +============================================================ + +Subpackages +----------- + +.. toctree:: + + GPy.plotting.matplot_dep.latent_space_visualizations.controllers + +Module contents +--------------- + +.. automodule:: GPy.plotting.matplot_dep.latent_space_visualizations + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/GPy.plotting.matplot_dep.rst b/doc/GPy.plotting.matplot_dep.rst new file mode 100644 index 00000000..77780708 --- /dev/null +++ b/doc/GPy.plotting.matplot_dep.rst @@ -0,0 +1,141 @@ +GPy.plotting.matplot_dep package +================================ + +Subpackages +----------- + +.. toctree:: + + GPy.plotting.matplot_dep.latent_space_visualizations + +Submodules +---------- + +GPy.plotting.matplot_dep.Tango module +------------------------------------- + +.. automodule:: GPy.plotting.matplot_dep.Tango + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.base_plots module +------------------------------------------ + +.. automodule:: GPy.plotting.matplot_dep.base_plots + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.dim_reduction_plots module +--------------------------------------------------- + +.. automodule:: GPy.plotting.matplot_dep.dim_reduction_plots + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.img_plots module +----------------------------------------- + +.. 
automodule:: GPy.plotting.matplot_dep.img_plots + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.inference_plots module +----------------------------------------------- + +.. automodule:: GPy.plotting.matplot_dep.inference_plots + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.kernel_plots module +-------------------------------------------- + +.. automodule:: GPy.plotting.matplot_dep.kernel_plots + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.mapping_plots module +--------------------------------------------- + +.. automodule:: GPy.plotting.matplot_dep.mapping_plots + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.maps module +------------------------------------ + +.. automodule:: GPy.plotting.matplot_dep.maps + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.models_plots module +-------------------------------------------- + +.. automodule:: GPy.plotting.matplot_dep.models_plots + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.netpbmfile module +------------------------------------------ + +.. automodule:: GPy.plotting.matplot_dep.netpbmfile + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.priors_plots module +-------------------------------------------- + +.. automodule:: GPy.plotting.matplot_dep.priors_plots + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.ssgplvm module +--------------------------------------- + +.. automodule:: GPy.plotting.matplot_dep.ssgplvm + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.svig_plots module +------------------------------------------ + +.. automodule:: GPy.plotting.matplot_dep.svig_plots + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.variational_plots module +------------------------------------------------- + +.. automodule:: GPy.plotting.matplot_dep.variational_plots + :members: + :undoc-members: + :show-inheritance: + +GPy.plotting.matplot_dep.visualize module +----------------------------------------- + +.. automodule:: GPy.plotting.matplot_dep.visualize + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: GPy.plotting.matplot_dep + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/GPy.plotting.rst b/doc/GPy.plotting.rst new file mode 100644 index 00000000..af035515 --- /dev/null +++ b/doc/GPy.plotting.rst @@ -0,0 +1,17 @@ +GPy.plotting package +==================== + +Subpackages +----------- + +.. toctree:: + + GPy.plotting.matplot_dep + +Module contents +--------------- + +.. automodule:: GPy.plotting + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/GPy.rst b/doc/GPy.rst index 60092e91..9be6dbec 100644 --- a/doc/GPy.rst +++ b/doc/GPy.rst @@ -13,6 +13,7 @@ Subpackages GPy.likelihoods GPy.mappings GPy.models + GPy.plotting GPy.testing GPy.util diff --git a/doc/GPy.testing.rst b/doc/GPy.testing.rst index bd5258b7..2d1132d7 100644 --- a/doc/GPy.testing.rst +++ b/doc/GPy.testing.rst @@ -4,22 +4,6 @@ GPy.testing package Submodules ---------- -GPy.testing.bgplvm_tests module -------------------------------- - -.. automodule:: GPy.testing.bgplvm_tests - :members: - :undoc-members: - :show-inheritance: - -GPy.testing.cgd_tests module ----------------------------- - -.. 
automodule:: GPy.testing.cgd_tests - :members: - :undoc-members: - :show-inheritance: - GPy.testing.examples_tests module --------------------------------- @@ -28,10 +12,18 @@ GPy.testing.examples_tests module :undoc-members: :show-inheritance: -GPy.testing.gplvm_tests module ------------------------------- +GPy.testing.fitc module +----------------------- -.. automodule:: GPy.testing.gplvm_tests +.. automodule:: GPy.testing.fitc + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.index_operations_tests module +----------------------------------------- + +.. automodule:: GPy.testing.index_operations_tests :members: :undoc-members: :show-inheritance: @@ -44,18 +36,42 @@ GPy.testing.kernel_tests module :undoc-members: :show-inheritance: -GPy.testing.mapping_tests module --------------------------------- +GPy.testing.likelihood_tests module +----------------------------------- -.. automodule:: GPy.testing.mapping_tests +.. automodule:: GPy.testing.likelihood_tests :members: :undoc-members: :show-inheritance: -GPy.testing.mrd_tests module ----------------------------- +GPy.testing.model_tests module +------------------------------ -.. automodule:: GPy.testing.mrd_tests +.. automodule:: GPy.testing.model_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.observable_tests module +----------------------------------- + +.. automodule:: GPy.testing.observable_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.parameterized_tests module +-------------------------------------- + +.. automodule:: GPy.testing.parameterized_tests + :members: + :undoc-members: + :show-inheritance: + +GPy.testing.pickle_tests module +------------------------------- + +.. automodule:: GPy.testing.pickle_tests :members: :undoc-members: :show-inheritance: @@ -68,38 +84,6 @@ GPy.testing.prior_tests module :undoc-members: :show-inheritance: -GPy.testing.psi_stat_expectation_tests module ---------------------------------------------- - -.. automodule:: GPy.testing.psi_stat_expectation_tests - :members: - :undoc-members: - :show-inheritance: - -GPy.testing.psi_stat_gradient_tests module ------------------------------------------- - -.. automodule:: GPy.testing.psi_stat_gradient_tests - :members: - :undoc-members: - :show-inheritance: - -GPy.testing.sparse_gplvm_tests module -------------------------------------- - -.. automodule:: GPy.testing.sparse_gplvm_tests - :members: - :undoc-members: - :show-inheritance: - -GPy.testing.unit_tests module ------------------------------ - -.. automodule:: GPy.testing.unit_tests - :members: - :undoc-members: - :show-inheritance: - Module contents --------------- diff --git a/doc/GPy.util.rst b/doc/GPy.util.rst index c86280a7..14c7643b 100644 --- a/doc/GPy.util.rst +++ b/doc/GPy.util.rst @@ -1,20 +1,21 @@ GPy.util package ================ -Subpackages ------------ - -.. toctree:: - - GPy.util.latent_space_visualizations - Submodules ---------- -GPy.util.Tango module ---------------------- +GPy.util.block_matrices module +------------------------------ -.. automodule:: GPy.util.Tango +.. automodule:: GPy.util.block_matrices + :members: + :undoc-members: + :show-inheritance: + +GPy.util.caching module +----------------------- + +.. automodule:: GPy.util.caching :members: :undoc-members: :show-inheritance: @@ -27,6 +28,14 @@ GPy.util.classification module :undoc-members: :show-inheritance: +GPy.util.config module +---------------------- + +.. 
automodule:: GPy.util.config + :members: + :undoc-members: + :show-inheritance: + GPy.util.datasets module ------------------------ @@ -35,6 +44,14 @@ GPy.util.datasets module :undoc-members: :show-inheritance: +GPy.util.debug module +--------------------- + +.. automodule:: GPy.util.debug + :members: + :undoc-members: + :show-inheritance: + GPy.util.decorators module -------------------------- @@ -43,6 +60,46 @@ GPy.util.decorators module :undoc-members: :show-inheritance: +GPy.util.diag module +-------------------- + +.. automodule:: GPy.util.diag + :members: + :undoc-members: + :show-inheritance: + +GPy.util.erfcx module +--------------------- + +.. automodule:: GPy.util.erfcx + :members: + :undoc-members: + :show-inheritance: + +GPy.util.functions module +------------------------- + +.. automodule:: GPy.util.functions + :members: + :undoc-members: + :show-inheritance: + +GPy.util.gpu_init module +------------------------ + +.. automodule:: GPy.util.gpu_init + :members: + :undoc-members: + :show-inheritance: + +GPy.util.initialization module +------------------------------ + +.. automodule:: GPy.util.initialization + :members: + :undoc-members: + :show-inheritance: + GPy.util.linalg module ---------------------- @@ -51,6 +108,22 @@ GPy.util.linalg module :undoc-members: :show-inheritance: +GPy.util.linalg_gpu module +-------------------------- + +.. automodule:: GPy.util.linalg_gpu + :members: + :undoc-members: + :show-inheritance: + +GPy.util.ln_diff_erfs module +---------------------------- + +.. automodule:: GPy.util.ln_diff_erfs + :members: + :undoc-members: + :show-inheritance: + GPy.util.misc module -------------------- @@ -67,6 +140,14 @@ GPy.util.mocap module :undoc-members: :show-inheritance: +GPy.util.mpi module +------------------- + +.. automodule:: GPy.util.mpi + :members: + :undoc-members: + :show-inheritance: + GPy.util.multioutput module --------------------------- @@ -75,18 +156,34 @@ GPy.util.multioutput module :undoc-members: :show-inheritance: -GPy.util.plot module --------------------- +GPy.util.netpbmfile module +-------------------------- -.. automodule:: GPy.util.plot +.. automodule:: GPy.util.netpbmfile :members: :undoc-members: :show-inheritance: -GPy.util.plot_latent module ---------------------------- +GPy.util.normalizer module +-------------------------- -.. automodule:: GPy.util.plot_latent +.. automodule:: GPy.util.normalizer + :members: + :undoc-members: + :show-inheritance: + +GPy.util.parallel module +------------------------ + +.. automodule:: GPy.util.parallel + :members: + :undoc-members: + :show-inheritance: + +GPy.util.pca module +------------------- + +.. automodule:: GPy.util.pca :members: :undoc-members: :show-inheritance: @@ -99,18 +196,26 @@ GPy.util.squashers module :undoc-members: :show-inheritance: -GPy.util.univariate_Gaussian module ------------------------------------ +GPy.util.subarray_and_sorting module +------------------------------------ -.. automodule:: GPy.util.univariate_Gaussian +.. automodule:: GPy.util.subarray_and_sorting :members: :undoc-members: :show-inheritance: -GPy.util.visualize module -------------------------- +GPy.util.symbolic module +------------------------ -.. automodule:: GPy.util.visualize +.. automodule:: GPy.util.symbolic + :members: + :undoc-members: + :show-inheritance: + +GPy.util.univariate_Gaussian module +----------------------------------- + +.. 
automodule:: GPy.util.univariate_Gaussian
    :members:
    :undoc-members:
    :show-inheritance:
diff --git a/doc/index.rst b/doc/index.rst
index 4d0833a4..87d80be3 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -19,8 +19,6 @@ You may also be interested by some examples in the GPy/examples folder.
 Contents:
 .. toctree::
-   :maxdepth: 4
-
    GPy
diff --git a/doc/tuto_GP_regression.rst b/doc/tuto_GP_regression.rst
index 3d3ab10a..29eefa72 100644
--- a/doc/tuto_GP_regression.rst
+++ b/doc/tuto_GP_regression.rst
@@ -23,15 +23,15 @@ Note that the observations Y include some noise.
 The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on the Gaussian kernel (i.e. RBF or squared exponential)::

     kernel = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=1.)

 The parameter ``input_dim`` stands for the dimension of the input space. The parameters ``variance`` and ``lengthscale`` are optional. Many other kernels are implemented, such as:

-* linear (``GPy.kern.linear``)
-* exponential kernel (``GPy.kern.exponential``)
-* Matern 3/2 (``GPy.kern.Matern32``)
-* Matern 5/2 (``GPy.kern.Matern52``)
-* spline (``GPy.kern.spline``)
+* linear (:py:class:`~GPy.kern.Linear`)
+* exponential kernel (:py:class:`GPy.kern.Exponential`)
+* Matern 3/2 (:py:class:`GPy.kern.Matern32`)
+* Matern 5/2 (:py:class:`GPy.kern.Matern52`)
+* spline (:py:class:`GPy.kern.Spline`)
 * and many others...

 The inputs required for building the model are the observations and the kernel::
@@ -45,38 +45,28 @@ By default, some observation noise is added to the model.
 The function ``print(m)`` gives the following output::

-    Marginal log-likelihood: -4.479e+00
-    Name            | Value  | Constraints | Ties | Prior
-    -----------------------------------------------------------------
-    rbf_variance    | 1.0000 |             |      |
-    rbf_lengthscale | 1.0000 |             |      |
-    noise_variance  | 1.0000 |             |      |
-
+    Name : GP regression
+    Log-likelihood : -22.8178418808
+    Number of Parameters : 3
+    Parameters:
+    GP_regression.          | Value | Constraint | Prior | Tied to
+    rbf.variance            |   1.0 |    +ve     |       |
+    rbf.lengthscale         |   1.0 |    +ve     |       |
+    Gaussian_noise.variance |   1.0 |    +ve     |       |
+
 .. figure:: Figures/tuto_GP_regression_m1.png
    :align: center
    :height: 350px

-   GP regression model before optimization of the parameters. The shaded region corresponds to 95% confidence intervals (ie +/- 2 standard deviation).
+   GP regression model before optimization of the parameters. The shaded region corresponds to ~95% confidence intervals (i.e. +/- 2 standard deviations).

-The default values of the kernel parameters may not be relevant for the current data (for example, the confidence intervals seems too wide on the previous figure). A common approach is to find the values of the parameters that maximize the likelihood of the data. There are two steps for doing that with GPy:
+The default values of the kernel parameters may not be relevant for
+the current data (for example, the confidence intervals seem too wide
+on the previous figure). A common approach is to find the values of
+the parameters that maximize the likelihood of the data. It is as easy
+as calling ``m.optimize`` in GPy::

-* Constrain the parameters of the kernel to ensure the kernel will always be a valid covariance structure (For example, we don\'t want some variances to be negative!).
-* Run the optimization
-
-There are various ways to constrain the parameters of the kernel. The most basic is to constrain all the parameters to be positive::
-
-    m.ensure_default_constraints() # or similarly m.constrain_positive('')
-
-but it is also possible to set a range on to constrain one parameter to be fixed. The parameter of ``m.constrain_positive`` is a regular expression that matches the name of the parameters to be constrained (as seen in ``print m``). For example, if we want the variance to be positive, the lengthscale to be in [1,10] and the noise variance to be fixed we can write::
-
-    m.unconstrain('') # may be used to remove the previous constrains
-    m.constrain_positive('.*rbf_variance')
-    m.constrain_bounded('.*lengthscale',1.,10. )
-    m.constrain_fixed('.*noise',0.0025)
-
-Once the constrains have been imposed, the model can be optimized::
-
-    m.optimize()
+    m.optimize()
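+Once optimized, the model can be used for prediction at new inputs.
+A minimal sketch (the input grid here is arbitrary; ``predict``
+returns the predictive mean and variance, and ``predict_quantiles``
+returns the 2.5% and 97.5% quantiles by default)::
+
+    Xnew = np.linspace(0., 10., 100)[:, None]  # new inputs, shape (100, 1)
+    mean, variance = m.predict(Xnew)           # predictive mean and variance
+    lower, upper = m.predict_quantiles(Xnew)   # default ~95% interval
+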
 If we want to perform some restarts to try to improve the result of the optimization, we can use the ``optimize_restarts`` function::
@@ -84,13 +74,15 @@ If we want to perform some restarts to try to improve the result of the optimiza
 Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting model::

-    Marginal log-likelihood: 3.603e+01
-    Name            | Value  | Constraints | Ties | Prior
-    -----------------------------------------------------------------
-    rbf_variance    | 0.8151 | (+ve)       |      |
-    rbf_lengthscale | 1.8037 | (1.0, 10.0) |      |
-    noise_variance  | 0.0025 | Fixed       |      |
-
+    Name : GP regression
+    Log-likelihood : 11.947469082
+    Number of Parameters : 3
+    Parameters:
+    GP_regression.          | Value            | Constraint | Prior | Tied to
+    rbf.variance            | 0.74229417323    |    +ve     |       |
+    rbf.lengthscale         | 1.43020495724    |    +ve     |       |
+    Gaussian_noise.variance | 0.00325654460991 |    +ve     |       |
+
 .. figure:: Figures/tuto_GP_regression_m2.png
    :align: center
    :height: 350px
@@ -113,30 +105,36 @@ Here is a 2 dimensional example::

     Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
     # define kernel
-    ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
+    ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.White(2)
     # create simple GP model
     m = GPy.models.GPRegression(X,Y,ker)
-    # contrain all parameters to be positive
-    m.constrain_positive('')
-    # optimize and plot
-    m.optimize('tnc', max_f_eval = 1000)
+    m.optimize(max_f_eval = 1000)
     m.plot()
     print(m)

 The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (i.e. the GP is not isotropic). The output of the last two lines is::

-    Marginal log-likelihood: 6.682e+01
-    Name                | Value  | Constraints | Ties | Prior
-    ---------------------------------------------------------------------
-    Mat52_variance      | 0.3860 | (+ve)       |      |
-    Mat52_lengthscale_0 | 2.0578 | (+ve)       |      |
-    Mat52_lengthscale_1 | 1.8542 | (+ve)       |      |
-    white_variance      | 0.0023 | (+ve)       |      |
-    noise variance      | 0.0000 | (+ve)       |      |
+    Name : GP regression
+    Log-likelihood : 26.787156248
+    Number of Parameters : 5
+    Parameters:
+    GP_regression.          | Value             | Constraint | Prior | Tied to
+    add.Mat52.variance      | 0.385463739076    |    +ve     |       |
+    add.Mat52.lengthscale   | (2,)              |    +ve     |       |
+    add.white.variance      | 0.000835329608514 |    +ve     |       |
+    Gaussian_noise.variance | 0.000835329608514 |    +ve     |       |

+If you want to see the ``ARD`` parameters explicitly, print them
+directly::
+
+    >>> print m.add.Mat52.lengthscale
+    Index | GP_regression.add.Mat52.lengthscale | Constraint | Prior | Tied to
+    [0]   |                           1.9575587 |    +ve     |       |   N/A
+    [1]   |                           1.9689948 |    +ve     |       |   N/A
+
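+If sensible per-dimension lengthscales are known in advance, they can
+also be passed at construction time. A minimal sketch, assuming (as is
+usual for ARD kernels) that ``lengthscale`` accepts one value per
+input dimension when ``ARD=True``; the values here are arbitrary::
+
+    ker = GPy.kern.Matern52(input_dim=2, lengthscale=np.array([2., 4.]), ARD=True)
+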
 .. figure:: Figures/tuto_GP_regression_m3.png
    :align: center
    :height: 350px
diff --git a/doc/tuto_creating_new_models.rst b/doc/tuto_creating_new_models.rst
index 5c51cdad..07f6194f 100644
--- a/doc/tuto_creating_new_models.rst
+++ b/doc/tuto_creating_new_models.rst
@@ -20,13 +20,13 @@ input parameters :math:`\mathbf{X}`. Where
 Obligatory methods
 ==================

-:py:meth:`~GPy.core.model.Model.__init__` :
+:py:func:`~GPy.core.model.Model.__init__` :
     Initialize the model with the given parameters. These need to be
     added to the model by calling `self.add_parameter()`, where the
     argument needs to be a parameter handle (see parameterized_ for details)::

-        self.X = GPy.core.Param("input", X)
+        self.X = GPy.Param("input", X)
         self.add_parameter(self.X)

 :py:meth:`~GPy.core.model.Model.log_likelihood` :
@@ -39,12 +39,61 @@ Obligatory methods
 :py:meth:`~GPy.core.model.Model.parameters_changed` :
     Updates the internal state of the model and sets the gradient of
     each parameter handle in the hierarchy with respect to the
-    log_likelihod. Thus here we need to put the negative derivative of
-    the rosenbrock function:
+    log_likelihood. Thus here we need to set the negative derivative of
+    the Rosenbrock function for the parameters. In this case it is the
+    gradient for ``self.X``::

         self.X.gradient = -scipy.optimize.rosen_der(self.X)

+Here is the full code for the `Rosen` class::
+
+    from GPy import Model, Param
+    import scipy.optimize  # provides rosen and rosen_der
+    import numpy as np     # used below to construct the inputs
+
+    class Rosen(Model):
+        def __init__(self, X, name='rosenbrock'):
+            super(Rosen, self).__init__(name=name)
+            self.X = Param("input", X)
+            self.add_parameter(self.X)
+        def log_likelihood(self):
+            return -scipy.optimize.rosen(self.X)
+        def parameters_changed(self):
+            self.X.gradient = -scipy.optimize.rosen_der(self.X)
+
+In order to test the newly created model, we can check the gradients
+and optimize a standard Rosenbrock run::
+
+    >>> m = Rosen(np.array([-1,-1]))
+    >>> print m
+    Name : rosenbrock
+    Log-likelihood : -404.0
+    Number of Parameters : 2
+    Parameters:
+    rosenbrock. | Value | Constraint | Prior | Tied to
+    input       |  (2,) |            |       |
+    >>> m.checkgrad(verbose=True)
+    Name                  | Ratio    | Difference | Analytical  | Numerical
+    ------------------------------------------------------------------------------------------
+    rosenbrock.input[[0]] | 1.000000 | 0.000000   | -804.000000 | -804.000000
+    rosenbrock.input[[1]] | 1.000000 | 0.000000   | -400.000000 | -400.000000
+    >>> m.optimize()
+    >>> print m
+    Name : rosenbrock
+    Log-likelihood : -6.52150088871e-15
+    Number of Parameters : 2
+    Parameters:
+    rosenbrock. | Value | Constraint | Prior | Tied to
+    input       |  (2,) |            |       |
+    >>> print m.input
+    Index | rosenbrock.input | Constraint | Prior | Tied to
+    [0]   |       0.99999994 |            |       |   N/A
+    [1]   |       0.99999987 |            |       |   N/A
+    >>> print m.gradient
+    [ -1.91169809e-06, 1.01852309e-06]
+
+This is the optimum of the 2D Rosenbrock function, as expected, and
+the gradients of the inputs are almost zero.
+
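+Because ``self.X`` is an ordinary ``Param``, the constraint machinery
+described in the interacting-with-models tutorial applies here too. A
+minimal sketch, assuming the ``Rosen`` class above (calling ``fix()``
+on an indexed parameter is demonstrated later in these tutorials)::
+
+    m = Rosen(np.array([-1., -1.]))
+    m.X[0].fix()   # hold the first coordinate at -1
+    m.optimize()   # only the free coordinate is optimized
+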
+Optional methods
+================
diff --git a/doc/tuto_interacting_with_models.rst b/doc/tuto_interacting_with_models.rst
index 5bd0511e..80b2ac77 100644
--- a/doc/tuto_interacting_with_models.rst
+++ b/doc/tuto_interacting_with_models.rst
@@ -40,89 +40,199 @@ is shown. For each parameter, the table contains the name of the
 parameter, the current value and, where defined, the associated
 constraints, ties and prior distributions. ::

-    Log-likelihood: 6.309e+02
+    Name : sparse gp
+    Log-likelihood : 588.947189413
+    Number of Parameters : 8
+    Parameters:
+    sparse_gp.              | Value            | Constraint | Prior | Tied to
+    inducing inputs         | (5, 1)           |            |       |
+    rbf.variance            | 1.91644016819    |    +ve     |       |
+    rbf.lengthscale         | 2.62103621347    |    +ve     |       |
+    Gaussian_noise.variance | 0.00269870373421 |    +ve     |       |

-    Name            | Value   | Constraints | Ties | Prior
-    ------------------------------------------------------------------
-    iip_0_0         | -1.4671 |             |      |
-    iip_1_0         |  2.6378 |             |      |
-    iip_2_0         | -0.0396 |             |      |
-    iip_3_0         | -2.6372 |             |      |
-    iip_4_0         |  1.4704 |             |      |
-    rbf_variance    |  1.5672 | (+ve)       |      |
-    rbf_lengthscale |  2.5625 | (+ve)       |      |
-    white_variance  |  0.0000 | (+ve)       |      |
-    noise_variance  |  0.0022 | (+ve)       |      |
-
-In this case the kernel parameters (``rbf_variance``,
-``rbf_lengthscale`` and ``white_variance``) as well as
-the noise parameter (``noise_variance``), are constrained
-to be positive, while the inducing inputs have not
+In this case the kernel parameters (``rbf.variance``,
+``rbf.lengthscale``) as well as
+the likelihood noise parameter (``Gaussian_noise.variance``) are constrained
+to be positive, while the inducing inputs have no
 constraints associated. There are also no ties or priors defined.

-Setting and fetching parameters by name
-=======================================
-Another way to interact with the model's parameters is through
-the functions ``_get_param_names()``, ``_get_params()`` and
-``_set_params()``.
+You can also print all subparts of the model by printing the
+subcomponents individually::

-``_get_param_names()`` returns a list of the parameters names ::
+    print m.rbf

-    ['iip_0_0',
-     'iip_1_0',
-     'iip_2_0',
-     'iip_3_0',
-     'iip_4_0',
-     'rbf_variance',
-     'rbf_lengthscale',
-     'white_variance',
-     'noise_variance']
+This will print the details of this particular parameter handle::

-``_get_params()`` returns an array of the parameters values ::
+    rbf.        | Value         | Constraint | Prior | Tied to
+    variance    | 1.91644016819 |    +ve     |       |
+    lengthscale | 2.62103621347 |    +ve     |       |

-    array([ -1.46705227e+00,  2.63782176e+00, -3.96422982e-02,
-            -2.63715255e+00,  1.47038653e+00,  1.56724596e+00,
-             2.56248679e+00,  2.20963633e-10,  2.18379922e-03])
+When you want to get a closer look into
+multivalued parameters, print them directly::

+    print m.inducing_inputs
+
+    Index | sparse_gp.inducing_inputs | Constraint | Prior | Tied to
+    [0 0] |                 2.7189499 |            |       |   N/A
+    [1 0] |                0.02006533 |            |       |   N/A
+    [2 0] |                -1.5299386 |            |       |   N/A
+    [3 0] |                -2.7001675 |            |       |   N/A
+    [4 0] |                 1.4654162 |            |       |   N/A
+
-``_set_params()`` takes an array as input and substitutes
-the current values of the parameters for those of the array. For example,
-we can define a new array of values and change the parameters as follows: ::
+Interacting with Parameters
+===========================
+The preferred way of interacting with parameters is to act on the
+parameter handle itself.
+Interacting with parameter handles is simple. The names printed by `print m`
+are accessible interactively and programmatically. For example, try to
+set the kernel's (`rbf`) `lengthscale` to `.2` and print the result::

-    new_params = np.array([1.,2.,3.,4.,1.,1.,1.,1.,1.])
-    m._set_params(new_params)
+    m.rbf.lengthscale = .2
+    print m

-If we call the function ``_get_params()`` again, we will obtain the new
-parameters we have just set.
+You should see this::

-Parameters can be also set by name using dictionary notations. For example,
-let's change the lengthscale to .5: ::
-
-    m['rbf_lengthscale'] = .5
-
-Here, the matching accepts a regular expression and therefore all
-parameters matching that regular expression are set to the given value.
-In this case rather
-than passing as second output a single value, we can also
-use a list of arrays. For example, lets change the inducing
-inputs: ::
-
-    m['iip'] = np.arange(-5,0)
+    Name : sparse gp
+    Log-likelihood : 588.947189413
+    Number of Parameters : 8
+    Parameters:
+    sparse_gp.              | Value            | Constraint | Prior | Tied to
+    inducing inputs         | (5, 1)           |            |       |
+    rbf.variance            | 1.91644016819    |    +ve     |       |
+    rbf.lengthscale         | 0.2              |    +ve     |       |
+    Gaussian_noise.variance | 0.00269870373421 |    +ve     |       |

+This will already have updated the model's inner state, so you can
+plot it or inspect the changes in the model's posterior, `m.posterior`.
+
+Regular expressions
+-------------------
+The model's parameters can also be accessed through regular
+expressions, by 'indexing' the model with a regular expression
+matching the parameter name. Through indexing by regular expression,
+you can only retrieve leaves of the hierarchy, and you can retrieve the
+values matched by calling `values()` on the returned object::
+
+    >>> print m['.*var']
+    Index | sparse_gp.rbf.variance            | Constraint | Prior      | Tied to
+    [0]   |                         2.1500132 |            |            |   N/A
+    ----- | sparse_gp.Gaussian_noise.variance | ---------- | ---------- | -------
+    [0]   |                      0.0024268215 |            |            |   N/A
+    >>> print m['.*var'].values()
+    [ 2.1500132   0.00242682]
+    >>> print m['rbf']
+    Index | sparse_gp.rbf.variance            | Constraint | Prior      | Tied to
+    [0]   |                         2.1500132 |            |            |   N/A
+    ----- | sparse_gp.rbf.lengthscale         | ---------- | ---------- | -------
+    [0]   |                         2.6782803 |            |            |   N/A
+
+Parameters can be set by regular expression as well. Here are a few
+examples::
+
+    >>> m['.*var'] = .1
+    >>> print m['.*var']
+    Index | sparse_gp.rbf.variance            | Constraint | Prior      | Tied to
+    [0]   |                               0.1 |            |            |   N/A
+    ----- | sparse_gp.Gaussian_noise.variance | ---------- | ---------- | -------
+    [0]   |                               0.1 |            |            |   N/A
+    >>> m['.*var'] = [.1, .2]
+    >>> print m['.*var']
+    Index | sparse_gp.rbf.variance            | Constraint | Prior      | Tied to
+    [0]   |                               0.1 |            |            |   N/A
+    ----- | sparse_gp.Gaussian_noise.variance | ---------- | ---------- | -------
+    [0]   |                               0.2 |            |            |   N/A
+
+Because only leaf nodes can be accessed this way, we can print all
+parameters in a flattened view by indexing with a regular expression
+that matches everything::
+
+    >>> print m['']
+    Index | sparse_gp.inducing_inputs         | Constraint | Prior      | Tied to
+    [0 0] |                        -2.6716041 |            |            |   N/A
+    [1 0] |                        -1.4665111 |            |            |   N/A
+    [2 0] |                      -0.031010293 |            |            |   N/A
+    [3 0] |                         1.4563711 |            |            |   N/A
+    [4 0] |                         2.6803046 |            |            |   N/A
+    ----- | sparse_gp.rbf.variance            | ---------- | ---------- | -------
+    [0]   |                               0.1 |            |            |   N/A
+    ----- | sparse_gp.rbf.lengthscale         | ---------- | ---------- | -------
+    [0]   |                         2.6782803 |            |            |   N/A
+    ----- | sparse_gp.Gaussian_noise.variance | ---------- | ---------- | -------
+    [0]   |                               0.2 |            |            |   N/A
+
+Setting and fetching parameters via the `parameter_array`
+----------------------------------------------------------
+Another way to interact with the model's parameters is through the
+`parameter_array`. The parameter array holds all the parameters of the
+model in one place and is editable. It can be accessed by indexing
+the model; for example, you can set all the parameters through this
+mechanism::
+
+    >>> new_params = np.r_[[-4,-2,0,2,4], [.5,2], [.3]]
+    >>> new_params
+    array([-4. , -2. ,  0. ,  2. ,  4. ,  0.5,  2. ,  0.3])
+    >>> m[:] = new_params
+    >>> print m
+    Name : sparse gp
+    Log-likelihood : -147.561160209
+    Number of Parameters : 8
+    Parameters:
+    sparse_gp.              | Value  | Constraint | Prior | Tied to
+    inducing inputs         | (5, 1) |            |       |
+    rbf.variance            | 0.5    |    +sq     |       |
+    rbf.lengthscale         | 2.0    |    +ve     |       |
+    Gaussian_noise.variance | 0.3    |    +sq     |       |
+
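+Fetching works the same way: indexing the model with a full slice
+returns the current flattened parameter array. A minimal sketch,
+assuming slice indexing mirrors the assignment above::
+
+    >>> m[:]
+    array([-4. , -2. ,  0. ,  2. ,  4. ,  0.5,  2. ,  0.3])
+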
+Parameters themselves (leaves of the hierarchy) can be indexed and used
+the same way as numpy arrays. First let us set a slice of the
+`inducing_inputs`::
+
+    >>> m.inducing_inputs[2:, 0] = [1,3,5]
+    >>> print m.inducing_inputs
+    Index | sparse_gp.inducing_inputs | Constraint | Prior | Tied to
+    [0 0] |                        -4 |            |       |   N/A
+    [1 0] |                        -2 |            |       |   N/A
+    [2 0] |                         1 |            |       |   N/A
+    [3 0] |                         3 |            |       |   N/A
+    [4 0] |                         5 |            |       |   N/A
+
+Or you can use the parameters as normal numpy arrays in calculations::
+
+    >>> precision = 1./m.Gaussian_noise.variance
+    >>> precision
+    array([ 3.33333333])
+
 Getting the model's log likelihood
 =============================================
 Apart from printing the model, the marginal
 log-likelihood can be obtained by using the function
-``log_likelihood()``. Also, the log-likelihood gradients
-wrt. each parameter can be obtained with the funcion
-``_log_likelihood_gradients()``. ::
+``log_likelihood()``::

-    m.log_likelihood()
-    -791.15371409346153
+    >>> m.log_likelihood()
+    array([-152.83377316])

-    m._log_likelihood_gradients()
-    array([  7.08278455e-03,   1.37118783e+01,   2.66948031e+00,
-             3.50184014e+00,   7.08278455e-03,  -1.43501702e+02,
-             6.10662266e+01,  -2.18472649e+02,   2.14663691e+02])
+If you need the log likelihood as a plain float, call `float()`
+on it::

+    >>> float(m.log_likelihood())
+    -152.83377316356177
+
-Removing the model's constraints
+Getting the model parameters' gradients
+=======================================
+The gradients of a model can shed light on the
+(possibly hard) optimization process. The gradient of each parameter
+handle can be accessed through its `gradient` field::
+
+    >>> print m.gradient
+    [    5.51170031     9.71735112    -4.20282106    -3.45667035    -1.58828165
+        -2.11549358    12.40292787  -627.75467803]
+    >>> print m.rbf.gradient
+    [ -2.11549358  12.40292787]
+    >>> m.optimize()
+    >>> print m.gradient
+    [ -5.98046560e-04  -3.64576085e-04   1.98005930e-04   3.43381219e-04
+      -6.85685104e-04  -1.28800748e-05   1.08552429e-03   2.74058081e-01]
+
+Adjusting the model's constraints
+=================================
 When we initially called the example, it was optimized and hence the
 log-likelihood gradients were close to zero. However, since
 we have been changing the parameters, the gradients are far from zero now.
 Next we are going to show how to optimize the model while setting different
 restrictions on the parameters.

-Once a constrain has been set on a parameter, it is possible to remove it
-with the command ``unconstrain()``, and
-just as the previous matching commands, it also accepts regular expression.
-In this case we will remove all the constraints: ::
+Once a constraint has been set on a parameter, it is possible to remove
+it with the command ``unconstrain()``, which can be called on any
+parameter handle of the model. The methods `constrain()` and
+`unconstrain()` return the indices that were actually affected,
+relative to the parameter handle the method was called on. This is
+particularly handy for seeing which parameters were reconstrained
+when constraining a parameter that was already constrained::

-    m.unconstrain('')
+    >>> m.rbf.variance.unconstrain()
+    array([0])
+    >>> m.unconstrain()
+    array([6, 7])
+
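+Re-constraining works the same way. For instance, a parameter can be
+restricted to an interval with ``constrain_bounded()`` (listed with
+the other ``constrain_*`` methods below); a minimal sketch with
+arbitrary bounds::
+
+    m.rbf.lengthscale.constrain_bounded(0.1, 10.)
+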
-Constraining and optimising the model
-=====================================
-A requisite needed for some parameters, such as variances,
-is to be positive. This is constraint is easily set
-with the function ``constrain_positive()``. Regular expressions
-are also accepted. ::
-
-    m.constrain_positive('.*var')
-
-For convenience, GPy also provides a catch all function
-which ensures that anything which appears to require
-positivity is constrianed appropriately::
-
-    m.ensure_default_constraints()
+If you want to unconstrain only a specific constraint, you can pass it
+as an argument of ``unconstrain(Transformation)``
+(:py:class:`~GPy.constraints.Transformation`), or call the respective
+method, such as ``unconstrain_fixed()`` (or ``unfix()``), to unfix
+only the fixed parameters::

-Fixing parameters
-=================
-Parameters values can be fixed using ``constrain_fixed()``.
-For example we can define the first inducing input to be
-fixed on zero: ::
-
-    m.constrain_fixed('iip_0',0)
+    >>> m.inducing_inputs[0].fix()
+    >>> m.unfix()
+    >>> m.rbf.constrain_positive()
+    >>> print m
+    Name : sparse gp
+    Log-likelihood : 620.741066698
+    Number of Parameters : 8
+    Parameters:
+    sparse_gp.              | Value            | Constraint | Prior | Tied to
+    inducing inputs         | (5, 1)           |            |       |
+    rbf.variance            | 1.48329711218    |    +ve     |       |
+    rbf.lengthscale         | 2.5430947048     |    +ve     |       |
+    Gaussian_noise.variance | 0.00229714444128 |            |       |

-Bounding parameters
-===================
-Defining bounding constraints is an easily task in GPy too,
-it only requires to use the function ``constrain_bounded()``.
-For example, lets bound inducing inputs 2 and 3 to have
-values between -4 and -1: ::
-
-    m.constrain_bounded('iip_(1|2)',-4,-1)
+As you can see, ``unfix()`` only unfixed the ``inducing_inputs`` and did
+not change the positive constraint of the kernel.
+
+Parameter handles come with sensible default constraints, so you will
+rarely need to adjust the constraints of a model. In the rare cases
+where you do, or when you need to fix some parameters, you can do so
+with the functions ``constrain_{positive|negative|bounded|fixed}()``::
+
+    m['.*var'].constrain_positive()
+
+Available Constraints
+=====================
+
+* :py:class:`~GPy.constraints.Logexp`
+* :py:class:`~GPy.constraints.Exponent`
+* :py:class:`~GPy.constraints.Square`
+* :py:class:`~GPy.constraints.Logistic`
+* :py:class:`~GPy.constraints.LogexpNeg`
+* :py:class:`~GPy.constraints.NegativeExponent`
+* :py:class:`~GPy.constraints.NegativeLogexp`

 Tying Parameters
-================
-The values of two or more parameters can be tied together,
-so that they share the same value during optimization.
-The function to do so is ``tie_params()``. For the example
-we are using, it doesn't make sense to tie parameters together,
-however for the sake of the example we will tie the white noise
-and the variance together. See `A kernel overview `_.
-for a proper use of the tying capabilities.::
-
-    m.tie_params('.*e_var')
+================
+Not yet implemented for GPy version 0.6.0

 Optimizing the model
 ====================
+
 Once we have finished defining the constraints, we can now optimize the
 model with the function ``optimize()``::

-    m.optimize()
+    m.Gaussian_noise.constrain_positive()
+    m.rbf.constrain_positive()
+    m.optimize()

-We can print again the model and check the new results.
-The table now shows that ``iip_0_0`` is fixed, ``iip_1_0``
-and ``iip_2_0`` are bounded and the kernel parameters are constrained to
-be positive. In addition the table now indicates that
-white_variance and noise_variance are tied together.::
+By default, GPy uses the lbfgsb optimizer.
+
+Some optional parameters of ``optimize()``, with a combined usage
+sketch after the list:

-    Log-likelihood: 9.967e+01
+* ``optimizer``: which optimizer to use. Currently available are
+  ``lbfgsb, fmin_tnc, scg, simplex``, or any prefix uniquely
+  identifying an optimizer; thus, you can write ``m.optimize('bfgs')``
+  to use the ``lbfgsb`` optimizer.
+* ``messages``: whether the optimizer is verbose. Each optimizer has
+  its own way of printing, so do not be confused by the differing
+  messages of different optimizers.
+* ``max_iters``: maximum number of iterations to take. Some optimizers
+  count function calls as iterations, others count iterations of the
+  algorithm. If the number of iterations matters, consult
+  ``scipy.optimize`` so you can pass the right parameters to
+  ``optimize()``.
+* ``gtol``: only for some optimizers; sets the convergence criterion
+  as the gradient tolerance at which optimization stops.
-    Log-likelihood: 9.967e+01
-
-    Name            | Value   | Constraints | Ties | Prior
-    ------------------------------------------------------------------
-    iip_0_0         |  0.0000 | Fixed       |      |
-    iip_1_0         | -2.8834 | (-4, -1)    |      |
-    iip_2_0         | -1.9152 | (-4, -1)    |      |
-    iip_3_0         |  1.5034 |             |      |
-    iip_4_0         | -1.0162 |             |      |
-    rbf_variance    |  0.0158 | (+ve)       |      |
-    rbf_lengthscale |  0.9760 | (+ve)       |      |
-    white_variance  |  0.0049 | (+ve)       | (0)  |
-    noise_variance  |  0.0049 | (+ve)       | (0)  |

+Further Reading
+===============
-
-Further Reading
-===============
-All of the mechansiams for dealing with parameters are baked right into GPy.core.model, from which all of the classes in GPy.models inherrit. To learn how to construct your own model, you might want to read :ref:`creating_new_models`.
-
-By deafult, GPy uses the scg optimizer. To use other optimisers, and to control the setting of those optimisers, as well as other funky features like automated restarts and diagnostics, you can read the optimization tutorial ??link??.
+All of the mechanisms for dealing
+with parameters are baked right into GPy.core.model, from which all of
+the classes in GPy.models inherit. To learn how to construct your own
+model, you might want to read :ref:`creating_new_models`. If you want
+to learn how to create kernels, please refer to
+:ref:`creating_new_kernels`.