From 783aa0ca0dffdd1ba7955a1622f5169355a85e1d Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Fri, 5 Sep 2014 17:59:00 +0100
Subject: [PATCH 01/22] [accassibility] GPy.constraints now accassible

---
 GPy/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/GPy/__init__.py b/GPy/__init__.py
index b6bf81b7..8c5f7ed5 100644
--- a/GPy/__init__.py
+++ b/GPy/__init__.py
@@ -5,6 +5,7 @@ warnings.filterwarnings("ignore", category=DeprecationWarning)
 
 import core
 from core.parameterization import transformations, priors
+constraints = transformations
 import models
 import mappings
 import inference

From 2dbc4cc57b835112b100d51f3bb9d8ed58030819 Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Fri, 5 Sep 2014 17:59:40 +0100
Subject: [PATCH 02/22] [printing] warning when reconstraining now prints
 hierarchy names

---
 GPy/core/parameterization/parameter_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py
index 2831eaf7..cae999d9 100644
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@@ -652,7 +652,7 @@ class Indexable(Nameable, Observable):
         """
         if warning and reconstrained.size > 0:
             # TODO: figure out which parameters have changed and only print those
-            print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name)
+            print "WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name)
         index = self._raveled_index()
         which.add(what, index)
         return index

From 3a7e42f24f316e93c9bce2ca31b88ace748ff825 Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Fri, 5 Sep 2014 18:00:29 +0100
Subject: [PATCH 03/22] [documentation] bits and pieces for
 interacting_with_models

---
 doc/tuto_interacting_with_models.rst | 242 ++++++++++++++++++++-------
 1 file changed, 181 insertions(+), 61 deletions(-)

diff --git a/doc/tuto_interacting_with_models.rst b/doc/tuto_interacting_with_models.rst
index 27980665..184d9c17 100644
--- a/doc/tuto_interacting_with_models.rst
+++ b/doc/tuto_interacting_with_models.rst
@@ -41,15 +41,14 @@ of the parameter, the current value, and in case there are
 defined: constraints, ties and prior distrbutions associated. ::
 
   Name                 : sparse gp
-  Log-likelihood       : -405.646051581
+  Log-likelihood       : 588.947189413
   Number of Parameters : 8
   Parameters:
-    sparse_gp.               |  Value   |  Constraint  |  Prior  |  Tied to
-    inducing inputs          |  (5, 1)  |              |         |         
-    rbf.variance             |     1.0  |     +ve      |         |         
-    rbf.lengthscale          |     1.0  |     +ve      |         |         
-    Gaussian_noise.variance  |     1.0  |     +ve      |         |         
-  
+    sparse_gp.               |       Value        |  Constraint  |  Prior  |  Tied to
+    inducing inputs          |            (5, 1)  |              |         |         
+    rbf.variance             |     1.91644016819  |     +ve      |         |         
+    rbf.lengthscale          |     2.62103621347  |     +ve      |         |         
+    Gaussian_noise.variance  |  0.00269870373421  |     +ve      |         |           
 
 In this case the kernel parameters (``rbf.variance``, 
 ``rbf.lengthscale``) as well as 
@@ -57,53 +56,152 @@ the likelihood noise parameter (``Gaussian_noise.variance``), are constrained
 to be positive, while the inducing inputs have no
 constraints associated. Also there are no ties or prior defined.
 
-Setting and fetching parameters by name
-=======================================
-Another way to interact with the model's parameters is through
-the functions ``_get_param_names()``, ``_get_params()`` and 
-``_set_params()``.
+You can also print all subparts of the model, by printing the
+subcomponents individually::
 
-``_get_param_names()`` returns a list of the parameters names ::
+  print m.rbf
 
-	['iip_0_0',
-	 'iip_1_0',
-	 'iip_2_0',
-	 'iip_3_0',
-	 'iip_4_0',
-	 'rbf_variance',
-	 'rbf_lengthscale',
-	 'white_variance',
-	 'noise_variance']
+This will print the details of this particular parameter handle::
 
-``_get_params()`` returns an array of the parameters values ::
+    rbf.         |      Value      |  Constraint  |  Prior  |  Tied to
+    variance     |  1.91644016819  |     +ve      |         |         
+    lengthscale  |  2.62103621347  |     +ve      |         |         
 
-	array([ -1.46705227e+00,   2.63782176e+00,  -3.96422982e-02,
-		-2.63715255e+00,   1.47038653e+00,   1.56724596e+00,
-		 2.56248679e+00,   2.20963633e-10,   2.18379922e-03])
+When you want to get a closer look into
+multivalue parameters, print them directly::
 
-``_set_params()`` takes an array as input and substitutes 
-the current values of the parameters for those of the array. For example,
-we can define a new array of values and change the parameters as follows: ::
+  print m.inducing_indputs
 
-	new_params = np.array([1.,2.,3.,4.,1.,1.,1.,1.,1.])
-	m._set_params(new_params)
+  Index  |  sparse_gp.inducing_inputs  |  Constraint  |   Prior   |  Tied to
+  [0 0]  |                  2.7189499  |              |           |    N/A    
+  [1 0]  |                 0.02006533  |              |           |    N/A    
+  [2 0]  |                 -1.5299386  |              |           |    N/A    
+  [3 0]  |                 -2.7001675  |              |           |    N/A    
+  [4 0]  |                  1.4654162  |              |           |    N/A    
 
-If we call the function ``_get_params()`` again, we will obtain the new
-parameters we have just set.
+Interacting with Parameters:
+============================
+The preferred way of interacting with parameters is to act on the
+parameter handle itself.
+Interacting with parameter handles is simple. The names printed by `print m`
+are accessible interactively and programmatically. For example, try to
+set the kernel's (`rbf`) `lengthscale` to `.2` and print the result::
 
-Parameters can be also set by name using dictionary notations. For example,
-let's change the lengthscale to .5: ::
+  m.rbf.lengthscale = .2
+  print m
 
-	m['rbf_lengthscale'] = .5
+You should see this::
 
-Here, the matching accepts a regular expression and therefore all parameters matching that regular expression are set to the given value. In this case rather 
-than passing as second output a single value, we can also 
-use a list of arrays. For example, lets change the inducing 
-inputs: ::
+  Name                 : sparse gp
+  Log-likelihood       : 588.947189413
+  Number of Parameters : 8
+  Parameters:
+    sparse_gp.               |       Value        |  Constraint  |  Prior  |  Tied to
+    inducing inputs          |            (5, 1)  |              |         |         
+    rbf.variance             |     1.91644016819  |     +ve      |         |         
+    rbf.lengthscale          |              0.2  |     +ve      |         |         
+    Gaussian_noise.variance  |  0.00269870373421  |     +ve      |         |           
 
-	m['iip'] = np.arange(-5,0)
+This will already have updated the model's inner state, so you can
+plot it or see the changes in the posterior `m.posterior` of the model.
 
-Getting the model's likelihood and gradients
+Regular expressions
+-------------------
+The model's parameters can also be accessed through regular
+expressions, by 'indexing' the model with a regular expression,
+matching the parameter name. Through indexing by regular expression,
+you can only retrieve leafs of the hierarchy, and you can retrieve the
+values matched by calling `values()` on the returned object::
+
+  >>> print m['.*var']
+    Index  |       sparse_gp.rbf.variance        |  Constraint  |    Prior     |  Tied to
+     [0]   |                          2.1500132  |              |              |    N/A    
+    -----  |  sparse_gp.Gaussian_noise.variance  |  ----------  |  ----------  |  -------
+     [0]   |                       0.0024268215  |              |              |    N/A    
+  >>> print m['.*var'].values()
+  [ 2.1500132   0.00242682]
+  >>> print m['rbf']
+     Index  |   sparse_gp.rbf.variance    |  Constraint  |    Prior     |  Tied to
+     [0]   |                  2.1500132  |              |              |    N/A    
+    -----  |  sparse_gp.rbf.lengthscale  |  ----------  |  ----------  |  -------
+     [0]   |                  2.6782803  |              |              |    N/A    
+  
+There is access to setting parameters by regular expression,
+as well. Here are a few examples of how to set parameters by regular expression::
+
+  >>> m['.*var'] = .1
+  >>> print m['.*var']
+    Index  |       sparse_gp.rbf.variance        |  Constraint  |    Prior     |  Tied to
+     [0]   |                                0.1  |              |              |    N/A    
+    -----  |  sparse_gp.Gaussian_noise.variance  |  ----------  |  ----------  |  -------
+     [0]   |                                0.1  |              |              |    N/A    
+  >>> m['.*var'] = [.1, .2]
+  >>> print m['.*var']
+    Index  |       sparse_gp.rbf.variance        |  Constraint  |    Prior     |  Tied to
+     [0]   |                                0.1  |              |              |    N/A    
+    -----  |  sparse_gp.Gaussian_noise.variance  |  ----------  |  ----------  |  -------
+     [0]   |                                0.2  |              |              |    N/A    
+  
+Because only leaf nodes can be accessed this way, we can print all
+parameters in a flattened view by printing the match of a regular
+expression that matches all objects::
+
+  >>> print m['']
+    Index  |      sparse_gp.inducing_inputs      |  Constraint  |    Prior     |  Tied to
+    [0 0]  |                         -2.6716041  |              |              |    N/A    
+    [1 0]  |                         -1.4665111  |              |              |    N/A    
+    [2 0]  |                       -0.031010293  |              |              |    N/A    
+    [3 0]  |                          1.4563711  |              |              |    N/A    
+    [4 0]  |                          2.6803046  |              |              |    N/A    
+    -----  |       sparse_gp.rbf.variance        |  ----------  |  ----------  |  -------
+     [0]   |                                0.1  |              |              |    N/A    
+    -----  |      sparse_gp.rbf.lengthscale      |  ----------  |  ----------  |  -------
+     [0]   |                          2.6782803  |              |              |    N/A    
+    -----  |  sparse_gp.Gaussian_noise.variance  |  ----------  |  ----------  |  -------
+     [0]   |                                0.2  |              |              |    N/A    
+
+Setting and fetching parameters `parameter_array`
+-------------------------------------------------
+Another way to interact with the model's parameters is through the
+`parameter_array`. The Parameter array holds all the parameters of the
+model in one place and is editable. It can be accessed through
+indexing the model for example you can set all the parameters through
+this mechanism::
+
+  >>> new_params = np.r_[[-4,-2,0,2,4], [.5,2], [.3]]
+  >>> print new_params
+  array([-4. , -2. ,  0. ,  2. ,  4. ,  0.5,  2. ,  0.3])
+  >>> m[:] = new_params
+  >>> print m
+  Name                 : sparse gp
+  Log-likelihood       : -147.561160209
+  Number of Parameters : 8
+  Parameters:
+    sparse_gp.               |  Value   |  Constraint  |  Prior  |  Tied to
+    inducing inputs          |  (5, 1)  |              |         |         
+    rbf.variance             |     0.5  |     +sq      |         |         
+    rbf.lengthscale          |     2.0  |     +ve      |         |         
+    Gaussian_noise.variance  |     0.3  |     +sq      |         |         
+ 
+Parameters themselves (leafs of the hierarchy) can be indexed and used
+the same way as numpy arrays. First let us set a slice of the
+`inducing_inputs`::
+
+  >>> m.inducing_inputs[2:, 0] = [1,3,5]
+  >>> print m.inducing_inputs
+    Index  |  sparse_gp.inducing_inputs  |  Constraint  |   Prior   |  Tied to
+    [0 0]  |                         -4  |              |           |    N/A    
+    [1 0]  |                         -2  |              |           |    N/A    
+    [2 0]  |                          1  |              |           |    N/A    
+    [3 0]  |                          3  |              |           |    N/A    
+    [4 0]  |                          5  |              |           |    N/A    
+
+Or you use the parameters as normal numpy arrays for calculations::
+
+  >>> precision = 1./m.Gaussian_noise.variance
+  array([ 3.33333333])
+
+Getting the model's log likelihood
 =============================================
 Appart form the printing the model,  the marginal 
 log-likelihood can be obtained by using the function
@@ -111,15 +209,32 @@ log-likelihood can be obtained by using the function
 wrt. each parameter can be obtained with the funcion
 ``_log_likelihood_gradients()``. ::
 
-    m.log_likelihood()
-    -791.15371409346153
+    >>> m.log_likelihood()
+    array([-152.83377316])
 
-    m._log_likelihood_gradients()
-    array([  7.08278455e-03,   1.37118783e+01,   2.66948031e+00,
-             3.50184014e+00,   7.08278455e-03,  -1.43501702e+02,
-	     6.10662266e+01,  -2.18472649e+02,   2.14663691e+02])
+If you need the log likelihood as a plain float, wrap the call in
+`float()`::
 
-Removing the model's constraints
+  >>> float(m.log_likelihood())
+  -152.83377316356177
+
+Getting the model parameter's gradients
+=======================================
+The gradients of a model can shed light on understanding the
+(possibly hard) optimization process. The gradients of each parameter
+handle can be accessed through their `gradient` field.::
+
+  >>> print m.gradient
+  [   5.51170031    9.71735112   -4.20282106   -3.45667035   -1.58828165
+   -2.11549358   12.40292787 -627.75467803]
+  >>> print m.rbf.gradient
+  [ -2.11549358  12.40292787]
+  >>> m.optimize()
+  >>> print m.gradient
+  [ -5.98046560e-04  -3.64576085e-04   1.98005930e-04   3.43381219e-04
+  -6.85685104e-04  -1.28800748e-05   1.08552429e-03   2.74058081e-01]
+
+Adjusting the model's constraints
 ================================
 When we initially call the example, it was optimized and hence the
 log-likelihood gradients were close to zero. However, since
@@ -127,21 +242,26 @@ we have been changing the parameters, the gradients are far from zero now.
 Next we are going to show how to optimize the model setting different 
 restrictions on the parameters. 
 
-Once a constrain has been set on a parameter, it is possible to remove it
-with the command ``unconstrain()``, and
-just as the previous matching commands, it also accepts regular expression.
-In this case we will remove all the constraints: ::
+Once a constraint has been set on a parameter, it is possible to remove
+it with the command ``unconstrain()``, which can be called on any
+parameter handle of the model. The methods `constrain()` and
+`unconstrain()` return the indices which were actually unconstrained,
+relative to the parameter handle the method was called on. This is
+particularly handy for reporting which parameters were reconstrained,
+when reconstraining a parameter, which was already constrained::
 
-	m.unconstrain('')
+	>>> m.rbf.variance.unconstrain()
+	array([0])
+	>>>m.unconstrain()
+	array([6, 7])
 
-Constraining and optimising the model
-=====================================
-A requisite needed for some parameters, such as variances,
-is to be positive. This is constraint is easily set 
-with the function ``constrain_positive()``. Regular expressions
-are also accepted. ::
+The parameter handles come with default constraints, so you will
+rarely be needing to adjust the constraints of a model. In the rare
+cases of needing to adjust the constraints of a model, or in need of
+fixing some parameters, you can do so with the functions
+``constrain_{positive|negative|bounded|fixed}()``.::
 
-    m.constrain_positive('.*var')
+    m['.*var'].constrain_positive()
 
 For convenience, GPy also provides a catch all function 
 which ensures that anything which appears to require 

From b9e897c50deef7d607881a72b3c57ae60b560e05 Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Sun, 7 Sep 2014 15:42:03 +0100
Subject: [PATCH 04/22] [documentation] updated big parts of the doc

---
 GPy/__init__.py                      |   4 +
 doc/index.rst                        |   2 -
 doc/tuto_GP_regression.rst           | 102 +++++++++++-----------
 doc/tuto_creating_new_models.rst     |  54 +++++++++++-
 doc/tuto_interacting_with_models.rst | 125 ++++++++++++++-------------
 5 files changed, 171 insertions(+), 116 deletions(-)

diff --git a/GPy/__init__.py b/GPy/__init__.py
index 8c5f7ed5..819f54bf 100644
--- a/GPy/__init__.py
+++ b/GPy/__init__.py
@@ -18,6 +18,10 @@ from nose.tools import nottest
 import kern
 import plotting
 
+# Direct imports for convenience:
+from core import Model
+from core.parameterization import Param, Parameterized, ObsAr
+
 @nottest
 def tests():
     Tester(testing).test(verbose=10)
diff --git a/doc/index.rst b/doc/index.rst
index 4d0833a4..87d80be3 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -19,8 +19,6 @@ You may also be interested by some examples in the GPy/examples folder.
 Contents:
 
 .. toctree::
-   :maxdepth: 4
-
    GPy
 
 
diff --git a/doc/tuto_GP_regression.rst b/doc/tuto_GP_regression.rst
index 3d3ab10a..29eefa72 100644
--- a/doc/tuto_GP_regression.rst
+++ b/doc/tuto_GP_regression.rst
@@ -23,15 +23,15 @@ Note that the observations Y include some noise.
 
 The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on Gaussian kernel (i.e. rbf or square exponential)::
 
-    kernel = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.)
+    kernel = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=1.)
 
 The parameter ``input_dim`` stands for the dimension of the input space. The parameters ``variance`` and ``lengthscale`` are optional. Many other kernels are implemented such as:
 
-* linear (``GPy.kern.linear``)
-* exponential kernel (``GPy.kern.exponential``)
-* Matern 3/2 (``GPy.kern.Matern32``)
-* Matern 5/2 (``GPy.kern.Matern52``)
-* spline (``GPy.kern.spline``)
+* linear (:py:class:`~GPy.kern.Linear`)
+* exponential kernel (:py:class:`GPy.kern.Exponential`)
+* Matern 3/2 (:py:class:`GPy.kern.Matern32`)
+* Matern 5/2 (:py:class:`GPy.kern.Matern52`)
+* spline (:py:class:`GPy.kern.Spline`)
 * and many others...
 
 The inputs required for building the model are the observations and the kernel::
@@ -45,38 +45,28 @@ By default, some observation noise is added to the modle. The functions ``print`
 
 gives the following output: ::
 
-    Marginal log-likelihood: -4.479e+00
-           Name        |  Value   |  Constraints  |  Ties  |  Prior  
-    -----------------------------------------------------------------
-       rbf_variance    |  1.0000  |               |        |         
-      rbf_lengthscale  |  1.0000  |               |        |         
-      noise_variance   |  1.0000  |               |        |         
-
+  Name                 : GP regression
+  Log-likelihood       : -22.8178418808
+  Number of Parameters : 3
+  Parameters:
+    GP_regression.           |  Value  |  Constraint  |  Prior  |  Tied to
+    rbf.variance             |    1.0  |     +ve      |         |         
+    rbf.lengthscale          |    1.0  |     +ve      |         |         
+    Gaussian_noise.variance  |    1.0  |     +ve      |         |         
+  
 .. figure::  Figures/tuto_GP_regression_m1.png
     :align:   center
     :height: 350px
 
-    GP regression model before optimization of the parameters. The shaded region corresponds to 95% confidence intervals (ie +/- 2 standard deviation).
+    GP regression model before optimization of the parameters. The shaded region corresponds to ~95% confidence intervals (ie +/- 2 standard deviation).
 
-The default values of the kernel parameters may not be relevant for the current data (for example, the confidence intervals seems too wide on the previous figure). A common approach is to find the values of the parameters that maximize the likelihood of the data. There are two steps for doing that with GPy:
+The default values of the kernel parameters may not be relevant for
+the current data (for example, the confidence intervals seems too wide
+on the previous figure). A common approach is to find the values of
+the parameters that maximize the likelihood of the data. It is as easy as
+calling ``m.optimize`` in GPy::
 
-* Constrain the parameters of the kernel to ensure the kernel will always be a valid covariance structure (For example, we don\'t want some variances to be negative!).
-* Run the optimization
-
-There are various ways to constrain the parameters of the kernel. The most basic is to constrain all the parameters to be positive::
-
-    m.ensure_default_constraints() # or similarly m.constrain_positive('')
-
-but it is also possible to set a range on to constrain one parameter to be fixed. The parameter of ``m.constrain_positive`` is a regular expression that matches the name of the parameters to be constrained (as seen in ``print m``). For example, if we want the variance to be positive, the lengthscale to be in [1,10] and the noise variance to be fixed we can write::
-
-    m.unconstrain('')               # may be used to remove the previous constrains
-    m.constrain_positive('.*rbf_variance')
-    m.constrain_bounded('.*lengthscale',1.,10. )
-    m.constrain_fixed('.*noise',0.0025)
-
-Once the constrains have been imposed, the model can be optimized::
-
-    m.optimize()
+  m.optimize()
 
 If we want to perform some restarts to try to improve the result of the optimization, we can use the ``optimize_restart`` function::
 
@@ -84,13 +74,15 @@ If we want to perform some restarts to try to improve the result of the optimiza
 
 Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting model  resulting model::
 
-    Marginal log-likelihood: 3.603e+01
-           Name        |  Value   |  Constraints  |  Ties  |  Prior  
-    -----------------------------------------------------------------
-       rbf_variance    |  0.8151  |     (+ve)     |        |         
-      rbf_lengthscale  |  1.8037  |  (1.0, 10.0)  |        |         
-      noise_variance   |  0.0025  |     Fixed     |        |         
-
+  Name                 : GP regression
+  Log-likelihood       : 11.947469082
+  Number of Parameters : 3
+  Parameters:
+    GP_regression.           |       Value        |  Constraint  |  Prior  |  Tied to
+    rbf.variance             |     0.74229417323  |     +ve      |         |         
+    rbf.lengthscale          |     1.43020495724  |     +ve      |         |         
+    Gaussian_noise.variance  |  0.00325654460991  |     +ve      |         |         
+  
 .. figure::  Figures/tuto_GP_regression_m2.png
     :align:   center
     :height: 350px
@@ -113,30 +105,36 @@ Here is a 2 dimensional example::
     Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
 
     # define kernel
-    ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
+    ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.White(2)
 
     # create simple GP model
     m = GPy.models.GPRegression(X,Y,ker)
 
-    # contrain all parameters to be positive
-    m.constrain_positive('')
-
     # optimize and plot
-    m.optimize('tnc', max_f_eval = 1000)
+    m.optimize(max_f_eval = 1000)
     m.plot()
     print(m)
 
 The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (ie the GP is not isotropic). The output of the last two lines is::
 
-    Marginal log-likelihood: 6.682e+01
-             Name          |  Value   |  Constraints  |  Ties  |  Prior  
-    ---------------------------------------------------------------------
-        Mat52_variance     |  0.3860  |     (+ve)     |        |         
-      Mat52_lengthscale_0  |  2.0578  |     (+ve)     |        |         
-      Mat52_lengthscale_1  |  1.8542  |     (+ve)     |        |         
-        white_variance     |  0.0023  |     (+ve)     |        |         
-        noise variance     |  0.0000  |     (+ve)     |        |         
+  Name                 : GP regression
+  Log-likelihood       : 26.787156248
+  Number of Parameters : 5
+  Parameters:
+    GP_regression.           |        Value        |  Constraint  |  Prior  |  Tied to
+    add.Mat52.variance       |     0.385463739076  |     +ve      |         |         
+    add.Mat52.lengthscale    |               (2,)  |     +ve      |         |         
+    add.white.variance       |  0.000835329608514  |     +ve      |         |         
+    Gaussian_noise.variance  |  0.000835329608514  |     +ve      |         |         
 
+If you want to see the ``ARD`` parameters explicitly print them
+directly::
+
+  >>> print m.add.Mat52.lengthscale
+    Index  |  GP_regression.add.Mat52.lengthscale  |  Constraint  |   Prior   |  Tied to
+     [0]   |                            1.9575587  |     +ve      |           |    N/A    
+     [1]   |                            1.9689948  |     +ve      |           |    N/A    
+  
 .. figure::  Figures/tuto_GP_regression_m3.png
     :align:   center
     :height: 350px
diff --git a/doc/tuto_creating_new_models.rst b/doc/tuto_creating_new_models.rst
index c5196c33..07f6194f 100644
--- a/doc/tuto_creating_new_models.rst
+++ b/doc/tuto_creating_new_models.rst
@@ -20,13 +20,13 @@ input parameters :math:`\mathbf{X}`. Where
 Obligatory methods
 ==================
 
-:py:meth:`~GPy.core.model.Model.__init__` :
+:py:func:`~GPy.core.model.Model.__init__` :
 	Initialize the model with the given parameters. These need to
 	be added to the model by calling
 	`self.add_parameter(<param>)`, where param needs to be a
 	parameter handle (See parameterized_ for details).::
 	
-		self.X = GPy.core.Param("input", X)
+		self.X = GPy.Param("input", X)
 		self.add_parameter(self.X)
 		
 :py:meth:`~GPy.core.model.Model.log_likelihood` :
@@ -41,11 +41,59 @@ Obligatory methods
     each parameter handle in the hierarchy with respect to the
     log_likelihod. Thus here we need to set the negative derivative of
     the rosenbrock function for the parameters. In this case it is the
-    gradient for self.X:
+    gradient for self.X.::
 
  		self.X.gradient = -scipy.optimize.rosen_der(self.X)
 
 
+Here the full code for the `Rosen` class::
+
+  from GPy import Model, Param
+  import scipy
+  class Rosen(Model):
+      def __init__(self, X, name='rosenbrock'):
+          super(Rosen, self).__init__(name=name)
+          self.X = Param("input", X)
+          self.add_parameter(self.X)
+      def log_likelihood(self):
+          return -scipy.optimize.rosen(self.X)
+      def parameters_changed(self):
+          self.X.gradient = -scipy.optimize.rosen_der(self.X)
+
+In order to test the newly created model, we can check the gradients
+and optimize a standard rosenbrock run::
+
+  >>> m = Rosen(np.array([-1,-1]))
+  >>> print m
+  Name                 : rosenbrock
+  Log-likelihood       : -404.0
+  Number of Parameters : 2
+  Parameters:
+    rosenbrock.  |  Value  |  Constraint  |  Prior  |  Tied to
+    input        |   (2,)  |              |         |         
+  >>> m.checkgrad(verbose=True)
+             Name           |     Ratio     |  Difference   |  Analytical   |   Numerical   
+  ------------------------------------------------------------------------------------------
+   rosenbrock.input[[0]]    |   1.000000    |   0.000000    |  -804.000000  |  -804.000000  
+   rosenbrock.input[[1]]    |   1.000000    |   0.000000    |  -400.000000  |  -400.000000  
+  >>> m.optimize()
+  >>> print m
+  Name                 : rosenbrock
+  Log-likelihood       : -6.52150088871e-15
+  Number of Parameters : 2
+  Parameters:
+    rosenbrock.  |  Value  |  Constraint  |  Prior  |  Tied to
+    input        |   (2,)  |              |         |         
+  >>> print m.input
+    Index  |  rosenbrock.input  |  Constraint  |   Prior   |  Tied to
+     [0]   |        0.99999994  |              |           |    N/A    
+     [1]   |        0.99999987  |              |           |    N/A    
+  >>> print m.gradient
+  [ -1.91169809e-06,   1.01852309e-06]
+  
+This is the optimum for the 2D Rosenbrock function, as expected, and
+the gradients of the inputs are almost zero.
+
 Optional methods
 ================
 
diff --git a/doc/tuto_interacting_with_models.rst b/doc/tuto_interacting_with_models.rst
index 184d9c17..80b2ac77 100644
--- a/doc/tuto_interacting_with_models.rst
+++ b/doc/tuto_interacting_with_models.rst
@@ -70,7 +70,7 @@ This will print the details of this particular parameter handle::
 When you want to get a closer look into
 multivalue parameters, print them directly::
 
-  print m.inducing_indputs
+  print m.inducing_inputs
 
   Index  |  sparse_gp.inducing_inputs  |  Constraint  |   Prior   |  Tied to
   [0 0]  |                  2.7189499  |              |           |    N/A    
@@ -99,7 +99,7 @@ You should see this::
     sparse_gp.               |       Value        |  Constraint  |  Prior  |  Tied to
     inducing inputs          |            (5, 1)  |              |         |         
     rbf.variance             |     1.91644016819  |     +ve      |         |         
-    rbf.lengthscale          |              0.2  |     +ve      |         |         
+    rbf.lengthscale          |               0.2  |     +ve      |         |         
     Gaussian_noise.variance  |  0.00269870373421  |     +ve      |         |           
 
 This will already have updated the model's inner state, so you can
@@ -121,7 +121,7 @@ values matched by calling `values()` on the returned object::
   >>> print m['.*var'].values()
   [ 2.1500132   0.00242682]
   >>> print m['rbf']
-     Index  |   sparse_gp.rbf.variance    |  Constraint  |    Prior     |  Tied to
+    Index  |   sparse_gp.rbf.variance    |  Constraint  |    Prior     |  Tied to
      [0]   |                  2.1500132  |              |              |    N/A    
     -----  |  sparse_gp.rbf.lengthscale  |  ----------  |  ----------  |  -------
      [0]   |                  2.6782803  |              |              |    N/A    
@@ -205,9 +205,7 @@ Getting the model's log likelihood
 =============================================
 Appart form the printing the model,  the marginal 
 log-likelihood can be obtained by using the function
-``log_likelihood()``. Also, the log-likelihood gradients
-wrt. each parameter can be obtained with the funcion
-``_log_likelihood_gradients()``. ::
+``log_likelihood()``.::
 
     >>> m.log_likelihood()
     array([-152.83377316])
@@ -255,6 +253,28 @@ when reconstraining a parameter, which was already constrained::
 	>>>m.unconstrain()
 	array([6, 7])
 
+If you want to unconstrain only a specific constraint, you can pass it
+as an argument of ``unconstrain(Transformation)`` (:py:class:`~GPy.constraints.Transformation`), or call
+the respective method, such as ``unconstrain_fixed()`` (or
+``unfix()``) to only unfix fixed parameters.::
+
+  >>> m.inducing_inputs[0].fix()
+  >>> m.unfix()
+  >>> m.rbf.constrain_positive()
+  >>> print m
+  Name                 : sparse gp
+  Log-likelihood       : 620.741066698
+  Number of Parameters : 8
+  Parameters:
+    sparse_gp.               |       Value        |  Constraint  |  Prior  |  Tied to
+    inducing inputs          |            (5, 1)  |              |         |         
+    rbf.variance             |     1.48329711218  |     +ve      |         |         
+    rbf.lengthscale          |      2.5430947048  |     +ve      |         |         
+    Gaussian_noise.variance  |  0.00229714444128  |              |         |         
+
+As you can see, ``unfix()`` only unfixed the inducing_input, and did
+not change the positive constraint of the kernel.
+
 The parameter handles come with default constraints, so you will
 rarely be needing to adjust the constraints of a model. In the rare
 cases of needing to adjust the constraints of a model, or in need of
@@ -263,72 +283,59 @@ fixing some parameters, you can do so with the functions
 
     m['.*var'].constrain_positive()
 
-For convenience, GPy also provides a catch all function 
-which ensures that anything which appears to require 
-positivity is constrianed appropriately::
+Available Constraints
+=====================
 
-    m.ensure_default_constraints()
+* :py:class:`~GPy.constraints.Logexp`
+* :py:class:`~GPy.constraints.Exponent`
+* :py:class:`~GPy.constraints.Square`
+* :py:class:`~GPy.constraints.Logistic`
+* :py:class:`~GPy.constraints.LogexpNeg`
+* :py:class:`~GPy.constraints.NegativeExponent`
+* :py:class:`~GPy.constraints.NegativeLogexp`
 
-Fixing parameters
-=================
-Parameters values can be fixed using ``constrain_fixed()``. 
-For example we can define the first inducing input to be 
-fixed on zero: ::
-
-    m.constrain_fixed('iip_0',0)
-	
-Bounding parameters
-===================
-Defining bounding constraints is an easily task in GPy too,
-it only requires to use the function ``constrain_bounded()``.
-For example, lets bound inducing inputs 2 and 3 to have
-values between -4 and -1: ::
-
-    m.constrain_bounded('iip_(1|2)',-4,-1)
 
 Tying Parameters
-================
-The values of two or more parameters can be tied together,
-so that they share the same value during optimization.
-The function to do so is ``tie_params()``. For the example
-we are using, it doesn't make sense to tie parameters together,
-however for the sake of the example we will tie the white noise
-and the variance together. See `A kernel overview <tuto_kernel_overview.html>`_.
-for a proper use of the tying capabilities.::
+================
+Not yet implemented for GPy version 0.6.0
 
-    m.tie_params('.*e_var')
 
 Optimizing the model
 ====================
+
 Once we have finished defining the constraints, 
 we can now optimize the model with the function
 ``optimize``.::
 
-    m.optimize()
+  m.Gaussian_noise.constrain_positive()
+  m.rbf.constrain_positive()
+  m.optimize()
 
-We can print again the model and check the new results.
-The table now shows that ``iip_0_0`` is fixed, ``iip_1_0`` 
-and ``iip_2_0`` are bounded and the kernel parameters are constrained to
-be positive. In addition the table now indicates that
-white_variance and noise_variance are tied together.::
+By default, GPy uses the lbfgsb optimizer.
+ 
+Some optional parameters may be discussed here.
 
-	Log-likelihood: 9.967e+01
+* ``optimizer``: which optimizer to use, currently there are ``lbfgsb, fmin_tnc,
+  scg, simplex`` or any unique identifier uniquely identifying an
+  optimizer. Thus, you can say ``m.optimize('bfgs')`` for using the
+  ``lbfgsb`` optimizer
+* ``messages``: if the optimizer is verbose. Each optimizer has its
+  own way of printing, so do not be confused by differing messages of
+  different optimizers
+* ``max_iters``: Maximum number of iterations to take. Some optimizers
+  see iterations as function calls, others as iterations of the
+  algorithm. Please be advised to look into ``scipy.optimize`` for
+  more instructions, if the number of iterations matters, so you can
+  give the right parameters to ``optimize()``
+* ``gtol``: only for some optimizers. Will determine the convergence
+  criterion, as the tolerance of gradient to finish the optimization.
 
-  	     Name        |   Value   |  Constraints  |  Ties  |  Prior  
-	------------------------------------------------------------------
-	    iip_0_0      |  0.0000   |     Fixed     |        |         
-	    iip_1_0      |  -2.8834  |   (-4, -1)    |        |         
-	    iip_2_0      |  -1.9152  |   (-4, -1)    |        |         
-	    iip_3_0      |  1.5034   |               |        |         
-	    iip_4_0      |  -1.0162  |               |        |         
-	 rbf_variance    |  0.0158   |     (+ve)     |        |         
-	rbf_lengthscale  |  0.9760   |     (+ve)     |        |         
-	white_variance   |  0.0049   |     (+ve)     |  (0)   |         
-	noise_variance   |  0.0049   |     (+ve)     |  (0)   |         
+Further Reading 
+=============== 
 
-
-Further Reading
-===============
-All of the mechansiams for dealing with parameters are baked right into GPy.core.model, from which all of the classes in GPy.models inherrit. To learn how to construct your own model, you might want to read :ref:`creating_new_models`. 
-
-By deafult, GPy uses the scg optimizer. To use other optimisers, and to control the setting of those optimisers, as well as other funky features like automated restarts and diagnostics, you can read the optimization tutorial ??link??.
+All of the mechanisms for dealing
+with parameters are baked right into GPy.core.model, from which all of
+the classes in GPy.models inherit. To learn how to construct your own
+model, you might want to read :ref:`creating_new_models`.  If you want
+to learn how to create kernels, please refer to
+:ref:`creating_new_kernels`.

From 4543fc3480a3244f71912a053efb52fd3e61dd09 Mon Sep 17 00:00:00 2001
From: Max Zwiessele <ibinbei@gmail.com>
Date: Mon, 8 Sep 2014 08:57:28 +0100
Subject: [PATCH 05/22] [link|unlink_parameter] renaming add_parameter to
 link_parameter

---
 GPy/core/gp.py                                |  4 +--
 GPy/core/model.py                             |  2 +-
 GPy/core/parameterization/parameterized.py    | 25 ++++++++++++-------
 GPy/core/parameterization/variational.py      |  6 ++---
 GPy/core/sparse_gp.py                         |  2 +-
 GPy/core/symbolic.py                          |  2 +-
 GPy/kern/_src/ODE_UY.py                       |  2 +-
 GPy/kern/_src/add.py                          |  8 +++---
 GPy/kern/_src/brownian.py                     |  2 +-
 GPy/kern/_src/coregionalize.py                |  2 +-
 GPy/kern/_src/kern.py                         |  2 +-
 GPy/kern/_src/linear.py                       |  4 +--
 GPy/kern/_src/mlp.py                          |  2 +-
 GPy/kern/_src/periodic.py                     |  2 +-
 GPy/kern/_src/poly.py                         |  2 +-
 GPy/kern/_src/static.py                       |  2 +-
 GPy/kern/_src/stationary.py                   |  4 +--
 GPy/likelihoods/gamma.py                      |  2 +-
 GPy/likelihoods/gaussian.py                   |  2 +-
 GPy/likelihoods/mixed_noise.py                |  2 +-
 GPy/likelihoods/student_t.py                  |  4 +--
 GPy/mappings/linear.py                        |  2 +-
 GPy/models/bayesian_gplvm.py                  |  2 +-
 .../gp_kronecker_gaussian_regression.py       |  6 ++---
 GPy/models/gp_var_gauss.py                    |  6 ++---
 GPy/models/gplvm.py                           |  2 +-
 GPy/models/gradient_checker.py                |  2 +-
 GPy/models/mrd.py                             |  8 +++---
 GPy/testing/kernel_tests.py                   |  6 ++---
 GPy/testing/model_tests.py                    | 14 +++++------
 GPy/testing/observable_tests.py               | 14 +++++------
 GPy/testing/parameterized_tests.py            | 24 +++++++++---------
 GPy/testing/pickle_tests.py                   |  4 +--
 33 files changed, 90 insertions(+), 83 deletions(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 8ce3482c..7b010e6c 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -90,8 +90,8 @@ class GP(Model):
         self.inference_method = inference_method
 
         logger.info("adding kernel and likelihood as parameters")
-        self.add_parameter(self.kern)
-        self.add_parameter(self.likelihood)
+        self.link_parameter(self.kern)
+        self.link_parameter(self.likelihood)
 
     def parameters_changed(self):
         self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
diff --git a/GPy/core/model.py b/GPy/core/model.py
index c4fc7fd5..8c556da2 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -23,7 +23,7 @@ class Model(Parameterized):
         self.preferred_optimizer = 'bfgs'
         from .parameterization.ties_and_remappings import Tie
         self.tie = Tie()
-        self.add_parameter(self.tie, -1)
+        self.link_parameter(self.tie, -1)
         self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500)
 
     def log_likelihood(self):
diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py
index ba5cdf1a..7b5911a5 100644
--- a/GPy/core/parameterization/parameterized.py
+++ b/GPy/core/parameterization/parameterized.py
@@ -82,7 +82,7 @@ class Parameterized(Parameterizable):
             self._fixes_ = None
         self._param_slices_ = []
         #self._connect_parameters()
-        self.add_parameters(*parameters)
+        self.link_parameters(*parameters)
 
     def build_pydot(self, G=None):
         import pydot  # @UnresolvedImport
@@ -110,7 +110,7 @@ class Parameterized(Parameterizable):
     #===========================================================================
     # Add remove parameters:
     #===========================================================================
-    def add_parameter(self, param, index=None, _ignore_added_names=False):
+    def link_parameter(self, param, index=None, _ignore_added_names=False):
         """
         :param parameters:  the parameters to add
         :type parameters:   list of or one :py:class:`GPy.core.param.Param`
@@ -122,8 +122,8 @@ class Parameterized(Parameterizable):
         at any given index using the :func:`list.insert` syntax
         """
         if param in self.parameters and index is not None:
-            self.remove_parameter(param)
-            self.add_parameter(param, index)
+            self.unlink_parameter(param)
+            self.link_parameter(param, index)
         # elif param.has_parent():
         #    raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short())
         elif param not in self.parameters:
@@ -132,7 +132,7 @@ class Parameterized(Parameterizable):
                     if parent is self:
                         raise HierarchyError, "You cannot add a parameter twice into the hierarchy"
                 param.traverse_parents(visit, self)
-                param._parent_.remove_parameter(param)
+                param._parent_.unlink_parameter(param)
             # make sure the size is set
             if index is None:
                 start = sum(p.size for p in self.parameters)
@@ -168,14 +168,14 @@ class Parameterized(Parameterizable):
             raise HierarchyError, """Parameter exists already, try making a copy"""
 
 
-    def add_parameters(self, *parameters):
+    def link_parameters(self, *parameters):
         """
         convenience method for adding several
         parameters without gradient specification
         """
-        [self.add_parameter(p) for p in parameters]
+        [self.link_parameter(p) for p in parameters]
 
-    def remove_parameter(self, param):
+    def unlink_parameter(self, param):
         """
         :param param: param object to remove from being a parameter of this parameterized object.
         """
@@ -206,6 +206,11 @@ class Parameterized(Parameterizable):
         self._highest_parent_._connect_fixes()
         self._highest_parent_._notify_parent_change()
 
+    def add_parameter(self, *args, **kwargs):  # deprecated stub: always raises; use link_parameter instead
+        raise DeprecationWarning, "add_parameter was renamed to link_parameter to avoid confusion of setting variables"
+    def remove_parameter(self, *args, **kwargs):  # deprecated stub: always raises; use unlink_parameter instead
+        raise DeprecationWarning, "remove_parameter was renamed to unlink_parameter to avoid confusion of setting variables"
+
     def _connect_parameters(self, ignore_added_names=False):
         # connect parameterlist to this parameterized object
         # This just sets up the right connection for the params objects
@@ -294,7 +299,9 @@ class Parameterized(Parameterizable):
         if hasattr(self, "parameters"):
             try:
                 pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False)
-                if name in pnames: self.parameters[pnames.index(name)][:] = val; return
+                if name in pnames:
+                    param = self.parameters[pnames.index(name)]
+                    param[:] = val; return
             except AttributeError:
                 pass
         object.__setattr__(self, name, val);
diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py
index 2afeafbb..251ec7db 100644
--- a/GPy/core/parameterization/variational.py
+++ b/GPy/core/parameterization/variational.py
@@ -42,7 +42,7 @@ class SpikeAndSlabPrior(VariationalPrior):
             self.pi = Param('Pi', pi, Logistic(1e-10,1.-1e-10))
         else:
             self.pi = Param('Pi', pi, __fixed__)
-        self.add_parameter(self.pi)
+        self.link_parameter(self.pi)
 
 
     def KL_divergence(self, variational_posterior):
@@ -89,7 +89,7 @@ class VariationalPosterior(Parameterized):
         self.ndim = self.mean.ndim
         self.shape = self.mean.shape
         self.num_data, self.input_dim = self.mean.shape
-        self.add_parameters(self.mean, self.variance)
+        self.link_parameters(self.mean, self.variance)
         self.num_data, self.input_dim = self.mean.shape
         if self.has_uncertain_inputs():
             assert self.variance.shape == self.mean.shape, "need one variance per sample and dimenion"
@@ -156,7 +156,7 @@ class SpikeAndSlabPosterior(VariationalPosterior):
         """
         super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
         self.gamma = Param("binary_prob",binary_prob, Logistic(1e-10,1.-1e-10))
-        self.add_parameter(self.gamma)
+        self.link_parameter(self.gamma)
 
     def __getitem__(self, s):
         if isinstance(s, (int, slice, tuple, list, np.ndarray)):
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 358db125..6b923609 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -50,7 +50,7 @@ class SparseGP(GP):
 
         GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
         logger.info("Adding Z as parameter")
-        self.add_parameter(self.Z, index=0)
+        self.link_parameter(self.Z, index=0)
 
     def has_uncertain_inputs(self):
         return isinstance(self.X, VariationalPosterior)
diff --git a/GPy/core/symbolic.py b/GPy/core/symbolic.py
index a2d61911..c3e1a52c 100644
--- a/GPy/core/symbolic.py
+++ b/GPy/core/symbolic.py
@@ -127,7 +127,7 @@ class Symbolic_core():
                     val = parameters[theta.name]
             # Add parameter.
             
-            self.add_parameters(Param(theta.name, val, None))
+            self.link_parameters(Param(theta.name, val, None))
             #self._set_attribute(theta.name, )
 
     def eval_parameters_changed(self):
diff --git a/GPy/kern/_src/ODE_UY.py b/GPy/kern/_src/ODE_UY.py
index 510b4f7c..b4a2b42d 100644
--- a/GPy/kern/_src/ODE_UY.py
+++ b/GPy/kern/_src/ODE_UY.py
@@ -17,7 +17,7 @@ class ODE_UY(Kern):
         self.lengthscale_Y = Param('lengthscale_Y', lengthscale_Y, Logexp())
         self.lengthscale_U = Param('lengthscale_U', lengthscale_Y, Logexp())
 
-        self.add_parameters(self.variance_Y, self.variance_U, self.lengthscale_Y, self.lengthscale_U)
+        self.link_parameters(self.variance_Y, self.variance_U, self.lengthscale_Y, self.lengthscale_U)
 
     def K(self, X, X2=None):
         # model :   a * dy/dt + b * y = U
diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 27f8ebd1..4c72a254 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -18,7 +18,7 @@ class Add(CombinationKernel):
             if isinstance(kern, Add):
                 del subkerns[i]
                 for part in kern.parts[::-1]:
-                    kern.remove_parameter(part)
+                    kern.unlink_parameter(part)
                     subkerns.insert(i, part)
 
         super(Add, self).__init__(subkerns, name)
@@ -171,10 +171,10 @@ class Add(CombinationKernel):
         if isinstance(other, Add):
             other_params = other.parameters[:]
             for p in other_params:
-                other.remove_parameter(p)
-            self.add_parameters(*other_params)
+                other.unlink_parameter(p)
+            self.link_parameters(*other_params)
         else:
-            self.add_parameter(other)
+            self.link_parameter(other)
         self.input_dim, self.active_dims = self.get_input_dim_active_dims(self.parts)
         return self
 
diff --git a/GPy/kern/_src/brownian.py b/GPy/kern/_src/brownian.py
index aeb11fa3..fd79973c 100644
--- a/GPy/kern/_src/brownian.py
+++ b/GPy/kern/_src/brownian.py
@@ -22,7 +22,7 @@ class Brownian(Kern):
         super(Brownian, self).__init__(input_dim, active_dims, name)
 
         self.variance = Param('variance', variance, Logexp())
-        self.add_parameters(self.variance)
+        self.link_parameters(self.variance)
 
     def K(self,X,X2=None):
         if X2 is None:
diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py
index 7eccff3d..fc4a2f33 100644
--- a/GPy/kern/_src/coregionalize.py
+++ b/GPy/kern/_src/coregionalize.py
@@ -50,7 +50,7 @@ class Coregionalize(Kern):
         else:
             assert kappa.shape==(self.output_dim, )
         self.kappa = Param('kappa', kappa, Logexp())
-        self.add_parameters(self.W, self.kappa)
+        self.link_parameters(self.W, self.kappa)
 
     def parameters_changed(self):
         self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py
index d8377ffc..4fcbf31f 100644
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@@ -221,7 +221,7 @@ class CombinationKernel(Kern):
         # initialize the kernel with the full input_dim
         super(CombinationKernel, self).__init__(input_dim, active_dims, name)
         self.extra_dims = extra_dims
-        self.add_parameters(*kernels)
+        self.link_parameters(*kernels)
 
     @property
     def parts(self):
diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py
index c30e344e..9d1a956b 100644
--- a/GPy/kern/_src/linear.py
+++ b/GPy/kern/_src/linear.py
@@ -49,7 +49,7 @@ class Linear(Kern):
                 variances = np.ones(self.input_dim)
 
         self.variances = Param('variances', variances, Logexp())
-        self.add_parameter(self.variances)
+        self.link_parameter(self.variances)
         self.psicomp = PSICOMP_Linear()
 
     @Cache_this(limit=2)
@@ -144,7 +144,7 @@ class LinearFull(Kern):
 
         self.W = Param('W', W)
         self.kappa = Param('kappa', kappa, Logexp())
-        self.add_parameters(self.W, self.kappa)
+        self.link_parameters(self.W, self.kappa)
 
     def K(self, X, X2=None):
         P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
diff --git a/GPy/kern/_src/mlp.py b/GPy/kern/_src/mlp.py
index 0b561d4b..badbd60d 100644
--- a/GPy/kern/_src/mlp.py
+++ b/GPy/kern/_src/mlp.py
@@ -36,7 +36,7 @@ class MLP(Kern):
         self.variance = Param('variance', variance, Logexp())
         self.weight_variance = Param('weight_variance', weight_variance, Logexp())
         self.bias_variance = Param('bias_variance', bias_variance, Logexp())
-        self.add_parameters(self.variance, self.weight_variance, self.bias_variance)
+        self.link_parameters(self.variance, self.weight_variance, self.bias_variance)
 
 
     def K(self, X, X2=None):
diff --git a/GPy/kern/_src/periodic.py b/GPy/kern/_src/periodic.py
index 9f232ab0..e8e16506 100644
--- a/GPy/kern/_src/periodic.py
+++ b/GPy/kern/_src/periodic.py
@@ -33,7 +33,7 @@ class Periodic(Kern):
         self.variance = Param('variance', np.float64(variance), Logexp())
         self.lengthscale = Param('lengthscale', np.float64(lengthscale), Logexp())
         self.period = Param('period', np.float64(period), Logexp())
-        self.add_parameters(self.variance, self.lengthscale, self.period)
+        self.link_parameters(self.variance, self.lengthscale, self.period)
 
     def _cos(self, alpha, omega, phase):
         def f(x):
diff --git a/GPy/kern/_src/poly.py b/GPy/kern/_src/poly.py
index d40f805c..4c5f0e93 100644
--- a/GPy/kern/_src/poly.py
+++ b/GPy/kern/_src/poly.py
@@ -14,7 +14,7 @@ class Poly(Kern):
     def __init__(self, input_dim, variance=1., order=3., active_dims=None, name='poly'):
         super(Poly, self).__init__(input_dim, active_dims, name)
         self.variance = Param('variance', variance, Logexp())
-        self.add_parameter(self.variance)
+        self.link_parameter(self.variance)
         self.order=order
 
     def K(self, X, X2=None):
diff --git a/GPy/kern/_src/static.py b/GPy/kern/_src/static.py
index 7820c634..f4223bf4 100644
--- a/GPy/kern/_src/static.py
+++ b/GPy/kern/_src/static.py
@@ -11,7 +11,7 @@ class Static(Kern):
     def __init__(self, input_dim, variance, active_dims, name):
         super(Static, self).__init__(input_dim, active_dims, name)
         self.variance = Param('variance', variance, Logexp())
-        self.add_parameters(self.variance)
+        self.link_parameters(self.variance)
 
     def Kdiag(self, X):
         ret = np.empty((X.shape[0],), dtype=np.float64)
diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py
index f7993e82..04427c2c 100644
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@@ -61,7 +61,7 @@ class Stationary(Kern):
         self.lengthscale = Param('lengthscale', lengthscale, Logexp())
         self.variance = Param('variance', variance, Logexp())
         assert self.variance.size==1
-        self.add_parameters(self.variance, self.lengthscale)
+        self.link_parameters(self.variance, self.lengthscale)
 
     def K_of_r(self, r):
         raise NotImplementedError, "implement the covariance function as a fn of r to use this class"
@@ -343,7 +343,7 @@ class RatQuad(Stationary):
     def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, active_dims=None, name='RatQuad'):
         super(RatQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
         self.power = Param('power', power, Logexp())
-        self.add_parameters(self.power)
+        self.link_parameters(self.power)
 
     def K_of_r(self, r):
         r2 = np.power(r, 2.)
diff --git a/GPy/likelihoods/gamma.py b/GPy/likelihoods/gamma.py
index a6436616..ae85c113 100644
--- a/GPy/likelihoods/gamma.py
+++ b/GPy/likelihoods/gamma.py
@@ -25,7 +25,7 @@ class Gamma(Likelihood):
         super(Gamma, self).__init__(gp_link, 'Gamma')
 
         self.beta = Param('beta', beta)
-        self.add_parameter(self.beta)
+        self.link_parameter(self.beta)
         self.beta.fix()#TODO: gradients!
 
     def pdf_link(self, link_f, y, Y_metadata=None):
diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index 6f08b4b4..4e10d3ef 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -40,7 +40,7 @@ class Gaussian(Likelihood):
         super(Gaussian, self).__init__(gp_link, name=name)
 
         self.variance = Param('variance', variance, Logexp())
-        self.add_parameter(self.variance)
+        self.link_parameter(self.variance)
 
         if isinstance(gp_link, link_functions.Identity):
             self.log_concave = True
diff --git a/GPy/likelihoods/mixed_noise.py b/GPy/likelihoods/mixed_noise.py
index c2435508..613f069d 100644
--- a/GPy/likelihoods/mixed_noise.py
+++ b/GPy/likelihoods/mixed_noise.py
@@ -14,7 +14,7 @@ class MixedNoise(Likelihood):
         #NOTE at the moment this likelihood only works for using a list of gaussians
         super(Likelihood, self).__init__(name=name)
 
-        self.add_parameters(*likelihoods_list)
+        self.link_parameters(*likelihoods_list)
         self.likelihoods_list = likelihoods_list
         self.log_concave = False
 
diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py
index c057e789..3aeb43e0 100644
--- a/GPy/likelihoods/student_t.py
+++ b/GPy/likelihoods/student_t.py
@@ -29,8 +29,8 @@ class StudentT(Likelihood):
         # sigma2 is not a noise parameter, it is a squared scale.
         self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
         self.v = Param('deg_free', float(deg_free))
-        self.add_parameter(self.sigma2)
-        self.add_parameter(self.v)
+        self.link_parameter(self.sigma2)
+        self.link_parameter(self.v)
         self.v.constrain_fixed()
 
         self.log_concave = False
diff --git a/GPy/mappings/linear.py b/GPy/mappings/linear.py
index 24a45511..315dfc0e 100644
--- a/GPy/mappings/linear.py
+++ b/GPy/mappings/linear.py
@@ -24,7 +24,7 @@ class Linear(Bijective_mapping):
         Bijective_mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
         self.W = Param('W',np.array((self.input_dim, self.output_dim)))
         self.bias = Param('bias',np.array(self.output_dim))
-        self.add_parameters(self.W, self.bias)
+        self.link_parameters(self.W, self.bias)
 
     def f(self, X):
         return np.dot(X,self.W) + self.bias
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index c9d1c68a..a4227119 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -78,7 +78,7 @@ class BayesianGPLVM(SparseGP):
 
         SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, normalizer=normalizer)
         self.logger.info("Adding X as parameter")
-        self.add_parameter(self.X, index=0)
+        self.link_parameter(self.X, index=0)
 
         if mpi_comm != None:
             from ..util.mpi import divide_data
diff --git a/GPy/models/gp_kronecker_gaussian_regression.py b/GPy/models/gp_kronecker_gaussian_regression.py
index 0e8dab81..434661d2 100644
--- a/GPy/models/gp_kronecker_gaussian_regression.py
+++ b/GPy/models/gp_kronecker_gaussian_regression.py
@@ -35,12 +35,12 @@ class GPKroneckerGaussianRegression(Model):
         self.X2 = ObsAr(X2)
         self.Y = Y
         self.kern1, self.kern2 = kern1, kern2
-        self.add_parameter(self.kern1)
-        self.add_parameter(self.kern2)
+        self.link_parameter(self.kern1)
+        self.link_parameter(self.kern2)
 
         self.likelihood = likelihoods.Gaussian()
         self.likelihood.variance = noise_var
-        self.add_parameter(self.likelihood)
+        self.link_parameter(self.likelihood)
 
         self.num_data1, self.input_dim1 = self.X1.shape
         self.num_data2, self.input_dim2 = self.X2.shape
diff --git a/GPy/models/gp_var_gauss.py b/GPy/models/gp_var_gauss.py
index 68b62443..cd688360 100644
--- a/GPy/models/gp_var_gauss.py
+++ b/GPy/models/gp_var_gauss.py
@@ -32,13 +32,13 @@ class GPVariationalGaussianApproximation(Model):
         if kernel is None:
             kernel = kern.RBF(X.shape[1]) + kern.White(X.shape[1], 0.01)
         self.kern = kernel
-        self.add_parameter(self.kern)
+        self.link_parameter(self.kern)
         self.num_data, self.input_dim = self.X.shape
 
         self.alpha = Param('alpha', np.zeros(self.num_data))
         self.beta = Param('beta', np.ones(self.num_data))
-        self.add_parameter(self.alpha)
-        self.add_parameter(self.beta)
+        self.link_parameter(self.alpha)
+        self.link_parameter(self.beta)
 
         self.gh_x, self.gh_w = np.polynomial.hermite.hermgauss(20)
         self.Ysign = np.where(Y==1, 1, -1).flatten()
diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py
index 8f5432ba..79128270 100644
--- a/GPy/models/gplvm.py
+++ b/GPy/models/gplvm.py
@@ -38,7 +38,7 @@ class GPLVM(GP):
 
         super(GPLVM, self).__init__(X, Y, kernel, likelihood, name='GPLVM')
         self.X = Param('latent_mean', X)
-        self.add_parameter(self.X, index=0)
+        self.link_parameter(self.X, index=0)
 
     def parameters_changed(self):
         super(GPLVM, self).parameters_changed()
diff --git a/GPy/models/gradient_checker.py b/GPy/models/gradient_checker.py
index b7c78449..74026f8e 100644
--- a/GPy/models/gradient_checker.py
+++ b/GPy/models/gradient_checker.py
@@ -76,7 +76,7 @@ class GradientChecker(Model):
 
         for name, xi in zip(self.names, at_least_one_element(x0)):
             self.__setattr__(name, Param(name, xi))
-            self.add_parameter(self.__getattribute__(name))
+            self.link_parameter(self.__getattribute__(name))
 #         self._param_names = []
 #         for name, shape in zip(self.names, self.shapes):
 #             self._param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index 3acc7c6e..015df7bd 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -129,7 +129,7 @@ class MRD(SparseGP):
         else: likelihoods = likelihoods
 
         self.logger.info("adding X and Z")
-        self.add_parameters(self.X, self.Z)
+        self.link_parameters(self.X, self.Z)
 
         self.bgplvms = []
         self.num_data = Ylist[0].shape[0]
@@ -137,11 +137,11 @@ class MRD(SparseGP):
         for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist):
             assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
             p = Parameterized(name=n)
-            p.add_parameter(k)
+            p.link_parameter(k)
             p.kern = k
-            p.add_parameter(l)
+            p.link_parameter(l)
             p.likelihood = l
-            self.add_parameter(p)
+            self.link_parameter(p)
             self.bgplvms.append(p)
 
         self.posterior = None
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index a942dc49..83e1085c 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -51,7 +51,7 @@ class Kern_check_dK_dtheta(Kern_check_model):
     """
     def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
         Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
-        self.add_parameter(self.kernel)
+        self.link_parameter(self.kernel)
 
     def parameters_changed(self):
         return self.kernel.update_gradients_full(self.dL_dK, self.X, self.X2)
@@ -64,7 +64,7 @@ class Kern_check_dKdiag_dtheta(Kern_check_model):
     """
     def __init__(self, kernel=None, dL_dK=None, X=None):
         Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
-        self.add_parameter(self.kernel)
+        self.link_parameter(self.kernel)
 
     def log_likelihood(self):
         return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum()
@@ -77,7 +77,7 @@ class Kern_check_dK_dX(Kern_check_model):
     def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
         Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
         self.X = Param('X',X)
-        self.add_parameter(self.X)
+        self.link_parameter(self.X)
 
     def parameters_changed(self):
         self.X.gradient[:] =  self.kernel.gradients_X(self.dL_dK, self.X, self.X2)
diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py
index af4b12e2..42f82121 100644
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@@ -65,28 +65,28 @@ class MiscTests(unittest.TestCase):
         np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
         m.randomize()
         m2[:] = m[''].values()
-        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+        np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
         m.randomize()
         m2[''] = m[:]
-        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+        np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
         m.randomize()
         m2[:] = m[:]
-        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+        np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
         m.randomize()
         m2[''] = m['']
-        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+        np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
 
         m.kern.lengthscale.randomize()
         m2[:] = m[:]
-        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+        np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
 
         m.Gaussian_noise.randomize()
         m2[:] = m[:]
-        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+        np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
 
         m['.*var'] = 2
         m2['.*var'] = m['.*var']
-        np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
+        np.testing.assert_almost_equal(m.log_likelihood(), m2.log_likelihood())
 
 
     def test_likelihood_set(self):
diff --git a/GPy/testing/observable_tests.py b/GPy/testing/observable_tests.py
index 05794dc3..fb9112f8 100644
--- a/GPy/testing/observable_tests.py
+++ b/GPy/testing/observable_tests.py
@@ -30,15 +30,15 @@ class Test(unittest.TestCase):
         self.par2 = ParameterizedTest('test model 2')
         self.p = Param('test parameter', numpy.random.normal(1,2,(10,3)))
 
-        self.par.add_parameter(self.p)
-        self.par.add_parameter(Param('test1', numpy.random.normal(0,1,(1,))))
-        self.par.add_parameter(Param('test2', numpy.random.normal(0,1,(1,))))
+        self.par.link_parameter(self.p)
+        self.par.link_parameter(Param('test1', numpy.random.normal(0,1,(1,))))
+        self.par.link_parameter(Param('test2', numpy.random.normal(0,1,(1,))))
 
-        self.par2.add_parameter(Param('par2 test1', numpy.random.normal(0,1,(1,))))
-        self.par2.add_parameter(Param('par2 test2', numpy.random.normal(0,1,(1,))))
+        self.par2.link_parameter(Param('par2 test1', numpy.random.normal(0,1,(1,))))
+        self.par2.link_parameter(Param('par2 test2', numpy.random.normal(0,1,(1,))))
 
-        self.parent.add_parameter(self.par)
-        self.parent.add_parameter(self.par2)
+        self.parent.link_parameter(self.par)
+        self.parent.link_parameter(self.par2)
 
         self._observer_triggered = None
         self._trigger_count = 0
diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py
index f8895b14..a51d9e09 100644
--- a/GPy/testing/parameterized_tests.py
+++ b/GPy/testing/parameterized_tests.py
@@ -37,8 +37,8 @@ class ParameterizedTest(unittest.TestCase):
         self.test1 = GPy.core.Parameterized("test model")
         self.test1.param = self.param
         self.test1.kern = self.rbf+self.white
-        self.test1.add_parameter(self.test1.kern)
-        self.test1.add_parameter(self.param, 0)
+        self.test1.link_parameter(self.test1.kern)
+        self.test1.link_parameter(self.param, 0)
         # print self.test1:
         #=============================================================================
         # test_model.          |    Value    |  Constraint   |  Prior  |  Tied to
@@ -67,11 +67,11 @@ class ParameterizedTest(unittest.TestCase):
 
     def test_fixes(self):
         self.white.fix(warning=False)
-        self.test1.remove_parameter(self.param)
+        self.test1.unlink_parameter(self.param)
         self.assertTrue(self.test1._has_fixes())
         from GPy.core.parameterization.transformations import FIXED, UNFIXED
         self.assertListEqual(self.test1._fixes_.tolist(),[UNFIXED,UNFIXED,FIXED])
-        self.test1.kern.add_parameter(self.white, 0)
+        self.test1.kern.link_parameter(self.white, 0)
         self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED])
         self.test1.kern.rbf.fix()
         self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3)
@@ -82,7 +82,7 @@ class ParameterizedTest(unittest.TestCase):
     def test_remove_parameter(self):
         from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
         self.white.fix()
-        self.test1.kern.remove_parameter(self.white)
+        self.test1.kern.unlink_parameter(self.white)
         self.assertIs(self.test1._fixes_,None)
 
         self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
@@ -90,7 +90,7 @@ class ParameterizedTest(unittest.TestCase):
         self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
         self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
 
-        self.test1.add_parameter(self.white, 0)
+        self.test1.link_parameter(self.white, 0)
         self.assertIs(self.test1.constraints, self.white.constraints._param_index_ops)
         self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
         self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
@@ -98,7 +98,7 @@ class ParameterizedTest(unittest.TestCase):
         self.assertIs(self.white._fixes_,None)
         self.assertListEqual(self.test1._fixes_.tolist(),[FIXED] + [UNFIXED] * 52)
 
-        self.test1.remove_parameter(self.white)
+        self.test1.unlink_parameter(self.white)
         self.assertIs(self.test1._fixes_,None)
         self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
         self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
@@ -107,11 +107,11 @@ class ParameterizedTest(unittest.TestCase):
 
     def test_remove_parameter_param_array_grad_array(self):
         val = self.test1.kern.param_array.copy()
-        self.test1.kern.remove_parameter(self.white)
+        self.test1.kern.unlink_parameter(self.white)
         self.assertListEqual(self.test1.kern.param_array.tolist(), val[:2].tolist())
 
     def test_add_parameter_already_in_hirarchy(self):
-        self.assertRaises(HierarchyError, self.test1.add_parameter, self.white.parameters[0])
+        self.assertRaises(HierarchyError, self.test1.link_parameter, self.white.parameters[0])
 
     def test_default_constraints(self):
         self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)
@@ -119,7 +119,7 @@ class ParameterizedTest(unittest.TestCase):
         self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2))
         from GPy.core.parameterization.transformations import Logexp
         kern = self.test1.kern
-        self.test1.remove_parameter(kern)
+        self.test1.unlink_parameter(kern)
         self.assertListEqual(kern.constraints[Logexp()].tolist(), range(3))
 
     def test_constraints(self):
@@ -127,7 +127,7 @@ class ParameterizedTest(unittest.TestCase):
         self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(self.param.size, self.param.size+self.rbf.size))
         self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [self.param.size+self.rbf.size])
 
-        self.test1.kern.remove_parameter(self.rbf)
+        self.test1.kern.unlink_parameter(self.rbf)
         self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), [])
 
     def test_constraints_views(self):
@@ -166,7 +166,7 @@ class ParameterizedTest(unittest.TestCase):
 
     def test_add_parameter_in_hierarchy(self):
         from GPy.core import Param
-        self.test1.kern.rbf.add_parameter(Param("NEW", np.random.rand(2), NegativeLogexp()), 1)
+        self.test1.kern.rbf.link_parameter(Param("NEW", np.random.rand(2), NegativeLogexp()), 1)
         self.assertListEqual(self.test1.constraints[NegativeLogexp()].tolist(), range(self.param.size+1, self.param.size+1 + 2))
         self.assertListEqual(self.test1.constraints[GPy.transformations.Logistic(0,1)].tolist(), range(self.param.size))
         self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp(0,1)].tolist(), np.r_[50, 53:55].tolist())
diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py
index d51352fe..dfabe54e 100644
--- a/GPy/testing/pickle_tests.py
+++ b/GPy/testing/pickle_tests.py
@@ -108,7 +108,7 @@ class Test(ListDictTestCase):
         par = toy_rbf_1d_50(optimize=0, plot=0)
         pcopy = par.copy()
         self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
-        self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
+        np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
         self.assertSequenceEqual(str(par), str(pcopy))
         self.assertIsNot(par.param_array, pcopy.param_array)
         self.assertIsNot(par.gradient_full, pcopy.gradient_full)
@@ -141,7 +141,7 @@ class Test(ListDictTestCase):
             f.seek(0)
             pcopy = pickle.load(f)
         np.testing.assert_allclose(par.param_array, pcopy.param_array)
-        np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
+        np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full, atol=1e-6)
         self.assertSequenceEqual(str(par), str(pcopy))
         self.assert_(pcopy.checkgrad())
 

From 4e83501ea5aef2469845420aab876cce6de176c9 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Mon, 8 Sep 2014 17:22:37 +0100
Subject: [PATCH 06/22] update sparse_gp_mpi for new interface

---
 GPy/core/sparse_gp_mpi.py                                | 9 +++++----
 .../latent_function_inference/var_dtc_parallel.py        | 4 +++-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/GPy/core/sparse_gp_mpi.py b/GPy/core/sparse_gp_mpi.py
index 73a37862..cecbe667 100644
--- a/GPy/core/sparse_gp_mpi.py
+++ b/GPy/core/sparse_gp_mpi.py
@@ -42,10 +42,10 @@ class SparseGP_MPI(SparseGP):
                 assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!'
                         
         super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
-        self.updates = False
-        self.add_parameter(self.X, index=0)
+        self.update_model(False)
+        self.link_parameter(self.X, index=0)
         if variational_prior is not None:
-            self.add_parameter(variational_prior)
+            self.link_parameter(variational_prior)
 #         self.X.fix()
 
         self.mpi_comm = mpi_comm
@@ -58,7 +58,8 @@ class SparseGP_MPI(SparseGP):
             self.Y_local = self.Y[N_start:N_end]
             print 'MPI RANK '+str(self.mpi_comm.rank)+' with the data range '+str(self.N_range)
             mpi_comm.Bcast(self.param_array, root=0)
-        self.updates = True
+        self.update_model(True)
+
 
     def __getstate__(self):
         dc = super(SparseGP_MPI, self).__getstate__()
diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py
index ab4074f4..a7e2a800 100644
--- a/GPy/inference/latent_function_inference/var_dtc_parallel.py
+++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py
@@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 from posterior import Posterior
-from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs
+from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri
 from ...util import diag
 from ...core.parameterization.variational import VariationalPosterior
 import numpy as np
@@ -172,7 +172,9 @@ class VarDTC_minibatch(LatentFunctionInference):
         diag.add(Kmm, self.const_jitter)
         r1 = checkFullRank(Kmm,name='Kmm')
         Lm = jitchol(Kmm)
+        LmInv = dtrtri(Lm)
         
+        #LmInvPsi2LmInvT = LmInv.dot(psi2_full).dot(LmInv.T)
         LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
         Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
         r2 = checkFullRank(Lambda,name='Lambda')

From 2df978dd2c1d40b51a600419347aa93bd3f30cfa Mon Sep 17 00:00:00 2001
From: Ricardo <acq11ra@sheffield.ac.uk>
Date: Tue, 9 Sep 2014 10:51:26 +0100
Subject: [PATCH 07/22] name can be modified

---
 GPy/kern/_src/hierarchical.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/GPy/kern/_src/hierarchical.py b/GPy/kern/_src/hierarchical.py
index 3ca6b444..ac360ec7 100644
--- a/GPy/kern/_src/hierarchical.py
+++ b/GPy/kern/_src/hierarchical.py
@@ -10,11 +10,11 @@ class Hierarchical(Kernpart):
     A kernel part which can reopresent a hierarchy of indepencnce: a generalisation of independent_outputs
 
     """
-    def __init__(self,parts):
+    def __init__(self,parts,name='hierarchy'):
         self.levels = len(parts)
         self.input_dim = parts[0].input_dim + 1
         self.num_params = np.sum([k.num_params for k in parts])
-        self.name = 'hierarchy'
+        self.name = name
         self.parts = parts
 
         self.param_starts = np.hstack((0,np.cumsum([k.num_params for k in self.parts[:-1]])))

From 0f47a6b35feca3bd744601d7a7abec23cfa48432 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Tue, 9 Sep 2014 11:46:19 +0100
Subject: [PATCH 08/22] adapt the numerical stability strategy from VarDTC to
 VarDTC_minibatch

---
 .../var_dtc_parallel.py                       | 37 ++++++++-----------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py
index a7e2a800..c5cf08d1 100644
--- a/GPy/inference/latent_function_inference/var_dtc_parallel.py
+++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py
@@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 from posterior import Posterior
-from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri
+from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri,pdinv
 from ...util import diag
 from ...core.parameterization.variational import VariationalPosterior
 import numpy as np
@@ -144,6 +144,7 @@ class VarDTC_minibatch(LatentFunctionInference):
         """
         
         num_data, output_dim = Y.shape 
+        input_dim = Z.shape[0]
         if self.mpi_comm != None:
             num_data_all = np.array(num_data,dtype=np.int32)
             self.mpi_comm.Allreduce([np.int32(num_data), MPI.INT], [num_data_all, MPI.INT])
@@ -167,32 +168,23 @@ class VarDTC_minibatch(LatentFunctionInference):
         #======================================================================
         
         from ...util.debug import checkFullRank
-        
+
         Kmm = kern.K(Z).copy()
         diag.add(Kmm, self.const_jitter)
         r1 = checkFullRank(Kmm,name='Kmm')
-        Lm = jitchol(Kmm)
-        LmInv = dtrtri(Lm)
+        KmmInv,Lm,LmInv,_ = pdinv(Kmm)
         
-        #LmInvPsi2LmInvT = LmInv.dot(psi2_full).dot(LmInv.T)
-        LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
+        LmInvPsi2LmInvT = LmInv.dot(psi2_full).dot(LmInv.T)
         Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
         r2 = checkFullRank(Lambda,name='Lambda')
-        if (not r1) or (not r2):
-            raise
-        LL = jitchol(Lambda)
-        LL = np.dot(Lm,LL)
-        b,_ = dtrtrs(LL, psi1Y_full.T)
+#         if (not r1) or (not r2):
+#             raise
+        LInv,LL,LLInv,logdet_L = pdinv(Lambda)
+        b = LLInv.dot(LmInv.dot(psi1Y_full.T))
         bbt = np.square(b).sum()
-        v,_ = dtrtrs(LL.T,b,lower=False)
-        vvt = np.einsum('md,od->mo',v,v)
+        v = LmInv.T.dot(LLInv.T.dot(b))
         
-        Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T
-        LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0]
-        KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0]
-        KmmInvPsi2P = dtrtrs(LL,KmmInvPsi2LLInvT.T, trans=True)[0].T
-        
-        dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2
+        dL_dpsi2R = LmInv.T.dot(-LLInv.T.dot(tdot(b)+output_dim*np.eye(input_dim)).dot(LLInv)+output_dim*np.eye(input_dim)).dot(LmInv)/2.
         
         # Cache intermediate results
         self.midRes['dL_dpsi2R'] = dL_dpsi2R
@@ -205,20 +197,21 @@ class VarDTC_minibatch(LatentFunctionInference):
             logL_R = -np.log(beta).sum()
         else:
             logL_R = -num_data*np.log(beta)
-        logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum())
+        logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*logdet_L/2.
 
         #======================================================================
         # Compute dL_dKmm
         #======================================================================
         
-        dL_dKmm =  -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2.
+#         dL_dKmm =  -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2.
+        dL_dKmm =  dL_dpsi2R - KmmInv.dot(psi2_full).dot(KmmInv)/2.
 
         #======================================================================
         # Compute the Posterior distribution of inducing points p(u|Y)
         #======================================================================
         
         if not self.Y_speedup or het_noise:
-            post = Posterior(woodbury_inv=KmmInvPsi2P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
+            post = Posterior(woodbury_inv=LmInv.T.dot(np.eye(input_dim)-LInv).dot(LmInv), woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
         else:
             post = None
         

From 5697a533e7f1912ca180a71aeade309304a9a101 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Tue, 9 Sep 2014 12:17:29 +0100
Subject: [PATCH 09/22] a bug fix for VarDTC_minibatch

---
 GPy/inference/latent_function_inference/var_dtc_parallel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py
index c5cf08d1..53b31dab 100644
--- a/GPy/inference/latent_function_inference/var_dtc_parallel.py
+++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py
@@ -204,7 +204,7 @@ class VarDTC_minibatch(LatentFunctionInference):
         #======================================================================
         
 #         dL_dKmm =  -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2.
-        dL_dKmm =  dL_dpsi2R - KmmInv.dot(psi2_full).dot(KmmInv)/2.
+        dL_dKmm =  dL_dpsi2R - output_dim*KmmInv.dot(psi2_full).dot(KmmInv)/2.
 
         #======================================================================
         # Compute the Posterior distribution of inducing points p(u|Y)

From 47b12c20a30f22f58fe92af3b0e267b4ce810d2c Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Thu, 11 Sep 2014 13:21:35 +0100
Subject: [PATCH 10/22] remove nose from install_requires

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index d9a6ab5e..847088ec 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@ setup(name = 'GPy',
       package_data = {'GPy': ['defaults.cfg', 'installation.cfg', 'util/data_resources.json', 'util/football_teams.json']},
       py_modules = ['GPy.__init__'],
       long_description=read('README.md'),
-      install_requires=['numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1', 'nose'],
+      install_requires=['numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'],
       extras_require = {
         'docs':['Sphinx', 'ipython'],
       },

From 97d7fa69551ccc9eb0c8814adaee89b8ad8f01c0 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Thu, 11 Sep 2014 14:29:50 +0100
Subject: [PATCH 11/22] add the Windows installation instructions for GPy

---
 doc/GPy.testing.rst  |  8 ++++++++
 doc/index.rst        |  3 +++
 doc/installation.rst | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+)
 create mode 100644 doc/installation.rst

diff --git a/doc/GPy.testing.rst b/doc/GPy.testing.rst
index 2d1132d7..657d0638 100644
--- a/doc/GPy.testing.rst
+++ b/doc/GPy.testing.rst
@@ -84,6 +84,14 @@ GPy.testing.prior_tests module
     :undoc-members:
     :show-inheritance:
 
+GPy.testing.tie_tests module
+----------------------------
+
+.. automodule:: GPy.testing.tie_tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 
 Module contents
 ---------------
diff --git a/doc/index.rst b/doc/index.rst
index 87d80be3..c00f31d3 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -19,6 +19,9 @@ You may also be interested by some examples in the GPy/examples folder.
 Contents:
 
 .. toctree::
+   :maxdepth: 2
+   
+   installation
    GPy
 
 
diff --git a/doc/installation.rst b/doc/installation.rst
new file mode 100644
index 00000000..8059e89a
--- /dev/null
+++ b/doc/installation.rst
@@ -0,0 +1,34 @@
+==============
+ Installation
+==============
+
+
+Linux
+============
+
+
+Windows
+======================
+One easy way to get a Python distribution with the required packages is to use the Anaconda environment from Continuum Analytics.
+
+* Download and install the free version of Anaconda for your operating system from `their website <https://store.continuum.io>`_.
+* Open a (new) terminal window:
+
+  * Navigate to Applications/Accessories/cmd, or
+  * open *anaconda Command Prompt* from windows *start*
+
+You should now be able to launch a Python interpreter by typing *ipython* in the terminal. In the ipython prompt, you can check your installation by importing the libraries we will need later:
+::
+    $ import numpy
+    $ import pylab
+
+To install the latest version of GPy, *git* is required. A *git* client on Windows can be found `here <http://git-scm.com/download/win>`_. It is recommended to install with the option "*Use Git from the Windows Command Prompt*". Then, GPy can be installed with the following command
+::
+    pip install git+https://github.com/SheffieldML/GPy.git@devel
+
+Note that some of the functionalities in GPy require a *C/C++* compiler. One option would be to install a MSVC compiler, e.g., an Express Edition can be found `here <http://www.microsoft.com/express/download>`_.
+
+
+MacOSX
+===================================
+

From badacfb5851cf39b26995c89bb50bd419a479c3d Mon Sep 17 00:00:00 2001
From: Ricardo <acq11ra@sheffield.ac.uk>
Date: Thu, 11 Sep 2014 16:18:13 +0100
Subject: [PATCH 12/22] bug fixed in normalization

---
 GPy/core/gp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 7b010e6c..42dab1b8 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -51,7 +51,7 @@ class GP(Model):
         assert Y.ndim == 2
         logger.info("initializing Y")
 
-        if normalizer is None:
+        if normalizer is True:
             self.normalizer = MeanNorm()
         elif normalizer is False:
             self.normalizer = None

From e11e294791f9e37e818955c47a78167cda075013 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Thu, 11 Sep 2014 16:44:01 +0100
Subject: [PATCH 13/22] changes installation instructions

---
 doc/installation.rst | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/doc/installation.rst b/doc/installation.rst
index 8059e89a..35352272 100644
--- a/doc/installation.rst
+++ b/doc/installation.rst
@@ -26,9 +26,6 @@ To install the latest version of GPy, *git* is required. A *git* client on Windo
 ::
     pip install git+https://github.com/SheffieldML/GPy.git@devel
 
-Note that some of the functionalities in GPy require a *C/C++* compiler. One option would be to install a MSVC compiler, e.g., an Express Edition can be found `here <http://www.microsoft.com/express/download>`_.
-
-
 MacOSX
 ===================================
 

From 31f11eda407eac53d97a98deaf9f29c3182826c1 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Thu, 11 Sep 2014 17:29:47 +0100
Subject: [PATCH 14/22] bug fix: param object randomize

---
 GPy/core/parameterization/parameter_core.py |  6 +++---
 GPy/testing/parameterized_tests.py          | 14 ++++++++------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py
index cae999d9..6256f6b2 100644
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@@ -14,6 +14,7 @@ Observable Pattern for patameterization
 """
 
 from transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
+from ...util.misc import param_to_array
 import numpy as np
 import re
 import logging
@@ -740,7 +741,6 @@ class OptimizationHandlable(Indexable):
             self.param_array.flat[f] = p
             [np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]]))
              for c, ind in self.constraints.iteritems() if c != __fixed__]
-        self._highest_parent_.tie.propagate_val()
 
         self._optimizer_copy_transformed = False
         self._trigger_params_changed()
@@ -829,11 +829,11 @@ class OptimizationHandlable(Indexable):
         self.update_model(False) # Switch off the updates
         self.optimizer_array = x  # makes sure all of the tied parameters get the same init (since there's only one prior object...)
         # now draw from prior where possible
-        x = self.param_array.copy()
+        x = param_to_array(self.param_array).flat.copy()
         [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
         unfixlist = np.ones((self.size,),dtype=np.bool)
         unfixlist[self.constraints[__fixed__]] = False
-        self.param_array[unfixlist] = x[unfixlist]
+        self.param_array.flat[unfixlist] = x[unfixlist]
         self.update_model(True) 
 
     #===========================================================================
diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py
index a51d9e09..9c212806 100644
--- a/GPy/testing/parameterized_tests.py
+++ b/GPy/testing/parameterized_tests.py
@@ -143,8 +143,9 @@ class ParameterizedTest(unittest.TestCase):
 
     def test_randomize(self):
         ps = self.test1.param.view(np.ndarray).copy()
+        self.test1.param[2:5].fix()
         self.test1.param.randomize()
-        self.assertFalse(np.all(ps==self.test1.param))
+        self.assertFalse(np.all(ps==self.test1.param),str(ps)+str(self.test1.param))
 
     def test_fixing_randomize_parameter_handling(self):
         self.rbf.fix(warning=True)
@@ -152,11 +153,12 @@ class ParameterizedTest(unittest.TestCase):
         self.test1.kern.randomize()
         self.assertEqual(val, self.rbf.variance)
 
-    def test_updates(self):
-        self.test1.update_model(False)
-        val = float(self.rbf.variance)
-        self.test1.kern.randomize()
-        self.assertEqual(val, self.rbf.variance)
+#     def test_updates(self):
+#         # WHAT DO YOU WANT TO TEST HERE?
+#         self.test1.update_model(False)
+#         val = float(self.rbf.variance)
+#         self.test1.kern.randomize()
+#         self.assertEqual(val, self.rbf.variance,str(self.test1))
 
     def test_fixing_optimize(self):
         self.testmodel.kern.lengthscale.fix()

From d7eee6aa005fb9a6bedf36f22d6163ac73181bb6 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Fri, 12 Sep 2014 10:36:01 +0100
Subject: [PATCH 15/22] finish the debug of sparsegp_mpi

---
 GPy/core/sparse_gp_mpi.py                         | 15 +++++++++------
 .../latent_function_inference/var_dtc_parallel.py | 15 +--------------
 2 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/GPy/core/sparse_gp_mpi.py b/GPy/core/sparse_gp_mpi.py
index cecbe667..e7faf7a8 100644
--- a/GPy/core/sparse_gp_mpi.py
+++ b/GPy/core/sparse_gp_mpi.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 from sparse_gp import SparseGP
+from numpy.linalg.linalg import LinAlgError
 from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
 
 import logging
@@ -83,11 +84,7 @@ class SparseGP_MPI(SparseGP):
         if self.mpi_comm != None:
             if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
                 self.mpi_comm.Bcast(np.int32(1),root=0)
-            self.mpi_comm.Bcast(p, root=0)
-            
-        from ..util.debug import checkFinite
-        checkFinite(p, 'optimizer_array')
-        
+            self.mpi_comm.Bcast(p, root=0)        
         SparseGP.optimizer_array.fset(self,p)
         
     def optimize(self, optimizer=None, start=None, **kwargs):
@@ -103,7 +100,13 @@ class SparseGP_MPI(SparseGP):
             while True:
                 self.mpi_comm.Bcast(flag,root=0)
                 if flag==1:
-                    self.optimizer_array = x
+                    try:
+                        self.optimizer_array = x
+                        self._fail_count = 0
+                    except (LinAlgError, ZeroDivisionError, ValueError):
+                        if self._fail_count >= self._allowed_failures:
+                            raise
+                        self._fail_count += 1
                 elif flag==-1:
                     break
                 else:
diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py
index 53b31dab..b9ecbb5c 100644
--- a/GPy/inference/latent_function_inference/var_dtc_parallel.py
+++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py
@@ -167,18 +167,12 @@ class VarDTC_minibatch(LatentFunctionInference):
         # Compute Common Components
         #======================================================================
         
-        from ...util.debug import checkFullRank
-
         Kmm = kern.K(Z).copy()
         diag.add(Kmm, self.const_jitter)
-        r1 = checkFullRank(Kmm,name='Kmm')
         KmmInv,Lm,LmInv,_ = pdinv(Kmm)
         
         LmInvPsi2LmInvT = LmInv.dot(psi2_full).dot(LmInv.T)
         Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
-        r2 = checkFullRank(Lambda,name='Lambda')
-#         if (not r1) or (not r2):
-#             raise
         LInv,LL,LLInv,logdet_L = pdinv(Lambda)
         b = LLInv.dot(LmInv.dot(psi1Y_full.T))
         bbt = np.square(b).sum()
@@ -203,7 +197,6 @@ class VarDTC_minibatch(LatentFunctionInference):
         # Compute dL_dKmm
         #======================================================================
         
-#         dL_dKmm =  -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2.
         dL_dKmm =  dL_dpsi2R - output_dim*KmmInv.dot(psi2_full).dot(KmmInv)/2.
 
         #======================================================================
@@ -336,13 +329,7 @@ def update_gradients(model, mpi_comm=None):
         Y = model.Y_local
         X = model.X[model.N_range[0]:model.N_range[1]]
 
-    try:
-        model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y)
-    except Exception:
-        if model.mpi_comm is None or model.mpi_comm.rank==0:
-            import time
-            model.pickle('model_'+str(int(time.time()))+'.pickle')
-        raise
+    model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y)
     
     het_noise = model.likelihood.variance.size > 1
     

From 049b58c729c38dc9704268df1b577fad0deeb75c Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Fri, 12 Sep 2014 11:51:51 +0100
Subject: [PATCH 16/22] Remove the dependency on matplotlib

---
 GPy/examples/classification.py                  |  6 +++++-
 GPy/examples/coreg_example.py                   |  5 ++++-
 GPy/examples/non_gaussian.py                    |  5 ++++-
 GPy/examples/regression.py                      |  5 ++++-
 GPy/examples/stochastic.py                      |  5 ++++-
 GPy/examples/tutorials.py                       |  7 +++++--
 GPy/kern/_src/rbf.py                            |  2 --
 GPy/kern/_src/trunclinear.py                    |  4 ----
 GPy/models/bcgplvm.py                           |  2 --
 GPy/models/gplvm.py                             |  4 ++--
 GPy/models/sparse_gplvm.py                      |  7 +------
 GPy/plotting/__init__.py                        |  5 ++++-
 GPy/plotting/matplot_dep/base_plots.py          |  7 +++++--
 GPy/plotting/matplot_dep/dim_reduction_plots.py | 10 +++++++---
 GPy/plotting/matplot_dep/inference_plots.py     |  6 ++++--
 GPy/plotting/matplot_dep/mapping_plots.py       |  7 +++++--
 GPy/plotting/matplot_dep/maps.py                | 15 ++++++++-------
 GPy/plotting/matplot_dep/models_plots.py        |  7 +++++--
 GPy/plotting/matplot_dep/priors_plots.py        |  5 ++++-
 GPy/plotting/matplot_dep/ssgplvm.py             |  1 -
 GPy/util/datasets.py                            |  2 +-
 GPy/util/pca.py                                 | 11 ++++++++---
 22 files changed, 80 insertions(+), 48 deletions(-)

diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index ae9d8eb8..2dc5ad53 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -5,9 +5,13 @@
 """
 Gaussian Processes classification
 """
-import pylab as pb
 import GPy
 
+try:
+    import pylab as pb
+except:
+    pass
+
 default_seed = 10000
 
 def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
diff --git a/GPy/examples/coreg_example.py b/GPy/examples/coreg_example.py
index 66ba143d..6ec635eb 100644
--- a/GPy/examples/coreg_example.py
+++ b/GPy/examples/coreg_example.py
@@ -1,5 +1,8 @@
 import numpy as np
-import pylab as pb
+try:
+    import pylab as pb
+except:
+    pass
 import GPy
 pb.ion()
 pb.close('all')
diff --git a/GPy/examples/non_gaussian.py b/GPy/examples/non_gaussian.py
index c0fcd693..1e2be93b 100644
--- a/GPy/examples/non_gaussian.py
+++ b/GPy/examples/non_gaussian.py
@@ -1,7 +1,10 @@
 import GPy
 import numpy as np
-import matplotlib.pyplot as plt
 from GPy.util import datasets
+try:
+    import matplotlib.pyplot as plt
+except:
+    pass
 
 def student_t_approx(optimize=True, plot=True):
     """
diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index c4465061..83bb0453 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -4,7 +4,10 @@
 """
 Gaussian Processes regression examples
 """
-import pylab as pb
+try:
+    import pylab as pb
+except:
+    pass
 import numpy as np
 import GPy
 
diff --git a/GPy/examples/stochastic.py b/GPy/examples/stochastic.py
index c302ec7d..cc365cae 100644
--- a/GPy/examples/stochastic.py
+++ b/GPy/examples/stochastic.py
@@ -1,7 +1,10 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-import pylab as pb
+try:
+    import pylab as pb
+except:
+    pass
 import numpy as np
 import GPy
 
diff --git a/GPy/examples/tutorials.py b/GPy/examples/tutorials.py
index 7825992d..aa82d9f9 100644
--- a/GPy/examples/tutorials.py
+++ b/GPy/examples/tutorials.py
@@ -6,8 +6,11 @@
 Code of Tutorials
 """
 
-import pylab as pb
-pb.ion()
+try:
+    import pylab as pb
+    pb.ion()
+except:
+    pass
 import numpy as np
 import GPy
 
diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py
index 3711738a..62539e6d 100644
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@@ -20,8 +20,6 @@ class RBF(Stationary):
     _support_GPU = True
     def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
         super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
-        self.weave_options = {}
-        self.group_spike_prob = False
         self.psicomp = PSICOMP_RBF()
         if self.useGPU:
             self.psicomp = PSICOMP_RBF_GPU()
diff --git a/GPy/kern/_src/trunclinear.py b/GPy/kern/_src/trunclinear.py
index 76ed31f7..4ebd51b6 100644
--- a/GPy/kern/_src/trunclinear.py
+++ b/GPy/kern/_src/trunclinear.py
@@ -3,14 +3,10 @@
 
 
 import numpy as np
-from scipy import weave
 from kern import Kern
-from ...util.linalg import tdot
-from ...util.misc import param_to_array
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
 from ...util.caching import Cache_this
-from ...core.parameterization import variational
 from ...util.config import *
 
 class TruncLinear(Kern):
diff --git a/GPy/models/bcgplvm.py b/GPy/models/bcgplvm.py
index f21a01f4..c54ffdf6 100644
--- a/GPy/models/bcgplvm.py
+++ b/GPy/models/bcgplvm.py
@@ -3,8 +3,6 @@
 
 
 import numpy as np
-import pylab as pb
-import sys, pdb
 from ..core import GP
 from ..models import GPLVM
 from ..mappings import *
diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py
index 79128270..4e45ac4a 100644
--- a/GPy/models/gplvm.py
+++ b/GPy/models/gplvm.py
@@ -3,7 +3,6 @@
 
 
 import numpy as np
-import pylab as pb
 from .. import kern
 from ..core import GP, Param
 from ..likelihoods import Gaussian
@@ -55,7 +54,7 @@ class GPLVM(GP):
         #J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
         J = self.jacobian(X)
         for i in range(X.shape[0]):
-            target[i]=np.sqrt(pb.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
+            target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
         return target
 
     def plot(self):
@@ -63,6 +62,7 @@ class GPLVM(GP):
         pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet)  # @UndefinedVariable
         Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
         mu, _ = self.predict(Xnew)
+        import pylab as pb
         pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
 
     def plot_latent(self, labels=None, which_indices=None,
diff --git a/GPy/models/sparse_gplvm.py b/GPy/models/sparse_gplvm.py
index 4642e158..251103f4 100644
--- a/GPy/models/sparse_gplvm.py
+++ b/GPy/models/sparse_gplvm.py
@@ -3,13 +3,8 @@
 
 
 import numpy as np
-import pylab as pb
-import sys, pdb
+import sys
 from GPy.models.sparse_gp_regression import SparseGPRegression
-from GPy.models.gplvm import GPLVM
-# from .. import kern
-# from ..core import model
-# from ..util.linalg import pdinv, PCA
 
 class SparseGPLVM(SparseGPRegression):
     """
diff --git a/GPy/plotting/__init__.py b/GPy/plotting/__init__.py
index 7a39ca9a..d3a96914 100644
--- a/GPy/plotting/__init__.py
+++ b/GPy/plotting/__init__.py
@@ -1,4 +1,7 @@
 # Copyright (c) 2014, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-import matplot_dep
+try:
+    import matplot_dep
+except (ImportError, NameError):
+    print 'Fail to load GPy.plotting.matplot_dep.'
\ No newline at end of file
diff --git a/GPy/plotting/matplot_dep/base_plots.py b/GPy/plotting/matplot_dep/base_plots.py
index db9ab8e4..b4142342 100644
--- a/GPy/plotting/matplot_dep/base_plots.py
+++ b/GPy/plotting/matplot_dep/base_plots.py
@@ -2,8 +2,11 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 
-import Tango
-import pylab as pb
+try:
+    import Tango
+    import pylab as pb
+except:
+    pass
 import numpy as np
 
 def ax_default(fignum, ax):
diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py
index 1d5fdd61..20e8e962 100644
--- a/GPy/plotting/matplot_dep/dim_reduction_plots.py
+++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py
@@ -1,12 +1,16 @@
-import pylab as pb
+
 import numpy as np
 from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController
 from ...util.misc import param_to_array
 from ...core.parameterization.variational import VariationalPosterior
 from .base_plots import x_frame2D
 import itertools
-import Tango
-from matplotlib.cm import get_cmap
+try:
+    import Tango
+    from matplotlib.cm import get_cmap
+    import pylab as pb
+except:
+    pass
 
 def most_significant_input_dimensions(model, which_indices):
     """
diff --git a/GPy/plotting/matplot_dep/inference_plots.py b/GPy/plotting/matplot_dep/inference_plots.py
index 6a3a8a93..c802932c 100644
--- a/GPy/plotting/matplot_dep/inference_plots.py
+++ b/GPy/plotting/matplot_dep/inference_plots.py
@@ -1,8 +1,10 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-import pylab as pb
-import sys
+try:
+    import pylab as pb
+except:
+    pass
 #import numpy as np
 #import Tango
 #from base_plots import gpplot, x_frame1D, x_frame2D
diff --git a/GPy/plotting/matplot_dep/mapping_plots.py b/GPy/plotting/matplot_dep/mapping_plots.py
index 3e3ea793..6156687d 100644
--- a/GPy/plotting/matplot_dep/mapping_plots.py
+++ b/GPy/plotting/matplot_dep/mapping_plots.py
@@ -1,9 +1,12 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-import pylab as pb
 import numpy as np
-import Tango
+try:
+    import Tango
+    import pylab as pb
+except:
+    pass
 from base_plots import x_frame1D, x_frame2D
 
 
diff --git a/GPy/plotting/matplot_dep/maps.py b/GPy/plotting/matplot_dep/maps.py
index e941ab2d..dbedaa98 100644
--- a/GPy/plotting/matplot_dep/maps.py
+++ b/GPy/plotting/matplot_dep/maps.py
@@ -1,13 +1,14 @@
 import numpy as np
-import pylab as pb
-import matplotlib.patches as patches
-from matplotlib.patches import Polygon
-from matplotlib.collections import PatchCollection
-#from matplotlib import cm
+try:
+    import pylab as pb
+    from matplotlib.patches import Polygon
+    from matplotlib.collections import PatchCollection
+    #from matplotlib import cm
+    pb.ion()
+except:
+    pass
 import re
 
-pb.ion()
-
 def plot(shape_records,facecolor='w',edgecolor='k',linewidths=.5, ax=None,xlims=None,ylims=None):
     """
     Plot the geometry of a shapefile
diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py
index 46a79ad8..509c9485 100644
--- a/GPy/plotting/matplot_dep/models_plots.py
+++ b/GPy/plotting/matplot_dep/models_plots.py
@@ -1,9 +1,12 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-import pylab as pb
+try:
+    import Tango
+    import pylab as pb
+except:
+    pass
 import numpy as np
-import Tango
 from base_plots import gpplot, x_frame1D, x_frame2D
 from ...util.misc import param_to_array
 from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
diff --git a/GPy/plotting/matplot_dep/priors_plots.py b/GPy/plotting/matplot_dep/priors_plots.py
index af999740..8f02a03b 100644
--- a/GPy/plotting/matplot_dep/priors_plots.py
+++ b/GPy/plotting/matplot_dep/priors_plots.py
@@ -3,7 +3,10 @@
 
 
 import numpy as np
-import pylab as pb
+try:
+    import pylab as pb
+except:
+    pass
 
 
 def univariate_plot(prior):
diff --git a/GPy/plotting/matplot_dep/ssgplvm.py b/GPy/plotting/matplot_dep/ssgplvm.py
index 4106e251..ef45a759 100644
--- a/GPy/plotting/matplot_dep/ssgplvm.py
+++ b/GPy/plotting/matplot_dep/ssgplvm.py
@@ -6,7 +6,6 @@ import pylab
 
 from ...models import SSGPLVM
 from img_plots import plot_2D_images
-from ...util.misc import param_to_array
 
 class SSGPLVM_plot(object):
     def __init__(self,model, imgsize):
diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 17b26f31..93a5dceb 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -2,7 +2,6 @@ import csv
 import os
 import copy
 import numpy as np
-import pylab as pb
 import GPy
 import scipy.io
 import cPickle as pickle
@@ -346,6 +345,7 @@ def football_data(season='1314', data_set='football_data'):
     data_resources[data_set_season]['files'] = [files]
     if not data_available(data_set_season):
         download_data(data_set_season)
+    import pylab as pb
     for file in reversed(files):
         filename = os.path.join(data_path, data_set_season, file)
         # rewrite files removing blank rows.
diff --git a/GPy/util/pca.py b/GPy/util/pca.py
index 967d0e1b..046f47d7 100644
--- a/GPy/util/pca.py
+++ b/GPy/util/pca.py
@@ -5,8 +5,11 @@ Created on 10 Sep 2012
 @copyright: Max Zwiessele 2012
 '''
 import numpy
-import pylab
-import matplotlib
+try:
+    import pylab
+    import matplotlib
+except:
+    pass
 from numpy.linalg.linalg import LinAlgError
 
 class pca(object):
@@ -88,13 +91,15 @@ class pca(object):
 
     def plot_2d(self, X, labels=None, s=20, marker='o',
                 dimensions=(0, 1), ax=None, colors=None,
-                fignum=None, cmap=matplotlib.cm.jet, # @UndefinedVariable
+                fignum=None, cmap=None, # @UndefinedVariable
                 ** kwargs):
         """
         Plot dimensions `dimensions` with given labels against each other in 
         PC space. Labels can be any sequence of labels of dimensions X.shape[0].
         Labels can be drawn with a subsequent call to legend()
         """
+        if cmap is None:
+            cmap = matplotlib.cm.jet
         if ax is None:
             fig = pylab.figure(fignum)
             ax = fig.add_subplot(111)

From 33fcd06ccc9c4f387338aa37e916f97aa067aaa3 Mon Sep 17 00:00:00 2001
From: Zhenwen Dai <z.dai@sheffield.ac.uk>
Date: Fri, 12 Sep 2014 11:53:59 +0100
Subject: [PATCH 17/22] change setup.py accordingly.

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 847088ec..5e313732 100644
--- a/setup.py
+++ b/setup.py
@@ -24,9 +24,9 @@ setup(name = 'GPy',
       package_data = {'GPy': ['defaults.cfg', 'installation.cfg', 'util/data_resources.json', 'util/football_teams.json']},
       py_modules = ['GPy.__init__'],
       long_description=read('README.md'),
-      install_requires=['numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'],
+      install_requires=['numpy>=1.6', 'scipy>=0.9'],
       extras_require = {
-        'docs':['Sphinx', 'ipython'],
+        'docs':['matplotlib>=1.1','Sphinx','ipython'],
       },
       classifiers=[
       "License :: OSI Approved :: BSD License"],

From ed754823be2f6f6b4ab4a3e82230158ec9c08810 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Mon, 15 Sep 2014 09:55:02 +0100
Subject: [PATCH 18/22] NonContiguos tests fixed for Kdiag_dX

---
 GPy/inference/latent_function_inference/dtc.py | 1 +
 GPy/testing/kernel_tests.py                    | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/GPy/inference/latent_function_inference/dtc.py b/GPy/inference/latent_function_inference/dtc.py
index 1b6b1dbd..aa398166 100644
--- a/GPy/inference/latent_function_inference/dtc.py
+++ b/GPy/inference/latent_function_inference/dtc.py
@@ -124,6 +124,7 @@ class vDTC(object):
         v, _ = dtrtrs(L, tmp, lower=1, trans=1)
         tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
         P = tdot(tmp.T)
+        stop
 
         #compute log marginal
         log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index 83e1085c..95ad7961 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -215,7 +215,10 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
     if verbose:
         print("Checking gradients of Kdiag(X) wrt X.")
     try:
-        result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
+        testmodel = Kern_check_dKdiag_dX(kern, X=X)
+        if fixed_X_dims is not None:
+            testmodel.X[:,fixed_X_dims].fix()
+        result = testmodel.checkgrad(verbose=verbose)
     except NotImplementedError:
         result=True
         if verbose:
@@ -346,6 +349,7 @@ class KernelTestsNonContinuous(unittest.TestCase):
         kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
         self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
 
+
     def test_ODE_UY(self):
         kern = GPy.kern.ODE_UY(2, active_dims=[0, self.D])
         X = self.X[self.X[:,-1]!=2]

From ff6361728612802652638f8fc1a419443dab12be Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Tue, 16 Sep 2014 13:20:38 +0100
Subject: [PATCH 19/22] for loop speedup in gradients X

---
 GPy/kern/_src/stationary.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py
index 04427c2c..1694cf48 100644
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@@ -171,7 +171,8 @@ class Stationary(Kern):
 
         #the lower memory way with a loop
         ret = np.empty(X.shape, dtype=np.float64)
-        [np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=ret[:,q]) for q in xrange(self.input_dim)]
+        for q in xrange(self.input_dim):
+            np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=ret[:,q])
         ret /= self.lengthscale**2
 
         return ret

From 803c345d443b20346b6b75c803093953993b71b7 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Wed, 17 Sep 2014 11:12:38 +0100
Subject: [PATCH 20/22] docstring for ExpQuad (thanks Mike O. )

---
 GPy/kern/_src/stationary.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py
index 1694cf48..cc5634e9 100644
--- a/GPy/kern/_src/stationary.py
+++ b/GPy/kern/_src/stationary.py
@@ -310,6 +310,19 @@ class Matern52(Stationary):
 
 
 class ExpQuad(Stationary):
+    """
+    The Exponentiated quadratic covariance function. 
+
+    .. math::
+
+       k(r) = \sigma^2 \exp(- \\frac{1}{2} r^2)
+
+    notes::
+     - Yes, this is exactly the same as the RBF covariance function, but the
+       RBF implementation also has some features for doing variational kernels
+       (the psi-statistics).
+
+    """
     def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='ExpQuad'):
         super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
 

From 31478d4d593185c09dcf0f4218eecc3fef9dd418 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Wed, 17 Sep 2014 11:22:31 +0100
Subject: [PATCH 21/22] improved docstring for optimize

---
 GPy/core/model.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index 8c556da2..dc0a9f5e 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -213,6 +213,7 @@ class Model(Parameterized):
     def optimize(self, optimizer=None, start=None, **kwargs):
         """
         Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
+        
         kwargs are passed to the optimizer. They can be:
 
         :param max_f_eval: maximum number of function evaluations
@@ -222,7 +223,15 @@ class Model(Parameterized):
         :param optimizer: which optimizer to use (defaults to self.preferred optimizer)
         :type optimizer: string
 
-        TODO: valid args
+        Valid optimizers are:
+          - 'scg': scaled conjugate gradient method, recommended for stability.
+                   See also GPy.inference.optimization.scg
+          - 'fmin_tnc': truncated Newton method (see scipy.optimize.fmin_tnc)
+          - 'simplex': the Nelder-Mead simplex method (see scipy.optimize.fmin),
+          - 'lbfgsb': the l-bfgs-b method (see scipy.optimize.fmin_l_bfgs_b),
+          - 'sgd': stochastic gradient descent (see scipy.optimize.sgd). For experts only!
+
+ 
         """
         if self.is_fixed:
             raise RuntimeError, "Cannot optimize, when everything is fixed"

From 48fb60489160de6fb0e84f6559b85b07dd16e274 Mon Sep 17 00:00:00 2001
From: James Hensman <james.hensman@gmail.com>
Date: Wed, 17 Sep 2014 12:30:56 +0100
Subject: [PATCH 22/22] some improvements to plotting 2d kernels

---
 GPy/plotting/matplot_dep/kernel_plots.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py
index f2082db0..c0bd1599 100644
--- a/GPy/plotting/matplot_dep/kernel_plots.py
+++ b/GPy/plotting/matplot_dep/kernel_plots.py
@@ -100,9 +100,7 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False, filtering=Non
     return ax
 
 
-def plot(kernel, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
-    if which_parts == 'all':
-        which_parts = [True] * kernel.size
+def plot(kernel, x=None, plot_limits=None, resolution=None, *args, **kwargs):
     if kernel.input_dim == 1:
         if x is None:
             x = np.zeros((1, 1))
@@ -133,7 +131,7 @@ def plot(kernel, x=None, plot_limits=None, which_parts='all', resolution=None, *
             assert x.size == 2, "The size of the fixed variable x is not 2"
             x = x.reshape((1, 2))
 
-        if plot_limits == None:
+        if plot_limits is None:
             xmin, xmax = (x - 5).flatten(), (x + 5).flatten()
         elif len(plot_limits) == 2:
             xmin, xmax = plot_limits
@@ -142,12 +140,10 @@ def plot(kernel, x=None, plot_limits=None, which_parts='all', resolution=None, *
 
         resolution = resolution or 51
         xx, yy = np.mgrid[xmin[0]:xmax[0]:1j * resolution, xmin[1]:xmax[1]:1j * resolution]
-        xg = np.linspace(xmin[0], xmax[0], resolution)
-        yg = np.linspace(xmin[1], xmax[1], resolution)
         Xnew = np.vstack((xx.flatten(), yy.flatten())).T
-        Kx = kernel.K(Xnew, x, which_parts)
+        Kx = kernel.K(Xnew, x)
         Kx = Kx.reshape(resolution, resolution).T
-        pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) # @UndefinedVariable
+        pb.contour(xx, xx, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) # @UndefinedVariable
         pb.xlim(xmin[0], xmax[0])
         pb.ylim(xmin[1], xmax[1])
         pb.xlabel("x1")