From b9e897c50deef7d607881a72b3c57ae60b560e05 Mon Sep 17 00:00:00 2001
From: mzwiessele <ibinbei@gmail.com>
Date: Sun, 7 Sep 2014 15:42:03 +0100
Subject: [PATCH] [documentation] updated big parts of the doc

---
 GPy/__init__.py                      |   4 +
 doc/index.rst                        |   2 -
 doc/tuto_GP_regression.rst           | 102 +++++++++++-----------
 doc/tuto_creating_new_models.rst     |  54 +++++++++++-
 doc/tuto_interacting_with_models.rst | 125 ++++++++++++++-------------
 5 files changed, 171 insertions(+), 116 deletions(-)

diff --git a/GPy/__init__.py b/GPy/__init__.py
index 8c5f7ed5..819f54bf 100644
--- a/GPy/__init__.py
+++ b/GPy/__init__.py
@@ -18,6 +18,10 @@ from nose.tools import nottest
 import kern
 import plotting
 
+# Direct imports for convenience:
+from core import Model
+from core.parameterization import Param, Parameterized, ObsAr
+
 @nottest
 def tests():
     Tester(testing).test(verbose=10)
diff --git a/doc/index.rst b/doc/index.rst
index 4d0833a4..87d80be3 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -19,8 +19,6 @@ You may also be interested by some examples in the GPy/examples folder.
 Contents:
 
 .. toctree::
-   :maxdepth: 4
-
    GPy
 
 
diff --git a/doc/tuto_GP_regression.rst b/doc/tuto_GP_regression.rst
index 3d3ab10a..29eefa72 100644
--- a/doc/tuto_GP_regression.rst
+++ b/doc/tuto_GP_regression.rst
@@ -23,15 +23,15 @@ Note that the observations Y include some noise.
 
 The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on Gaussian kernel (i.e. rbf or square exponential)::
 
-    kernel = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.)
+    kernel = GPy.kern.RBF(input_dim=1, variance=1., lengthscale=1.)
 
 The parameter ``input_dim`` stands for the dimension of the input space. The parameters ``variance`` and ``lengthscale`` are optional. Many other kernels are implemented such as:
 
-* linear (``GPy.kern.linear``)
-* exponential kernel (``GPy.kern.exponential``)
-* Matern 3/2 (``GPy.kern.Matern32``)
-* Matern 5/2 (``GPy.kern.Matern52``)
-* spline (``GPy.kern.spline``)
+* linear (:py:class:`~GPy.kern.Linear`)
+* exponential kernel (:py:class:`GPy.kern.Exponential`)
+* Matern 3/2 (:py:class:`GPy.kern.Matern32`)
+* Matern 5/2 (:py:class:`GPy.kern.Matern52`)
+* spline (:py:class:`GPy.kern.Spline`)
 * and many others...
 
 The inputs required for building the model are the observations and the kernel::
@@ -45,38 +45,28 @@ By default, some observation noise is added to the modle. The functions ``print`
 
 gives the following output: ::
 
-    Marginal log-likelihood: -4.479e+00
-           Name        |  Value   |  Constraints  |  Ties  |  Prior  
-    -----------------------------------------------------------------
-       rbf_variance    |  1.0000  |               |        |         
-      rbf_lengthscale  |  1.0000  |               |        |         
-      noise_variance   |  1.0000  |               |        |         
-
+  Name                 : GP regression
+  Log-likelihood       : -22.8178418808
+  Number of Parameters : 3
+  Parameters:
+    GP_regression.           |  Value  |  Constraint  |  Prior  |  Tied to
+    rbf.variance             |    1.0  |     +ve      |         |         
+    rbf.lengthscale          |    1.0  |     +ve      |         |         
+    Gaussian_noise.variance  |    1.0  |     +ve      |         |         
+  
 .. figure::  Figures/tuto_GP_regression_m1.png
     :align:   center
     :height: 350px
 
-    GP regression model before optimization of the parameters. The shaded region corresponds to 95% confidence intervals (ie +/- 2 standard deviation).
+    GP regression model before optimization of the parameters. The shaded region corresponds to ~95% confidence intervals (i.e. +/- 2 standard deviations).
 
-The default values of the kernel parameters may not be relevant for the current data (for example, the confidence intervals seems too wide on the previous figure). A common approach is to find the values of the parameters that maximize the likelihood of the data. There are two steps for doing that with GPy:
+The default values of the kernel parameters may not be relevant for
+the current data (for example, the confidence intervals seem too wide
+on the previous figure). A common approach is to find the values of
+the parameters that maximize the likelihood of the data. It is as easy as
+calling ``m.optimize`` in GPy::
 
-* Constrain the parameters of the kernel to ensure the kernel will always be a valid covariance structure (For example, we don\'t want some variances to be negative!).
-* Run the optimization
-
-There are various ways to constrain the parameters of the kernel. The most basic is to constrain all the parameters to be positive::
-
-    m.ensure_default_constraints() # or similarly m.constrain_positive('')
-
-but it is also possible to set a range on to constrain one parameter to be fixed. The parameter of ``m.constrain_positive`` is a regular expression that matches the name of the parameters to be constrained (as seen in ``print m``). For example, if we want the variance to be positive, the lengthscale to be in [1,10] and the noise variance to be fixed we can write::
-
-    m.unconstrain('')               # may be used to remove the previous constrains
-    m.constrain_positive('.*rbf_variance')
-    m.constrain_bounded('.*lengthscale',1.,10. )
-    m.constrain_fixed('.*noise',0.0025)
-
-Once the constrains have been imposed, the model can be optimized::
-
-    m.optimize()
+  m.optimize()
 
 If we want to perform some restarts to try to improve the result of the optimization, we can use the ``optimize_restart`` function::
 
@@ -84,13 +74,15 @@ If we want to perform some restarts to try to improve the result of the optimiza
 
 Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting model  resulting model::
 
-    Marginal log-likelihood: 3.603e+01
-           Name        |  Value   |  Constraints  |  Ties  |  Prior  
-    -----------------------------------------------------------------
-       rbf_variance    |  0.8151  |     (+ve)     |        |         
-      rbf_lengthscale  |  1.8037  |  (1.0, 10.0)  |        |         
-      noise_variance   |  0.0025  |     Fixed     |        |         
-
+  Name                 : GP regression
+  Log-likelihood       : 11.947469082
+  Number of Parameters : 3
+  Parameters:
+    GP_regression.           |       Value        |  Constraint  |  Prior  |  Tied to
+    rbf.variance             |     0.74229417323  |     +ve      |         |         
+    rbf.lengthscale          |     1.43020495724  |     +ve      |         |         
+    Gaussian_noise.variance  |  0.00325654460991  |     +ve      |         |         
+  
 .. figure::  Figures/tuto_GP_regression_m2.png
     :align:   center
     :height: 350px
@@ -113,30 +105,36 @@ Here is a 2 dimensional example::
     Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
 
     # define kernel
-    ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
+    ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.White(2)
 
     # create simple GP model
     m = GPy.models.GPRegression(X,Y,ker)
 
-    # contrain all parameters to be positive
-    m.constrain_positive('')
-
     # optimize and plot
-    m.optimize('tnc', max_f_eval = 1000)
+    m.optimize(max_f_eval = 1000)
     m.plot()
     print(m)
 
 The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per dimension (ie the GP is not isotropic). The output of the last two lines is::
 
-    Marginal log-likelihood: 6.682e+01
-             Name          |  Value   |  Constraints  |  Ties  |  Prior  
-    ---------------------------------------------------------------------
-        Mat52_variance     |  0.3860  |     (+ve)     |        |         
-      Mat52_lengthscale_0  |  2.0578  |     (+ve)     |        |         
-      Mat52_lengthscale_1  |  1.8542  |     (+ve)     |        |         
-        white_variance     |  0.0023  |     (+ve)     |        |         
-        noise variance     |  0.0000  |     (+ve)     |        |         
+  Name                 : GP regression
+  Log-likelihood       : 26.787156248
+  Number of Parameters : 5
+  Parameters:
+    GP_regression.           |        Value        |  Constraint  |  Prior  |  Tied to
+    add.Mat52.variance       |     0.385463739076  |     +ve      |         |         
+    add.Mat52.lengthscale    |               (2,)  |     +ve      |         |         
+    add.white.variance       |  0.000835329608514  |     +ve      |         |         
+    Gaussian_noise.variance  |  0.000835329608514  |     +ve      |         |         
 
+If you want to see the ``ARD`` parameters explicitly, print them
+directly::
+
+  >>> print m.add.Mat52.lengthscale
+    Index  |  GP_regression.add.Mat52.lengthscale  |  Constraint  |   Prior   |  Tied to
+     [0]   |                            1.9575587  |     +ve      |           |    N/A    
+     [1]   |                            1.9689948  |     +ve      |           |    N/A    
+  
 .. figure::  Figures/tuto_GP_regression_m3.png
     :align:   center
     :height: 350px
diff --git a/doc/tuto_creating_new_models.rst b/doc/tuto_creating_new_models.rst
index c5196c33..07f6194f 100644
--- a/doc/tuto_creating_new_models.rst
+++ b/doc/tuto_creating_new_models.rst
@@ -20,13 +20,13 @@ input parameters :math:`\mathbf{X}`. Where
 Obligatory methods
 ==================
 
-:py:meth:`~GPy.core.model.Model.__init__` :
+:py:func:`~GPy.core.model.Model.__init__` :
 	Initialize the model with the given parameters. These need to
 	be added to the model by calling
 	`self.add_parameter(<param>)`, where param needs to be a
 	parameter handle (See parameterized_ for details).::
 	
-		self.X = GPy.core.Param("input", X)
+		self.X = GPy.Param("input", X)
 		self.add_parameter(self.X)
 		
 :py:meth:`~GPy.core.model.Model.log_likelihood` :
@@ -41,11 +41,59 @@ Obligatory methods
     each parameter handle in the hierarchy with respect to the
     log_likelihod. Thus here we need to set the negative derivative of
     the rosenbrock function for the parameters. In this case it is the
-    gradient for self.X:
+    gradient for self.X.::
 
  		self.X.gradient = -scipy.optimize.rosen_der(self.X)
 
 
+Here is the full code for the ``Rosen`` class::
+
+  from GPy import Model, Param
+  import scipy
+  class Rosen(Model):
+      def __init__(self, X, name='rosenbrock'):
+          super(Rosen, self).__init__(name=name)
+          self.X = Param("input", X)
+          self.add_parameter(self.X)
+      def log_likelihood(self):
+          return -scipy.optimize.rosen(self.X)
+      def parameters_changed(self):
+          self.X.gradient = -scipy.optimize.rosen_der(self.X)
+
+In order to test the newly created model, we can check the gradients
+and optimize a standard rosenbrock run::
+
+  >>> m = Rosen(np.array([-1,-1]))
+  >>> print m
+  Name                 : rosenbrock
+  Log-likelihood       : -404.0
+  Number of Parameters : 2
+  Parameters:
+    rosenbrock.  |  Value  |  Constraint  |  Prior  |  Tied to
+    input        |   (2,)  |              |         |         
+  >>> m.checkgrad(verbose=True)
+             Name           |     Ratio     |  Difference   |  Analytical   |   Numerical   
+  ------------------------------------------------------------------------------------------
+   rosenbrock.input[[0]]    |   1.000000    |   0.000000    |  -804.000000  |  -804.000000  
+   rosenbrock.input[[1]]    |   1.000000    |   0.000000    |  -400.000000  |  -400.000000  
+  >>> m.optimize()
+  >>> print m
+  Name                 : rosenbrock
+  Log-likelihood       : -6.52150088871e-15
+  Number of Parameters : 2
+  Parameters:
+    rosenbrock.  |  Value  |  Constraint  |  Prior  |  Tied to
+    input        |   (2,)  |              |         |         
+  >>> print m.input
+    Index  |  rosenbrock.input  |  Constraint  |   Prior   |  Tied to
+     [0]   |        0.99999994  |              |           |    N/A    
+     [1]   |        0.99999987  |              |           |    N/A    
+  >>> print m.gradient
+  [ -1.91169809e-06,   1.01852309e-06]
+  
+This is the optimum for the 2D Rosenbrock function, as expected, and
+the gradients of the inputs are almost zero.
+
 Optional methods
 ================
 
diff --git a/doc/tuto_interacting_with_models.rst b/doc/tuto_interacting_with_models.rst
index 184d9c17..80b2ac77 100644
--- a/doc/tuto_interacting_with_models.rst
+++ b/doc/tuto_interacting_with_models.rst
@@ -70,7 +70,7 @@ This will print the details of this particular parameter handle::
 When you want to get a closer look into
 multivalue parameters, print them directly::
 
-  print m.inducing_indputs
+  print m.inducing_inputs
 
   Index  |  sparse_gp.inducing_inputs  |  Constraint  |   Prior   |  Tied to
   [0 0]  |                  2.7189499  |              |           |    N/A    
@@ -99,7 +99,7 @@ You should see this::
     sparse_gp.               |       Value        |  Constraint  |  Prior  |  Tied to
     inducing inputs          |            (5, 1)  |              |         |         
     rbf.variance             |     1.91644016819  |     +ve      |         |         
-    rbf.lengthscale          |              0.2  |     +ve      |         |         
+    rbf.lengthscale          |               0.2  |     +ve      |         |         
     Gaussian_noise.variance  |  0.00269870373421  |     +ve      |         |           
 
 This will already have updated the model's inner state, so you can
@@ -121,7 +121,7 @@ values matched by calling `values()` on the returned object::
   >>> print m['.*var'].values()
   [ 2.1500132   0.00242682]
   >>> print m['rbf']
-     Index  |   sparse_gp.rbf.variance    |  Constraint  |    Prior     |  Tied to
+    Index  |   sparse_gp.rbf.variance    |  Constraint  |    Prior     |  Tied to
      [0]   |                  2.1500132  |              |              |    N/A    
     -----  |  sparse_gp.rbf.lengthscale  |  ----------  |  ----------  |  -------
      [0]   |                  2.6782803  |              |              |    N/A    
@@ -205,9 +205,7 @@ Getting the model's log likelihood
 =============================================
 Appart form the printing the model,  the marginal 
 log-likelihood can be obtained by using the function
-``log_likelihood()``. Also, the log-likelihood gradients
-wrt. each parameter can be obtained with the funcion
-``_log_likelihood_gradients()``. ::
+``log_likelihood()``.::
 
     >>> m.log_likelihood()
     array([-152.83377316])
@@ -255,6 +253,28 @@ when reconstraining a parameter, which was already constrained::
 	>>>m.unconstrain()
 	array([6, 7])
 
+If you want to unconstrain only a specific constraint, you can pass it
+as an argument of ``unconstrain(Transformation)`` (:py:class:`~GPy.constraints.Transformation`), or call
+the respective method, such as ``unconstrain_fixed()`` (or
+``unfix()``) to only unfix fixed parameters.::
+
+  >>> m.inducing_inputs[0].fix()
+  >>> m.unfix()
+  >>> m.rbf.constrain_positive()
+  >>> print m
+  Name                 : sparse gp
+  Log-likelihood       : 620.741066698
+  Number of Parameters : 8
+  Parameters:
+    sparse_gp.               |       Value        |  Constraint  |  Prior  |  Tied to
+    inducing inputs          |            (5, 1)  |              |         |         
+    rbf.variance             |     1.48329711218  |     +ve      |         |         
+    rbf.lengthscale          |      2.5430947048  |     +ve      |         |         
+    Gaussian_noise.variance  |  0.00229714444128  |              |         |         
+
+As you can see, ``unfix()`` only unfixed the inducing_input, and did
+not change the positive constraint of the kernel.
+
 The parameter handles come with default constraints, so you will
 rarely be needing to adjust the constraints of a model. In the rare
 cases of needing to adjust the constraints of a model, or in need of
@@ -263,72 +283,59 @@ fixing some parameters, you can do so with the functions
 
     m['.*var'].constrain_positive()
 
-For convenience, GPy also provides a catch all function 
-which ensures that anything which appears to require 
-positivity is constrianed appropriately::
+Available Constraints
+=====================
 
-    m.ensure_default_constraints()
+* :py:class:`~GPy.constraints.Logexp`
+* :py:class:`~GPy.constraints.Exponent`
+* :py:class:`~GPy.constraints.Square`
+* :py:class:`~GPy.constraints.Logistic`
+* :py:class:`~GPy.constraints.LogexpNeg`
+* :py:class:`~GPy.constraints.NegativeExponent`
+* :py:class:`~GPy.constraints.NegativeLogexp`
 
-Fixing parameters
-=================
-Parameters values can be fixed using ``constrain_fixed()``. 
-For example we can define the first inducing input to be 
-fixed on zero: ::
-
-    m.constrain_fixed('iip_0',0)
-	
-Bounding parameters
-===================
-Defining bounding constraints is an easily task in GPy too,
-it only requires to use the function ``constrain_bounded()``.
-For example, lets bound inducing inputs 2 and 3 to have
-values between -4 and -1: ::
-
-    m.constrain_bounded('iip_(1|2)',-4,-1)
 
 Tying Parameters
-================
-The values of two or more parameters can be tied together,
-so that they share the same value during optimization.
-The function to do so is ``tie_params()``. For the example
-we are using, it doesn't make sense to tie parameters together,
-however for the sake of the example we will tie the white noise
-and the variance together. See `A kernel overview <tuto_kernel_overview.html>`_.
-for a proper use of the tying capabilities.::
+================
+Not yet implemented for GPy version 0.6.0
 
-    m.tie_params('.*e_var')
 
 Optimizing the model
 ====================
+
 Once we have finished defining the constraints, 
 we can now optimize the model with the function
 ``optimize``.::
 
-    m.optimize()
+  m.Gaussian_noise.constrain_positive()
+  m.rbf.constrain_positive()
+  m.optimize()
 
-We can print again the model and check the new results.
-The table now shows that ``iip_0_0`` is fixed, ``iip_1_0`` 
-and ``iip_2_0`` are bounded and the kernel parameters are constrained to
-be positive. In addition the table now indicates that
-white_variance and noise_variance are tied together.::
+By default, GPy uses the lbfgsb optimizer.
+ 
+Some of the optional parameters are described below:
 
-	Log-likelihood: 9.967e+01
+* ``optimizer``: which optimizer to use, currently there are ``lbfgsb, fmin_tnc,
+  scg, simplex`` or any string that uniquely identifies an
+  optimizer. Thus, you can say ``m.optimize('bfgs')`` for using the
+  ``lbfgsb`` optimizer
+* ``messages``: if the optimizer is verbose. Each optimizer has its
+  own way of printing, so do not be confused by differing messages of
+  different optimizers
+* ``max_iters``: Maximum number of iterations to take. Some optimizers
+  see iterations as function calls, others as iterations of the
+  algorithm. Please be advised to look into ``scipy.optimize`` for
+  more instructions, if the number of iterations matters, so you can
+  give the right parameters to ``optimize()``
+* ``gtol``: only for some optimizers. Will determine the convergence
+  criterion, as the tolerance of gradient to finish the optimization.
 
-  	     Name        |   Value   |  Constraints  |  Ties  |  Prior  
-	------------------------------------------------------------------
-	    iip_0_0      |  0.0000   |     Fixed     |        |         
-	    iip_1_0      |  -2.8834  |   (-4, -1)    |        |         
-	    iip_2_0      |  -1.9152  |   (-4, -1)    |        |         
-	    iip_3_0      |  1.5034   |               |        |         
-	    iip_4_0      |  -1.0162  |               |        |         
-	 rbf_variance    |  0.0158   |     (+ve)     |        |         
-	rbf_lengthscale  |  0.9760   |     (+ve)     |        |         
-	white_variance   |  0.0049   |     (+ve)     |  (0)   |         
-	noise_variance   |  0.0049   |     (+ve)     |  (0)   |         
+Further Reading 
+=============== 
 
-
-Further Reading
-===============
-All of the mechansiams for dealing with parameters are baked right into GPy.core.model, from which all of the classes in GPy.models inherrit. To learn how to construct your own model, you might want to read :ref:`creating_new_models`. 
-
-By deafult, GPy uses the scg optimizer. To use other optimisers, and to control the setting of those optimisers, as well as other funky features like automated restarts and diagnostics, you can read the optimization tutorial ??link??.
+All of the mechanisms for dealing
+with parameters are baked right into GPy.core.model, from which all of
+the classes in GPy.models inherit. To learn how to construct your own
+model, you might want to read :ref:`creating_new_models`.  If you want
+to learn how to create kernels, please refer to
+:ref:`creating_new_kernels`.