From 7071dff030f2ca540d5fb1483027879d4612f6bd Mon Sep 17 00:00:00 2001
From: Neil Lawrence
Date: Thu, 13 Jun 2013 06:38:11 +0100
Subject: [PATCH 01/19] Mods to visualize and dimensionality to make stick demos work for summer school.

---
 GPy/examples/dimensionality_reduction.py | 39 +++++++++++++++++++++---
 GPy/util/visualize.py | 5 +--
 2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 28ee2bde..429b38de 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -261,6 +261,7 @@ def bgplvm_simulation(optimize='scg',
     k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
     m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, _debug=True)
+    # m.constrain('variance|noise', logexp_clipped())
     m.ensure_default_constraints()
     m['noise'] = Y.var() / 100.
@@ -327,28 +328,56 @@ def brendan_faces():
     data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
     lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
     raw_input('Press enter to finish')
-    lvm_visualizer.close()
     return m
 
+def stick_play(range=None, frame_rate=15):
+    data = GPy.util.datasets.stick()
+    if range is None:
+        Y = data['Y'].copy()
+    else:
+        Y = data['Y'][range[0]:range[1], :].copy()
+    y = Y[0, :]
+    data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
+    GPy.util.visualize.data_play(Y, data_show, frame_rate)
+    return Y
 
 def stick():
     data = GPy.util.datasets.stick()
-    m = GPy.models.GPLVM(data['Y'], 2)
-    # optimize
+    m = GPy.models.GPLVM(data['Y'], 2)
     m.ensure_default_constraints()
     m.optimize(messages=1, max_f_eval=10000)
     m._set_params(m._get_params())
-
+    plt.clf()
     ax = m.plot_latent()
     y = m.likelihood.Y[0, :]
     data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
     lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
     raw_input('Press enter to finish')
-    lvm_visualizer.close()
     return m
 
+def stick_bgplvm(model=None):
+    data = GPy.util.datasets.stick()
+    Q = 6
+    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
+    m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
+    # optimize
+    m.ensure_default_constraints()
+    m.optimize(messages=1, max_f_eval=3000, xtol=1e-300, ftol=1e-300)
+    m._set_params(m._get_params())
+    fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
+    plt.sca(latent_axes)
+    m.plot_latent()
+    y = m.likelihood.Y[0, :].copy()
+    data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
+    lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
+    raw_input('Press enter to finish')
+    return m
+
+
 def cmu_mocap(subject='35', motion=['01'], in_place=True):
     data = GPy.util.datasets.cmu_mocap(subject, motion)

diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py
index 11730704..8803d6a3 100644
--- a/GPy/util/visualize.py
+++ b/GPy/util/visualize.py
@@ -5,7 +5,7 @@ import numpy as np
 import matplotlib as mpl
 import time
 import Image
-#import visual
+import visual
 
 class data_show:
     """
@@ -203,6 +203,7 @@ class lvm_dimselect(lvm):
         self.sense_axes = sense_axes
         self.labels = labels
         lvm.__init__(self,vals,Model,data_visualize,latent_axes,sense_axes,latent_index)
+        self.show_sensitivities()
         print "use left and right mouse buttons to select dimensions"
@@ -506,5 +507,5 @@ def data_play(Y, visualizer, frame_rate=30):
     for y in Y:
-        visualizer.modify(y)
+        visualizer.modify(y[None, :])
         time.sleep(1./float(frame_rate))
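For context, the playback loop that stick_play() drives is tiny. Below is a standalone sketch of the same pattern, assuming only an object with a data_show-style modify() method; PrintShow is a made-up stand-in, not GPy code.

    import time
    import numpy as np

    class PrintShow:
        """Hypothetical stand-in for a GPy.util.visualize data_show object."""
        def modify(self, y):
            print(y)                           # receives one (1, D) frame per call

    def play(Y, visualizer, frame_rate=15):
        for y in Y:                            # rows of Y are frames
            visualizer.modify(y[None, :])      # keep the (1, D) shape, as in the fix above
            time.sleep(1. / float(frame_rate))

    play(np.random.randn(4, 3), PrintShow())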
From d85aaaea962ad4bcbfe5faecfcf805fdc7039d23 Mon Sep 17 00:00:00 2001
From: Neil Lawrence
Date: Thu, 13 Jun 2013 06:39:13 +0100
Subject: [PATCH 02/19] Comment import visual in visualize due to issues in Windows/OSX.

---
 GPy/util/visualize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py
index 8803d6a3..529f0eff 100644
--- a/GPy/util/visualize.py
+++ b/GPy/util/visualize.py
@@ -5,7 +5,7 @@ import numpy as np
 import matplotlib as mpl
 import time
 import Image
-import visual
+#import visual
 
 class data_show:
     """

From 259c81273a4ea8cc1bbb42f8f458e0462b38ad71 Mon Sep 17 00:00:00 2001
From: Nicolas
Date: Thu, 13 Jun 2013 18:14:20 +0100
Subject: [PATCH 03/19] corrected minor bug in Brownian kernel

---
 GPy/kern/Brownian.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/GPy/kern/Brownian.py b/GPy/kern/Brownian.py
index 76e103af..bdfa0df5 100644
--- a/GPy/kern/Brownian.py
+++ b/GPy/kern/Brownian.py
@@ -21,7 +21,7 @@ class Brownian(Kernpart):
     def __init__(self,input_dim,variance=1.):
         self.input_dim = input_dim
         assert self.input_dim==1, "Brownian motion in 1D only"
-        self.num_params = 1.
+        self.num_params = 1
         self.name = 'Brownian'
         self._set_params(np.array([variance]).flatten())

From 0c12641ecaf1e6ccf162fb265190f72d194ac8a6 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Fri, 14 Jun 2013 14:04:53 +0100
Subject: [PATCH 04/19] changes to the efficiency of the sparse GP when there are many outputs

---
 GPy/core/sparse_gp.py | 33 ++++++++++++++++++++++-----------
 GPy/likelihoods/ep.py | 6 ++++++
 GPy/likelihoods/gaussian.py | 5 ++++-
 3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 3183cff0..04119071 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -63,6 +63,7 @@ class SparseGP(GPBase):
 
     def _computations(self):
+
         # factor Kmm
         self.Lm = jitchol(self.Kmm)
@@ -89,17 +90,18 @@
         self.B = np.eye(self.num_inducing) + self.A
         self.LB = jitchol(self.B)
 
-        # TODO: make a switch for either first compute psi1V, or VV.T
-        self.psi1V = np.dot(self.psi1.T, self.likelihood.V)
+        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT... this is for efficiency!
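# [Illustration, not part of the patch] A standalone numpy check of the factor
# identity the comment above relies on. With Gaussian noise, V = beta * Y, so
# V V^T = beta^2 Y Y^T; when D >> N it is cheaper to carry an N x N factor L
# with tdot(L) = L L^T = V V^T than the N x D matrix V itself. Names here are
# illustrative; only tdot mirrors GPy.util.linalg.
import numpy as np

def tdot(A):
    return np.dot(A, A.T)

N, D = 5, 200                                       # few data, many outputs
Y = np.random.randn(N, D)
beta = 2.0                                          # scalar noise precision
V = beta * Y                                        # the N x D matrix we want to avoid
L = np.linalg.cholesky(beta**2 * np.dot(Y, Y.T))    # an N x N factor instead
assert np.allclose(tdot(L), np.dot(V, V.T))         # tdot(factor) recovers V V^T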
+        self.psi1Vf = np.dot(self.psi1.T, self.likelihood.VVT_factor)
 
-        # back substitute C into psi1V
-        tmp, info1 = dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
-        self._LBi_Lmi_psi1V, _ = dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
+        # back substitute C into psi1Vf
+        tmp, info1 = dtrtrs(self.Lm, np.asfortranarray(self.psi1Vf), lower=1, trans=0)
+        self._LBi_Lmi_psi1Vf, _ = dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
         tmp, info2 = dpotrs(self.LB, tmp, lower=1)
-        self.Cpsi1V, info3 = dtrtrs(self.Lm, tmp, lower=1, trans=1)
+        self.Cpsi1Vf, info3 = dtrtrs(self.Lm, tmp, lower=1, trans=1)
 
         # Compute dL_dKmm
-        tmp = tdot(self._LBi_Lmi_psi1V)
+        tmp = tdot(self._LBi_Lmi_psi1Vf)
+        self.data_fit = np.trace(tmp)
         self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.output_dim * np.eye(self.num_inducing) + tmp)
         tmp = -0.5 * self.DBi_plus_BiPBi
         tmp += -0.5 * self.B * self.output_dim
@@ -108,7 +110,7 @@
         # Compute dL_dpsi
         # FIXME: this is untested for the heteroscedastic + uncertain inputs case
         self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten()
-        self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T).T
+        self.dL_dpsi1 = np.dot(self.likelihood.VVT_factor, self.Cpsi1Vf.T)
         dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
 
         if self.likelihood.is_heteroscedastic:
@@ -138,18 +140,18 @@
             # likelihood is not heteroscedastic
             self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
             self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
-            self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
+            self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - self.data_fit)
 
     def log_likelihood(self):
         """
         Compute the (lower bound on the) log marginal likelihood
         """
         if self.likelihood.is_heteroscedastic:
-            A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
+            A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V*self.likelihood.Y)
             B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
         else:
             A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
             B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
         C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
-        D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
+        D = 0.5 * self.data_fit
         return A + B + C + D + self.likelihood.Z
 
     def _set_params(self, p):
         self.likelihood._set_params(p[self.Z.size + self.kern.num_params:])
         self._compute_kernel_matrices()
         self._computations()
+        self.Cpsi1V = None
 
     def _get_params(self):
         return np.hstack([self.Z.flatten(), self.kern._get_params_transformed(), self.likelihood._get_params()])
@@ -224,6 +227,14 @@
symmetrify(Bi) Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi) + if self.Cpsi1V is None: + psi1V = np.dot(self.psi1.T,self.likelihood.V) + tmp, _ = dtrtrs(self.Lm, np.asfortranarray(psi1V), lower=1, trans=0) + tmp, _ = dpotrs(self.LB, tmp, lower=1) + self.Cpsi1V, _ = dtrtrs(self.Lm, tmp, lower=1, trans=1) + + + if X_variance_new is None: Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) mu = np.dot(Kx.T, self.Cpsi1V) diff --git a/GPy/likelihoods/ep.py b/GPy/likelihoods/ep.py index fb9a55c7..94f760e9 100644 --- a/GPy/likelihoods/ep.py +++ b/GPy/likelihoods/ep.py @@ -34,6 +34,8 @@ class EP(likelihood): self.Z = 0 self.YYT = None self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = 0. def restart(self): self.tau_tilde = np.zeros(self.N) @@ -44,6 +46,8 @@ class EP(likelihood): self.Z = 0 self.YYT = None self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = 0. def predictive_values(self,mu,var,full_cov): if full_cov: @@ -71,6 +75,8 @@ class EP(likelihood): self.covariance_matrix = np.diag(1./self.tau_tilde) self.precision = self.tau_tilde[:,None] self.V = self.precision * self.Y + self.VVT_factor = self.V + self.trYYT = np.trace(self.YYT) def fit_full(self,K): """ diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 59d8fe86..8dbd8863 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -40,9 +40,11 @@ class Gaussian(likelihood): if D > self.N: self.YYT = np.dot(self.Y, self.Y.T) self.trYYT = np.trace(self.YYT) + self.YYT_factor = jitchol(self.YYT) else: self.YYT = None self.trYYT = np.sum(np.square(self.Y)) + self.YYT_factor = self.Y def _get_params(self): return np.asarray(self._variance) @@ -53,12 +55,13 @@ class Gaussian(likelihood): def _set_params(self, x): x = np.float64(x) if np.all(self._variance != x): - if x == 0.: + if x == 0.:#special case of zero noise self.precision = np.inf self.V = None else: self.precision = 1. / x self.V = (self.precision) * self.Y + self.VVT_factor = self.precision * self.YYT_factor self.covariance_matrix = np.eye(self.N) * x self._variance = x From 5d283335823e4a4278af9221174ff4aecd488eb0 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Fri, 14 Jun 2013 14:59:18 +0100 Subject: [PATCH 05/19] changed manifest from docs to doc --- MANIFEST.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index fe342b5e..c89284cd 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ include *.txt -recursive-include docs *.txt +recursive-include doc *.txt include *.md -recursive-include docs *.md +recursive-include doc *.md From 0ca52c2346887a9bee47da748df116eea0f942ea Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 14 Jun 2013 18:49:34 +0100 Subject: [PATCH 06/19] Bug fix in the confusion matrix --- GPy/util/classification.py | 43 +++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/GPy/util/classification.py b/GPy/util/classification.py index 07e725d1..41701949 100644 --- a/GPy/util/classification.py +++ b/GPy/util/classification.py @@ -2,29 +2,30 @@ import numpy as np def conf_matrix(p,labels,names=['1','0'],threshold=.5,show=True): """ - Returns true and false positives in a binary classification problem - - Column names: true class of the examples - - Row names: classification assigned by the model + Returns error rate and true/false positives in a binary classification problem + - Actual classes are displayed by column. 
+    - Predicted classes are displayed by row.
 
-    p: probabilities estimated for observation of belonging to class '1'
-    labels: observations' class
-    names: classes' names
-    threshold: probability value at which the model allocate an element to each class
-    show: whether the matrix should be shown or not
+    :param p: array of class '1' probabilities.
+    :param labels: array of actual classes.
+    :param names: list of class names, defaults to ['1','0'].
+    :param threshold: probability value used to decide the class.
+    :param show: whether the matrix should be shown or not
+    :type show: False|True
     """
-    p = p.flatten()
-    labels = labels.flatten()
-    N = p.size
-    C = np.ones(N)
-    C[p

Date: Mon, 17 Jun 2013 13:58:51 +0100
Subject: [PATCH 07/19] began to merge the SVIGP code into GPy. Example is implemented, but the step length is a bit crazy!

---
 GPy/core/__init__.py | 5 +-
 GPy/core/gp_base.py | 1 +
 GPy/core/svigp.py | 466 +++++++++++++++++++++++++++++++++
 GPy/examples/__init__.py | 1 +
 GPy/examples/stochastic.py | 41 +++
 GPy/models/__init__.py | 1 +
 GPy/models/svigp_regression.py | 44 ++++
 7 files changed, 557 insertions(+), 2 deletions(-)
 create mode 100644 GPy/core/svigp.py
 create mode 100644 GPy/examples/stochastic.py
 create mode 100644 GPy/models/svigp_regression.py

diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py
index 32b6c02d..e9e049b0 100644
--- a/GPy/core/__init__.py
+++ b/GPy/core/__init__.py
@@ -4,6 +4,7 @@ from model import *
 from parameterised import *
 import priors
-from GPy.core.gp import GP
-from GPy.core.sparse_gp import SparseGP
+from gp import GP
+from sparse_gp import SparseGP
 from fitc import FITC
+from svigp import SVIGP

diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py
index 0799e0c2..b82f3298 100644
--- a/GPy/core/gp_base.py
+++ b/GPy/core/gp_base.py
@@ -29,6 +29,7 @@ class GPBase(Model):
         self._Xscale = np.ones((1, self.input_dim))
 
         super(GPBase, self).__init__()
+        #Model.__init__(self)
 
         # All leaf nodes should call self._set_params(self._get_params()) at
         # the end

diff --git a/GPy/core/svigp.py b/GPy/core/svigp.py
new file mode 100644
index 00000000..1db0e26f
--- /dev/null
+++ b/GPy/core/svigp.py
@@ -0,0 +1,466 @@
+# Copyright (c) 2012, James Hensman and Nicolo' Fusi
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+import pylab as pb
+from .. import kern
+from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs, jitchol, backsub_both_sides
+from ..likelihoods import EP
+from gp_base import GPBase
+from model import Model
+import time
+import sys
+
+
+class SVIGP(GPBase):
+    """
+    Stochastic Variational inference in a Gaussian Process
+
+    :param X: inputs
+    :type X: np.ndarray (N x Q)
+    :param Y: observed data
+    :type Y: np.ndarray of observations (N x D)
+    :param batchsize: the size of a batch
+
+    Additional kwargs are used as for a sparse GP. They include
+
+    :param q_u: canonical parameters of the distribution squashed into a 1D array
+    :type q_u: np.ndarray
+    :param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
+    :type M: int
+    :param kernel : the kernel/covariance function. See link kernels
+    :type kernel: a GPy kernel
+    :param Z: inducing inputs (optional, see note)
+    :type Z: np.ndarray (M x Q) | None
+    :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
+    :type X_uncertainty: np.ndarray (N x Q) | None
+    :param Zslices: slices for the inducing inputs (see slicing TODO: link)
+    :param M : Number of inducing points (optional, default 10.
Ignored if Z is not None) + :type M: int + :param beta: noise precision. TODO> ignore beta if doing EP + :type beta: float + :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) + :type normalize_(X|Y): bool + """ + + + def __init__(self, X, likelihood, kernel, Z, q_u=None, batchsize=10, X_variance=None): + GPBase.__init__(self, X, likelihood, kernel, normalize_X=False) + self.batchsize=batchsize + self.Y = self.likelihood.Y.copy() + self.Z = Z + self.num_inducing = Z.shape[0] + + self.batchcounter = 0 + self.epochs = 0 + self.iterations = 0 + + self.vb_steplength = 0.05 + self.param_steplength = 1e-5 + self.momentum = 0.9 + + if X_variance is None: + self.has_uncertain_inputs = False + else: + self.has_uncertain_inputs = True + self.X_variance = X_variance + + + if q_u is None: + q_u = np.hstack((np.random.randn(self.num_inducing*self.output_dim),-.5*np.eye(self.num_inducing).flatten())) + self.set_vb_param(q_u) + + self._permutation = np.random.permutation(self.num_data) + self.load_batch() + + self._param_trace = [] + self._ll_trace = [] + self._grad_trace = [] + + #set the adaptive steplength parameters + self.hbar_t = 0.0 + self.tau_t = 100.0 + self.gbar_t = 0.0 + self.gbar_t1 = 0.0 + self.gbar_t2 = 0.0 + self.hbar_tp = 0.0 + self.tau_tp = 10000.0 + self.gbar_tp = 0.0 + self.adapt_param_steplength = True + self.adapt_vb_steplength = True + self._param_steplength_trace = [] + self._vb_steplength_trace = [] + + def _compute_kernel_matrices(self): + # kernel computations, using BGPLVM notation + self.Kmm = self.kern.K(self.Z) + if self.has_uncertain_inputs: + self.psi0 = self.kern.psi0(self.Z, self.X_batch, self.X_variance_batch) + self.psi1 = self.kern.psi1(self.Z, self.X_batch, self.X_variance_batch) + self.psi2 = self.kern.psi2(self.Z, self.X_batch, self.X_variance_batch) + else: + self.psi0 = self.kern.Kdiag(self.X_batch) + self.psi1 = self.kern.K(self.X_batch, self.Z) + self.psi2 = None + + def dL_dtheta(self): + dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z) + if self.has_uncertain_inputs: + dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X_batch, self.X_variance_batch) + dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X_batch, self.X_variance_batch) + dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X_batch, self.X_variance_batch) + else: + dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X_batch, self.Z) + dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X_batch) + return dL_dtheta + + def _set_params(self, p, computations=True): + self.kern._set_params_transformed(p[:self.kern.num_params]) + self.likelihood._set_params(p[self.kern.num_params:]) + if computations: + self._compute_kernel_matrices() + self._computations() + + def _get_params(self): + return np.hstack((self.kern._get_params_transformed() , self.likelihood._get_params())) + + def _get_param_names(self): + return self.kern._get_param_names_transformed() + self.likelihood._get_param_names() + + def load_batch(self): + """ + load a batch of data (set self.X_batch and self.likelihood.Y from self.X, self.Y) + """ + + #if we've seen all the data, start again with them in a new random order + if self.batchcounter+self.batchsize > self.num_data: + self.batchcounter = 0 + self.epochs += 1 + self._permutation = np.random.permutation(self.num_data) + + this_perm = self._permutation[self.batchcounter:self.batchcounter+self.batchsize] + + self.X_batch = self.X[this_perm] + 
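# [Aside, not part of the patch] The batching scheme above in miniature: walk a
# fixed random permutation in batchsize chunks, and reshuffle once the data
# have all been seen (one epoch). Standalone toy, not GPy code:
import numpy as np

X = np.arange(10)
batchsize, counter, perm = 4, 0, np.random.permutation(10)
for step in range(7):
    if counter + batchsize > 10:                  # epoch finished: reshuffle
        counter, perm = 0, np.random.permutation(10)
    print(X[perm[counter:counter + batchsize]])   # one minibatch
    counter += batchsize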
self.likelihood.set_data(self.Y[this_perm]) + if self.has_uncertain_inputs: + self.X_variance_batch = self.X_variance[this_perm] + + self.batchcounter += self.batchsize + + self.data_prop = float(self.batchsize)/self.num_data + + self._compute_kernel_matrices() + self._computations() + + def _computations(self,do_Kmm=True, do_Kmm_grad=True): + """ + All of the computations needed. Some are optional, see kwargs. + """ + + if do_Kmm: + self.Lm = jitchol(self.Kmm) + + # The rather complex computations of self.A + if self.has_uncertain_inputs: + if self.likelihood.is_heteroscedastic: + psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.batchsize, 1, 1))).sum(0) + else: + psi2_beta = self.psi2.sum(0) * self.likelihood.precision + evals, evecs = linalg.eigh(psi2_beta) + clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable + tmp = evecs * np.sqrt(clipped_evals) + else: + if self.likelihood.is_heteroscedastic: + tmp = self.psi1.T * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.batchsize))) + else: + tmp = self.psi1.T * (np.sqrt(self.likelihood.precision)) + tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) + self.A = tdot(tmp) + + self.V = self.likelihood.precision*self.likelihood.Y + self.VmT = np.dot(self.V,self.q_u_expectation[0].T) + self.psi1V = np.dot(self.psi1.T, self.V) + + self.B = np.eye(self.num_inducing)*self.data_prop + self.A + self.Lambda = backsub_both_sides(self.Lm, self.B.T) + self.LQL = backsub_both_sides(self.Lm,self.q_u_expectation[1].T,transpose='right') + + self.trace_K = self.psi0.sum() - np.trace(self.A)/self.likelihood.precision + self.Kmmi_m, _ = dpotrs(self.Lm, self.q_u_expectation[0], lower=1) + self.projected_mean = np.dot(self.psi1,self.Kmmi_m) + + # Compute dL_dpsi + self.dL_dpsi0 = - 0.5 * self.output_dim * self.likelihood.precision * np.ones(self.batchsize) + self.dL_dpsi1, _ = dpotrs(self.Lm,np.asfortranarray(self.VmT.T),lower=1) + self.dL_dpsi1 = self.dL_dpsi1.T + + dL_dpsi2 = -0.5 * self.likelihood.precision * backsub_both_sides(self.Lm, self.LQL - self.output_dim * np.eye(self.num_inducing)) + if self.has_uncertain_inputs: + self.dL_dpsi2 = np.repeat(dL_dpsi2[None,:,:],self.batchsize,axis=0) + else: + self.dL_dpsi1 += 2.*np.dot(dL_dpsi2,self.psi1.T).T + self.dL_dpsi2 = None + + # Compute dL_dKmm + if do_Kmm_grad: + tmp = np.dot(self.LQL,self.A) - backsub_both_sides(self.Lm,np.dot(self.q_u_expectation[0],self.psi1V.T),transpose='right') + tmp += tmp.T + tmp += -self.output_dim*self.B + tmp += self.data_prop*self.LQL + self.dL_dKmm = 0.5*backsub_both_sides(self.Lm,tmp) + + #Compute the gradient of the log likelihood wrt noise variance + self.partial_for_likelihood = -0.5*(self.batchsize*self.output_dim - np.sum(self.A*self.LQL))*self.likelihood.precision + self.partial_for_likelihood += (0.5*self.output_dim*self.trace_K + 0.5 * self.likelihood.trYYT - np.sum(self.likelihood.Y*self.projected_mean))*self.likelihood.precision**2 + + + def log_likelihood(self): + """ + As for uncollapsed sparse GP, but account for the proportion of data we're looking at right now. + + NB. 
self.batchsize is the size of the batch, not the size of X_all + """ + assert not self.likelihood.is_heteroscedastic + A = -0.5*self.batchsize*self.output_dim*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) + B = -0.5*self.likelihood.precision*self.output_dim*self.trace_K + Kmm_logdet = 2.*np.sum(np.log(np.diag(self.Lm))) + C = -0.5*self.output_dim*self.data_prop*(Kmm_logdet-self.q_u_logdet - self.num_inducing) + C += -0.5*np.sum(self.LQL * self.B) + D = -0.5*self.likelihood.precision*self.likelihood.trYYT + E = np.sum(self.V*self.projected_mean) + return (A+B+C+D+E)/self.data_prop + + def _log_likelihood_gradients(self): + return np.hstack((self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))/self.data_prop + + def vb_grad_natgrad(self): + """ + Compute the gradients of the lower bound wrt the canonical and + Expectation parameters of u. + + Note that the natural gradient in either is given by the gradient in the other (See Hensman et al 2012 Fast Variational inference in the conjugate exponential Family) + """ + + # Gradient for eta + dL_dmmT_S = -0.5*self.Lambda/self.data_prop + 0.5*self.q_u_prec + Kmmipsi1V,_ = dpotrs(self.Lm,self.psi1V,lower=1) + dL_dm = (Kmmipsi1V - np.dot(self.Lambda,self.q_u_mean))/self.data_prop + + # Gradients for theta + S = self.q_u_cov + Si = self.q_u_prec + m = self.q_u_mean + dL_dSi = -mdot(S,dL_dmmT_S, S) + + dL_dmhSi = -2*dL_dSi + dL_dSim = np.dot(dL_dSi,m) + np.dot(Si, dL_dm) + + return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) , np.hstack((dL_dSim.flatten(), dL_dmhSi.flatten())) + + + def optimize(self, iterations, print_interval=10, callback=lambda:None, callback_interval=5): + + param_step = 0. + + #Iterate! + for i in range(iterations): + + #store the current configuration for plotting later + self._param_trace.append(self._get_params()) + self._ll_trace.append(self.log_likelihood() + self.log_prior()) + + #load a batch + self.load_batch() + + #compute the (stochastic) gradient + natgrads = self.vb_grad_natgrad() + grads = self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) + self._grad_trace.append(grads) + + #compute the steps in all parameters + vb_step = self.vb_steplength*natgrads[0] + if (self.epochs>=1):#only move the parameters after the first epoch + param_step = self.momentum*param_step + self.param_steplength*grads + else: + param_step = 0. 
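# [Aside, not part of the patch] The identity exploited by vb_grad_natgrad, in
# one dimension: for exponential-family q, the gradient taken with respect to
# the *expectation* parameters is the natural gradient in the *canonical*
# parameters. For Gaussians, the expectation-parameter gradient of -KL(q||p)
# is theta_p - theta_q, so a natural-gradient step of length 1 lands exactly
# on the target p. Toy numbers, standalone sketch:
import numpy as np

def to_natural(mean, var):
    return np.array([mean / var, -0.5 / var])      # canonical parameters

def from_natural(theta):
    var = -0.5 / theta[1]
    return theta[0] * var, var                     # back to (mean, var)

theta_q = to_natural(3., 2.)                       # current approximation q
theta_p = to_natural(-1., 0.5)                     # target distribution p
natgrad = theta_p - theta_q                        # grad of -KL in expectation params
print(from_natural(theta_q + 1.0 * natgrad))       # -> (-1.0, 0.5): exactly p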
+ + self.set_vb_param(self.get_vb_param() + vb_step) + #Note: don't recompute everything here, wait until the next iteration when we have a new batch + self._set_params(self._untransform_params(self._get_params_transformed() + param_step), computations=False) + + #print messages if desired + if i and (not i%print_interval): + print i, np.mean(self._ll_trace[-print_interval:]) #, self.log_likelihood() + print np.round(np.mean(self._grad_trace[-print_interval:],0),3) + sys.stdout.flush() + + #callback + if i and not i%callback_interval: + callback() + time.sleep(0.1) + + if self.epochs > 10: + self._adapt_steplength() + + self.iterations += 1 + + + def _adapt_steplength(self): + if self.adapt_vb_steplength: + # self._adaptive_vb_steplength() + self._adaptive_vb_steplength_KL() + self._vb_steplength_trace.append(self.vb_steplength) + assert self.vb_steplength > 0 + + if self.adapt_param_steplength: + # self._adaptive_param_steplength() + # self._adaptive_param_steplength_log() + self._adaptive_param_steplength_from_vb() + self._param_steplength_trace.append(self.param_steplength) + + def _adaptive_param_steplength(self): + decr_factor = 0.1 + g_tp = self._transform_gradients(self._log_likelihood_gradients()) + self.gbar_tp = (1-1/self.tau_tp)*self.gbar_tp + 1/self.tau_tp * g_tp + self.hbar_tp = (1-1/self.tau_tp)*self.hbar_tp + 1/self.tau_tp * np.dot(g_tp.T, g_tp) + new_param_steplength = np.dot(self.gbar_tp.T, self.gbar_tp) / self.hbar_tp + #- hack + new_param_steplength *= decr_factor + self.param_steplength = (self.param_steplength + new_param_steplength)/2 + #- + self.tau_tp = self.tau_tp*(1-self.param_steplength) + 1 + + def _adaptive_param_steplength_log(self): + stp = np.logspace(np.log(0.0001), np.log(1e-6), base=np.e, num=18000) + self.param_steplength = stp[self.iterations] + + def _adaptive_param_steplength_log2(self): + self.param_steplength = (self.iterations + 0.001)**-0.5 + + def _adaptive_param_steplength_from_vb(self): + self.param_steplength = self.vb_steplength * 0.01 + + def _adaptive_vb_steplength(self): + decr_factor = 0.1 + g_t = self.vb_grad_natgrad()[0] + self.gbar_t = (1-1/self.tau_t)*self.gbar_t + 1/self.tau_t * g_t + self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t.T, g_t) + new_vb_steplength = np.dot(self.gbar_t.T, self.gbar_t) / self.hbar_t + #- hack + new_vb_steplength *= decr_factor + self.vb_steplength = (self.vb_steplength + new_vb_steplength)/2 + #- + self.tau_t = self.tau_t*(1-self.vb_steplength) + 1 + + def _adaptive_vb_steplength_KL(self): + decr_factor = 1 #0.1 + natgrad = self.vb_grad_natgrad() + g_t1 = natgrad[0] + g_t2 = natgrad[1] + self.gbar_t1 = (1-1/self.tau_t)*self.gbar_t1 + 1/self.tau_t * g_t1 + self.gbar_t2 = (1-1/self.tau_t)*self.gbar_t2 + 1/self.tau_t * g_t2 + self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t1.T, g_t2) + self.vb_steplength = np.dot(self.gbar_t1.T, self.gbar_t2) / self.hbar_t + self.vb_steplength *= decr_factor + self.tau_t = self.tau_t*(1-self.vb_steplength) + 1 + + def _raw_predict(self, X_new, X_variance_new=None, which_parts='all',full_cov=False): + """Internal helper function for making predictions, does not account for normalization""" + + #TODO: make this more efficient! 
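# [Aside, not part of the patch] The adaptive step-length recipe implemented
# above, run on a noisy 1-D quadratic: gbar and hbar track running moments of
# the stochastic gradient, the step is gbar^2/hbar (large when gradients agree,
# small when they are mostly noise), and the memory tau grows as steps shrink.
# Standalone toy, not GPy code:
import numpy as np

x, gbar, hbar, tau = 5., 0., 0., 10.
for t in range(500):
    g = x + 0.5 * np.random.randn()       # noisy gradient of f(x) = x**2 / 2
    gbar = (1 - 1. / tau) * gbar + (1. / tau) * g
    hbar = (1 - 1. / tau) * hbar + (1. / tau) * g * g
    rho = gbar * gbar / hbar              # adaptive step length, always in (0, 1)
    x -= rho * g
    tau = tau * (1 - rho) + 1
print(abs(x))                             # close to the optimum at 0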
+ self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm) + tmp = self.Kmmi- mdot(self.Kmmi,self.q_u_cov,self.Kmmi) + + if X_variance_new is None: + Kx = self.kern.K(X_new,self.Z) + mu = np.dot(Kx,self.Kmmi_m) + if full_cov: + Kxx = self.kern.K(X_new) + var = Kxx - mdot(Kx,tmp,Kx.T) + else: + Kxx = self.kern.Kdiag(X_new) + var = (Kxx - np.sum(Kx*np.dot(Kx,tmp),1))[:,None] + return mu, var + else: + assert X_variance_new.shape == X_new.shape + Kx = self.kern.psi1(self.Z,X_new, X_variance_new) + mu = np.dot(Kx,self.Kmmi_m) + Kxx = self.kern.psi0(self.Z,X_new,X_variance_new) + psi2 = self.kern.psi2(self.Z,X_new,X_variance_new) + diag_var = Kxx - np.sum(np.sum(psi2*tmp[None,:,:],1),1) + if full_cov: + raise NotImplementedError + else: + return mu, diag_var[:,None] + + def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False): + # normalize X values + Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale + if X_variance_new is not None: + X_variance_new = X_variance_new / self._Xscale ** 2 + + # here's the actual prediction by the GP model + mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts) + + # now push through likelihood + mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov) + + return mean, var, _025pm, _975pm + + + def set_vb_param(self,vb_param): + """set the distribution q(u) from the canonical parameters""" + self.q_u_canonical_flat = vb_param.copy() + self.q_u_canonical = self.q_u_canonical_flat[:self.num_inducing*self.output_dim].reshape(self.num_inducing,self.output_dim),self.q_u_canonical_flat[self.num_inducing*self.output_dim:].reshape(self.num_inducing,self.num_inducing) + + self.q_u_prec = -2.*self.q_u_canonical[1] + self.q_u_cov, q_u_Li, q_u_L, tmp = pdinv(self.q_u_prec) + self.q_u_Li = q_u_Li + self.q_u_logdet = -tmp + self.q_u_mean, _ = dpotrs(q_u_Li, np.asfortranarray(self.q_u_canonical[0]),lower=1) + + self.q_u_expectation = (self.q_u_mean, np.dot(self.q_u_mean,self.q_u_mean.T)+self.q_u_cov*self.output_dim) + + + def get_vb_param(self): + """ + Return the canonical parameters of the distribution q(u) + """ + return self.q_u_canonical_flat + + + def plot(self, ax=None, fignum=None, Z_height=None, **kwargs): + + if ax is None: + fig = pb.figure(num=fignum) + ax = fig.add_subplot(111) + + #horrible hack here: + data = self.likelihood.data.copy() + self.likelihood.data = self.Y + GPBase.plot(self, ax=ax, **kwargs) + self.likelihood.data = data + + Zu = self.Z * self._Xscale + self._Xoffset + if self.input_dim==1: + ax.plot(self.X_batch, self.likelihood.data, 'gx',mew=2) + if Z_height is None: + Z_height = ax.get_ylim()[0] + ax.plot(Zu, np.zeros_like(Zu) + Z_height, 'r|', mew=1.5, markersize=12) + + if self.input_dim==2: + ax.scatter(self.X_all[:,0], self.X_all[:,1], 20., self.Y[:,0], linewidth=0, cmap=pb.cm.jet) + ax.plot(Zu[:,0], Zu[:,1], 'w^') + + def plot_traces(self): + pb.figure() + t = np.array(self._param_trace) + pb.subplot(2,1,1) + for l,ti in zip(self._get_param_names(),t.T): + if not l[:3]=='iip': + pb.plot(ti,label=l) + pb.legend(loc=0) + + pb.subplot(2,1,2) + pb.plot(np.asarray(self._ll_trace),label='stochastic likelihood') + pb.legend(loc=0) diff --git a/GPy/examples/__init__.py b/GPy/examples/__init__.py index 0d73daff..2f74858a 100644 --- a/GPy/examples/__init__.py +++ b/GPy/examples/__init__.py @@ -5,3 +5,4 @@ import classification import regression import dimensionality_reduction import tutorials +import stochastic diff --git a/GPy/examples/stochastic.py 
b/GPy/examples/stochastic.py
new file mode 100644
index 00000000..45418d5c
--- /dev/null
+++ b/GPy/examples/stochastic.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import pylab as pb
+import numpy as np
+import GPy
+
+def toy_1d():
+    N = 2000
+    M = 20
+
+    #create data
+    X = np.linspace(0,32,N)[:,None]
+    Z = np.linspace(0,32,M)[:,None]
+    Y = np.sin(X) + np.cos(0.3*X) + np.random.randn(*X.shape)/np.sqrt(50.)
+
+    m = GPy.models.SVIGPRegression(X,Y, batchsize=10, Z=Z)
+    m.ensure_default_constraints()
+    m.constrain_bounded('noise_variance',1e-3,1e-1)
+
+    m.param_steplength = 1e-4
+
+    fig = pb.figure()
+    ax = fig.add_subplot(111)
+    def cb():
+        ax.cla()
+        m.plot(ax=ax,Z_height=-3)
+        ax.set_ylim(-3,3)
+        fig.canvas.draw()
+
+    m.optimize(500, callback=cb, callback_interval=1)
+
+    m.plot_traces()
+    return m
+
+
+
+
+
+

diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index f18e89db..885372a1 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -4,6 +4,7 @@ from gp_regression import GPRegression
 from gp_classification import GPClassification
 from sparse_gp_regression import SparseGPRegression
+from svigp_regression import SVIGPRegression
 from sparse_gp_classification import SparseGPClassification
 from fitc_classification import FITCClassification
 from gplvm import GPLVM

diff --git a/GPy/models/svigp_regression.py b/GPy/models/svigp_regression.py
new file mode 100644
index 00000000..8448bf37
--- /dev/null
+++ b/GPy/models/svigp_regression.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2012, James Hensman
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+from ..core import SVIGP
+from .. import likelihoods
+from .. import kern
+
+class SVIGPRegression(SVIGP):
+    """
+    Gaussian Process model for regression
+
+    This is a thin wrapper around the SVIGP class, with a set of sensible defaults
+
+    :param X: input observations
+    :param Y: observed values
+    :param kernel: a GPy kernel, defaults to rbf+white
+    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
+    :type normalize_X: False|True
+    :param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
+    :type normalize_Y: False|True
+    :rtype: model object
+
+    .. Note:: Multiple independent outputs are allowed using columns of Y
+
+    """
+
+    def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, q_u=None, batchsize=10):
+        # kern defaults to rbf (plus white for stability)
+        if kernel is None:
+            kernel = kern.rbf(X.shape[1], variance=1., lengthscale=4.)
+ kern.white(X.shape[1], 1e-3) + + # Z defaults to a subset of the data + if Z is None: + i = np.random.permutation(X.shape[0])[:num_inducing] + Z = X[i].copy() + else: + assert Z.shape[1] == X.shape[1] + + # likelihood defaults to Gaussian + likelihood = likelihoods.Gaussian(Y, normalize=False) + + SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize) + self.load_batch() From 068c74a515bf1ffc05e54339a48e38ab4efbd20d Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 17 Jun 2013 15:04:35 +0100 Subject: [PATCH 08/19] New version number --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4e9873b8..90645e71 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ import os from setuptools import setup # Version number -version = '0.4.5' +version = '0.4.6' def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() From 106f9ec8b8d37e184ad5c9475b5295747a09f7cf Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Mon, 17 Jun 2013 15:56:11 +0100 Subject: [PATCH 09/19] fixed fixed kernel (aha!) --- GPy/kern/fixed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPy/kern/fixed.py b/GPy/kern/fixed.py index 9e8f6226..67baea91 100644 --- a/GPy/kern/fixed.py +++ b/GPy/kern/fixed.py @@ -15,7 +15,7 @@ class Fixed(Kernpart): self.input_dim = input_dim self.fixed_K = K self.num_params = 1 - self.name = 'Fixed' + self.name = 'fixed' self._set_params(np.array([variance]).flatten()) def _get_params(self): From 8fd8288fb88b4e14a4150bc23d3a4372d0f60de9 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Mon, 17 Jun 2013 15:57:19 +0100 Subject: [PATCH 10/19] ensure_default_constraints is on by default --- GPy/examples/classification.py | 5 ----- GPy/examples/dimensionality_reduction.py | 12 ------------ GPy/examples/regression.py | 14 -------------- GPy/examples/stochastic.py | 1 - GPy/examples/tutorials.py | 3 --- GPy/kern/rbf.py | 14 +++++++------- GPy/models/bayesian_gplvm.py | 2 +- GPy/models/fitc_classification.py | 2 +- GPy/models/gp_classification.py | 2 +- GPy/models/gp_regression.py | 2 +- GPy/models/gplvm.py | 2 +- GPy/models/mrd.py | 2 +- GPy/models/sparse_gp_classification.py | 2 +- GPy/models/sparse_gp_regression.py | 2 +- GPy/models/sparse_gplvm.py | 1 + GPy/testing/bgplvm_tests.py | 5 ----- GPy/testing/gplvm_tests.py | 3 --- GPy/testing/mrd_tests.py | 1 - GPy/testing/prior_tests.py | 3 --- GPy/testing/psi_stat_gradient_tests.py | 2 -- GPy/testing/sparse_gplvm_tests.py | 3 --- GPy/testing/unit_tests.py | 4 ---- 22 files changed, 16 insertions(+), 71 deletions(-) diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py index 4f1a4ebc..c7daa26b 100644 --- a/GPy/examples/classification.py +++ b/GPy/examples/classification.py @@ -25,7 +25,6 @@ def crescent_data(seed=default_seed): # FIXME Y[Y.flatten()==-1] = 0 m = GPy.models.GPClassification(data['X'], Y) - m.ensure_default_constraints() #m.update_likelihood_approximation() #m.optimize() m.pseudo_EM() @@ -75,7 +74,6 @@ def toy_linear_1d_classification(seed=default_seed): # Model definition m = GPy.models.GPClassification(data['X'], Y) - m.ensure_default_constraints() # Optimize #m.update_likelihood_approximation() @@ -106,7 +104,6 @@ def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed): m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing) m['.*len']= 4. 
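# [Aside, not part of the patch] String keys like '.*len' above are regular
# expressions over parameter names, so one line can set or constrain several
# parameters at once. A sketch of the idiom; the parameter names matched
# depend on the kernel, and '.*lengthscale' is assumed to match the default
# rbf kernel here:
import numpy as np
import GPy

X = np.random.randn(20, 1)
Y = np.sin(X) + 0.05 * np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y)
m['.*lengthscale'] = 2.                        # every parameter matching the regex
m.constrain_bounded('.*variance', 1e-3, 10.)   # constraints accept patterns too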
- m.ensure_default_constraints() # Optimize #m.update_likelihood_approximation() # Parameters optimization: @@ -137,7 +134,6 @@ def sparse_crescent_data(num_inducing=10, seed=default_seed): Y[Y.flatten()==-1]=0 m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing) - m.ensure_default_constraints() m['.*len'] = 10. #m.update_likelihood_approximation() #m.optimize() @@ -163,7 +159,6 @@ def FITC_crescent_data(num_inducing=10, seed=default_seed): m = GPy.models.FITCClassification(data['X'], Y,num_inducing=num_inducing) m.constrain_bounded('.*len',1.,1e3) - m.ensure_default_constraints() m['.*len'] = 3. #m.update_likelihood_approximation() #m.optimize() diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 28ee2bde..16afe5eb 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -37,7 +37,6 @@ def BGPLVM(seed=default_seed): # m.optimize(messages = 1) # m.plot() # pb.title('After optimisation') - m.ensure_default_constraints() m.randomize() m.checkgrad(verbose=1) @@ -53,7 +52,6 @@ def GPLVM_oil_100(optimize=True): m.data_labels = data['Y'].argmax(axis=1) # optimize - m.ensure_default_constraints() if optimize: m.optimize('scg', messages=1) @@ -108,7 +106,6 @@ def swiss_roll(optimize=True, N=1000, num_inducing=15, Q=4, sigma=.2, plot=False m.data_colors = c m.data_t = t - m.ensure_default_constraints() m['rbf_lengthscale'] = 1. # X.var(0).max() / X.var(0) m['noise_variance'] = Y.var() / 100. m['bias_variance'] = 0.05 @@ -134,7 +131,6 @@ def BGPLVM_oil(optimize=True, N=200, Q=10, num_inducing=15, max_f_eval=50, plot= m['.*lengt'] = 1. # m.X.var(0).max() / m.X.var(0) m['noise'] = Yn.var() / 100. - m.ensure_default_constraints() # optimize if optimize: @@ -159,7 +155,6 @@ def oil_100(): m = GPy.models.GPLVM(data['X'], 2) # optimize - m.ensure_default_constraints() m.optimize(messages=1, max_iters=2) # plot @@ -239,7 +234,6 @@ def bgplvm_simulation_matlab_compare(): # X=mu, # X_variance=S, _debug=False) - m.ensure_default_constraints() m.auto_scale_factor = True m['noise'] = Y.var() / 100. m['linear_variance'] = .01 @@ -262,7 +256,6 @@ def bgplvm_simulation(optimize='scg', k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, _debug=True) # m.constrain('variance|noise', logexp_clipped()) - m.ensure_default_constraints() m['noise'] = Y.var() / 100. m['linear_variance'] = .01 @@ -291,7 +284,6 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): for i, Y in enumerate(Ylist): m['{}_noise'.format(i + 1)] = Y.var() / 100. 
- m.ensure_default_constraints() # DEBUG # np.seterr("raise") @@ -319,7 +311,6 @@ def brendan_faces(): # optimize m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) - m.ensure_default_constraints() m.optimize('scg', messages=1, max_f_eval=10000) ax = m.plot_latent(which_indices=(0, 1)) @@ -336,7 +327,6 @@ def stick(): m = GPy.models.GPLVM(data['Y'], 2) # optimize - m.ensure_default_constraints() m.optimize(messages=1, max_f_eval=10000) m._set_params(m._get_params()) @@ -359,7 +349,6 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True): m = GPy.models.GPLVM(data['Y'], 2, normalize_Y=True) # optimize - m.ensure_default_constraints() m.optimize(messages=1, max_f_eval=10000) ax = m.plot_latent() @@ -391,7 +380,6 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True): # m.set('iip', Z) # m.set('bias', 1e-4) # # optimize -# # m.ensure_default_constraints() # # import pdb; pdb.set_trace() # m.optimize('tnc', messages=1) diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 726a9085..21b435e7 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -18,7 +18,6 @@ def toy_rbf_1d(optimizer='tnc', max_nb_eval_optim=100): m = GPy.models.GPRegression(data['X'],data['Y']) # optimize - m.ensure_default_constraints() m.optimize(optimizer, max_f_eval=max_nb_eval_optim) # plot m.plot() @@ -36,7 +35,6 @@ def rogers_girolami_olympics(optim_iters=100): m['rbf_lengthscale'] = 10 # optimize - m.ensure_default_constraints() m.optimize(max_f_eval=optim_iters) # plot @@ -52,7 +50,6 @@ def toy_rbf_1d_50(optim_iters=100): m = GPy.models.GPRegression(data['X'],data['Y']) # optimize - m.ensure_default_constraints() m.optimize(max_f_eval=optim_iters) # plot @@ -68,7 +65,6 @@ def silhouette(optim_iters=100): m = GPy.models.GPRegression(data['X'],data['Y']) # optimize - m.ensure_default_constraints() m.optimize(messages=True,max_f_eval=optim_iters) print(m) @@ -92,7 +88,6 @@ def coregionalisation_toy2(optim_iters=100): m = GPy.models.GPRegression(X,Y,kernel=k) m.constrain_fixed('.*rbf_var',1.) #m.constrain_positive('.*kappa') - m.ensure_default_constraints() m.optimize('sim',messages=1,max_f_eval=optim_iters) pb.figure() @@ -124,7 +119,6 @@ def coregionalisation_toy(optim_iters=100): m = GPy.models.GPRegression(X,Y,kernel=k) m.constrain_fixed('.*rbf_var',1.) #m.constrain_positive('kappa') - m.ensure_default_constraints() m.optimize(max_f_eval=optim_iters) pb.figure() @@ -162,7 +156,6 @@ def coregionalisation_sparse(optim_iters=100): m.constrain_fixed('.*rbf_var',1.) 
m.constrain_fixed('iip') m.constrain_bounded('noise_variance',1e-3,1e-1) - m.ensure_default_constraints() m.optimize_restarts(5, robust=True, messages=1, max_f_eval=optim_iters) #plotting: @@ -189,11 +182,9 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000 log_SNRs = np.linspace(-3., 4., resolution) data = GPy.util.datasets.della_gatta_TRP63_gene_expression(gene_number) - # Sub sample the data to ensure multiple optima #data['Y'] = data['Y'][0::2, :] #data['X'] = data['X'][0::2, :] - # Remove the mean (no bias kernel to ensure signal/noise is in RBF/white) data['Y'] = data['Y'] - np.mean(data['Y']) lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf) @@ -220,7 +211,6 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000 optim_point_y[0] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']); # optimize - m.ensure_default_constraints() m.optimize('scg', xtol=1e-6, ftol=1e-6, max_f_eval=optim_iters) optim_point_x[1] = m['rbf_lengthscale'] @@ -273,7 +263,6 @@ def sparse_GP_regression_1D(N = 400, num_inducing = 5, optim_iters=100): # create simple GP Model m = GPy.models.SparseGPRegression(X, Y, kernel, num_inducing=num_inducing) - m.ensure_default_constraints() m.checkgrad(verbose=1) m.optimize('tnc', messages = 1, max_f_eval=optim_iters) @@ -294,7 +283,6 @@ def sparse_GP_regression_2D(N = 400, num_inducing = 50, optim_iters=100): m = GPy.models.SparseGPRegression(X,Y,kernel, num_inducing = num_inducing) # contrain all parameters to be positive (but not inducing inputs) - m.ensure_default_constraints() m.set('.*len',2.) m.checkgrad() @@ -320,7 +308,6 @@ def uncertain_inputs_sparse_regression(optim_iters=100): # create simple GP Model - no input uncertainty on this one m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z) - m.ensure_default_constraints() m.optimize('scg', messages=1, max_f_eval=optim_iters) m.plot(ax=axes[0]) axes[0].set_title('no input uncertainty') @@ -328,7 +315,6 @@ def uncertain_inputs_sparse_regression(optim_iters=100): #the same Model with uncertainty m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S) - m.ensure_default_constraints() m.optimize('scg', messages=1, max_f_eval=optim_iters) m.plot(ax=axes[1]) axes[1].set_title('with input uncertainty') diff --git a/GPy/examples/stochastic.py b/GPy/examples/stochastic.py index 45418d5c..533904d5 100644 --- a/GPy/examples/stochastic.py +++ b/GPy/examples/stochastic.py @@ -15,7 +15,6 @@ def toy_1d(): Y = np.sin(X) + np.cos(0.3*X) + np.random.randn(*X.shape)/np.sqrt(50.) m = GPy.models.SVIGPRegression(X,Y, batchsize=10, Z=Z) - m.ensure_default_constraints() m.constrain_bounded('noise_variance',1e-3,1e-1) m.param_steplength = 1e-4 diff --git a/GPy/examples/tutorials.py b/GPy/examples/tutorials.py index 6950af37..69fc2aaf 100644 --- a/GPy/examples/tutorials.py +++ b/GPy/examples/tutorials.py @@ -24,7 +24,6 @@ def tuto_GP_regression(): print m m.plot() - m.ensure_default_constraints() m.constrain_positive('') m.unconstrain('') # may be used to remove the previous constrains @@ -135,7 +134,6 @@ def tuto_kernel_overview(): pb.ylabel("+ ",rotation='horizontal',fontsize='30') m.plot(ax=axs, which_parts=[False,False,False,True]) - m.ensure_default_constraints() return(m) @@ -144,6 +142,5 @@ def model_interaction(): Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5. 
k = GPy.kern.rbf(1) + GPy.kern.bias(1) m = GPy.models.GPRegression(X, Y, kernel=k) - m.ensure_default_constraints() return m diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 5686d7bd..03b37b01 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -241,9 +241,9 @@ class rbf(Kernpart): # here are the "statistics" for psi1 and psi2 if not np.array_equal(Z, self._Z): #Z has changed, compute Z specific stuff - self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # num_inducing,num_inducing,input_dim - self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # num_inducing,num_inducing,input_dim - self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # num_inducing,num_inducing,input_dim + self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q + self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # M,M,Q + self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # M,M,Q self._Z = Z if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)): @@ -257,12 +257,12 @@ class rbf(Kernpart): self._psi1 = self.variance*np.exp(self._psi1_exponent) #psi2 - self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,num_inducing,num_inducing,input_dim + self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,M,M,Q self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu,self._psi2_Zhat) - #self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,num_inducing,num_inducing,input_dim + #self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q #self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom) - #self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,num_inducing,num_inducing - self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,num_inducing,num_inducing + #self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q + self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M,Q #store matrices for caching self._Z, self._mu, self._S = Z, mu,S diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 8043c635..c401f788 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -60,7 +60,7 @@ class BayesianGPLVM(SparseGP, GPLVM): self._savedABCD = [] SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs) - self._set_params(self._get_params()) + self.ensure_default_constraints() @property def oldps(self): diff --git a/GPy/models/fitc_classification.py b/GPy/models/fitc_classification.py index 65178c8c..f4cf4e8d 100644 --- a/GPy/models/fitc_classification.py +++ b/GPy/models/fitc_classification.py @@ -44,4 +44,4 @@ class FITCClassification(FITC): assert Z.shape[1]==X.shape[1] FITC.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X) - self._set_params(self._get_params()) + self.ensure_default_constraints() diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index 376f0005..c6012988 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -38,4 +38,4 @@ class GPClassification(GP): raise Warning, 'likelihood.data and Y are different.' 
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) - self._set_params(self._get_params()) + self.ensure_default_constraints() diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py index 8d0b02e0..db5d21b2 100644 --- a/GPy/models/gp_regression.py +++ b/GPy/models/gp_regression.py @@ -32,4 +32,4 @@ class GPRegression(GP): likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y) GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) - self._set_params(self._get_params()) + self.ensure_default_constraints() diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index e602a59a..44a9d2ce 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -33,7 +33,7 @@ class GPLVM(GP): kernel = kern.rbf(input_dim, ARD=input_dim>1) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2)) likelihood = Gaussian(Y, normalize=normalize_Y) GP.__init__(self, X, likelihood, kernel, normalize_X=False) - self._set_params(self._get_params()) + self.ensure_default_constraints() def initialise_latent(self, init, input_dim, Y): if init == 'PCA': diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 75c6fee9..1d521e5d 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -79,7 +79,7 @@ class MRD(Model): self.MQ = self.num_inducing * self.input_dim Model.__init__(self) - self._set_params(self._get_params()) + self.ensure_default_constraints() @property def X(self): diff --git a/GPy/models/sparse_gp_classification.py b/GPy/models/sparse_gp_classification.py index 9027ef07..9228fb89 100644 --- a/GPy/models/sparse_gp_classification.py +++ b/GPy/models/sparse_gp_classification.py @@ -44,4 +44,4 @@ class SparseGPClassification(SparseGP): assert Z.shape[1]==X.shape[1] SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X) - self._set_params(self._get_params()) + self.ensure_default_constraints() diff --git a/GPy/models/sparse_gp_regression.py b/GPy/models/sparse_gp_regression.py index 432d6e18..0dcef3e0 100644 --- a/GPy/models/sparse_gp_regression.py +++ b/GPy/models/sparse_gp_regression.py @@ -42,4 +42,4 @@ class SparseGPRegression(SparseGP): likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y) SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance) - self._set_params(self._get_params()) + self.ensure_default_constraints() diff --git a/GPy/models/sparse_gplvm.py b/GPy/models/sparse_gplvm.py index ea2f8013..d6f4adb9 100644 --- a/GPy/models/sparse_gplvm.py +++ b/GPy/models/sparse_gplvm.py @@ -26,6 +26,7 @@ class SparseGPLVM(SparseGPRegression, GPLVM): def __init__(self, Y, input_dim, kernel=None, init='PCA', num_inducing=10): X = self.initialise_latent(init, input_dim, Y) SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing) + self.ensure_default_constraints() def _get_param_names(self): return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py index ff558f6d..6b91d999 100644 --- a/GPy/testing/bgplvm_tests.py +++ b/GPy/testing/bgplvm_tests.py @@ -16,7 +16,6 @@ class BGPLVMTests(unittest.TestCase): Y -= Y.mean(axis=0) k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) - m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -29,7 +28,6 @@ class BGPLVMTests(unittest.TestCase): Y -= Y.mean(axis=0) k = GPy.kern.linear(input_dim) + 
GPy.kern.white(input_dim, 0.00001) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) - m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -42,7 +40,6 @@ class BGPLVMTests(unittest.TestCase): Y -= Y.mean(axis=0) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) - m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -55,7 +52,6 @@ class BGPLVMTests(unittest.TestCase): Y -= Y.mean(axis=0) k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) - m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -69,7 +65,6 @@ class BGPLVMTests(unittest.TestCase): Y -= Y.mean(axis=0) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) - m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/gplvm_tests.py b/GPy/testing/gplvm_tests.py index 8c2ba9fc..ebb5c4e5 100644 --- a/GPy/testing/gplvm_tests.py +++ b/GPy/testing/gplvm_tests.py @@ -14,7 +14,6 @@ class GPLVMTests(unittest.TestCase): Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) m = GPy.models.GPLVM(Y, input_dim, kernel = k) - m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -26,7 +25,6 @@ class GPLVMTests(unittest.TestCase): Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) m = GPy.models.GPLVM(Y, input_dim, kernel = k) - m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -38,7 +36,6 @@ class GPLVMTests(unittest.TestCase): Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) m = GPy.models.GPLVM(Y, input_dim, kernel = k) - m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/mrd_tests.py b/GPy/testing/mrd_tests.py index b0137709..40fcb86a 100644 --- a/GPy/testing/mrd_tests.py +++ b/GPy/testing/mrd_tests.py @@ -24,7 +24,6 @@ class MRDTests(unittest.TestCase): likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist] m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, num_inducing=num_inducing) - m.ensure_default_constraints() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/prior_tests.py b/GPy/testing/prior_tests.py index e0226751..c16057db 100644 --- a/GPy/testing/prior_tests.py +++ b/GPy/testing/prior_tests.py @@ -14,7 +14,6 @@ class PriorTests(unittest.TestCase): y += 0.05*np.random.randn(len(X)) X, y = X[:, None], y[:, None] m = GPy.models.GPRegression(X, y) - m.ensure_default_constraints() lognormal = GPy.priors.LogGaussian(1, 2) m.set_prior('rbf', lognormal) m.randomize() @@ -28,7 +27,6 @@ class PriorTests(unittest.TestCase): y += 0.05*np.random.randn(len(X)) X, y = X[:, None], y[:, None] m = GPy.models.GPRegression(X, y) - m.ensure_default_constraints() Gamma = GPy.priors.Gamma(1, 1) m.set_prior('rbf', Gamma) m.randomize() @@ -42,7 +40,6 @@ class PriorTests(unittest.TestCase): y += 0.05*np.random.randn(len(X)) X, y = X[:, None], y[:, None] m = GPy.models.GPRegression(X, y) - m.ensure_default_constraints() gaussian = GPy.priors.Gaussian(1, 1) success = 
False diff --git a/GPy/testing/psi_stat_gradient_tests.py b/GPy/testing/psi_stat_gradient_tests.py index c110d270..de670f41 100644 --- a/GPy/testing/psi_stat_gradient_tests.py +++ b/GPy/testing/psi_stat_gradient_tests.py @@ -113,7 +113,6 @@ if __name__ == "__main__": # Y -= Y.mean(axis=0) # k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) # m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) -# m.ensure_default_constraints() # m.randomize() # # self.assertTrue(m.checkgrad()) numpy.random.seed(0) @@ -146,7 +145,6 @@ if __name__ == "__main__": # num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim)) m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim))) - m3.ensure_default_constraints() # + GPy.kern.bias(input_dim)) # m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, # num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)) diff --git a/GPy/testing/sparse_gplvm_tests.py b/GPy/testing/sparse_gplvm_tests.py index 6145f350..e27fccff 100644 --- a/GPy/testing/sparse_gplvm_tests.py +++ b/GPy/testing/sparse_gplvm_tests.py @@ -15,7 +15,6 @@ class sparse_GPLVMTests(unittest.TestCase): Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) - m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -27,7 +26,6 @@ class sparse_GPLVMTests(unittest.TestCase): Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) - m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -39,7 +37,6 @@ class sparse_GPLVMTests(unittest.TestCase): Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) - m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index 494ebf19..6e504a69 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -37,7 +37,6 @@ class GradientTests(unittest.TestCase): noise = GPy.kern.white(dimension) kern = kern + noise m = model_fit(X, Y, kernel=kern) - m.ensure_default_constraints() m.randomize() # contrain all parameters to be positive self.assertTrue(m.checkgrad()) @@ -150,7 +149,6 @@ class GradientTests(unittest.TestCase): K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T m = GPy.models.GPLVM(Y, input_dim, kernel=k) - m.ensure_default_constraints() self.assertTrue(m.checkgrad()) def test_GPLVM_rbf_linear_white_kern_2D(self): @@ -161,7 +159,6 @@ class GradientTests(unittest.TestCase): K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k) - m.ensure_default_constraints() self.assertTrue(m.checkgrad()) def test_GP_EP_probit(self): @@ -195,7 +192,6 @@ class GradientTests(unittest.TestCase): k = GPy.kern.rbf(1) + GPy.kern.white(1) Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None] m = GPy.models.FITCClassification(X, Y=Y) - m.ensure_default_constraints() m.update_likelihood_approximation() self.assertTrue(m.checkgrad()) 
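
All of these tests exercise the same gradient check. A minimal standalone sketch of that pattern, assuming the 2013-era GPy API used in these diffs (the removed ensure_default_constraints() calls are no longer needed, presumably because the models now apply their default positivity constraints themselves):

import numpy as np
import GPy

# toy dataset: sample GP outputs at random inputs, as the tests above do
N, input_dim = 10, 2
X = np.random.rand(N, input_dim)
K = GPy.kern.rbf(input_dim).K(X)
Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T

# kernel plus model, then verify analytic gradients against finite differences
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = GPy.models.GPLVM(Y, input_dim, kernel=k)
m.randomize()         # draw random values for all free parameters
assert m.checkgrad()  # True when analytic and numerical gradients agree
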
From 908b11c701d155cfe3b79be8e11b31f4c2d74dd9 Mon Sep 17 00:00:00 2001
From: Ricardo
Date: Fri, 14 Jun 2013 18:49:34 +0100
Subject: [PATCH 11/19] Bug fix in the confusion matrix

---
 GPy/util/classification.py | 43 +++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/GPy/util/classification.py b/GPy/util/classification.py
index 07e725d1..41701949 100644
--- a/GPy/util/classification.py
+++ b/GPy/util/classification.py
@@ -2,29 +2,30 @@ import numpy as np

 def conf_matrix(p,labels,names=['1','0'],threshold=.5,show=True):
     """
-    Returns true and false positives in a binary classification problem
-    - Column names: true class of the examples
-    - Row names: classification assigned by the model
+    Returns error rate and true/false positives in a binary classification problem
+    - Actual classes are displayed by column.
+    - Predicted classes are displayed by row.

-    - p: probabilities estimated for observation of belonging to class '1'
-    - labels: observations' class
-    - names: classes' names
-    - threshold: probability value at which the model allocate an element to each class
-    - show: whether the matrix should be shown or not
+    :param p: array of class '1' probabilities.
+    :param labels: array of actual classes.
+    :param names: list of class names, defaults to ['1','0'].
+    :param threshold: probability value used to decide the class.
+    :param show: whether the matrix should be shown or not
+    :type show: False|True
     """
-    p = p.flatten()
-    labels = labels.flatten()
-    N = p.size
-    C = np.ones(N)
-    C[p
Date: Mon, 17 Jun 2013 16:47:36 +0100
Subject: [PATCH 12/19] kernels are now consistent with pep8 and common reason

---
 GPy/kern/__init__.py                         |  3 +-
 GPy/kern/constructors.py                     | 74 +++++++-------------
 GPy/kern/kern.py                             |  5 +-
 GPy/kern/{ => parts}/Brownian.py             |  0
 GPy/kern/{ => parts}/Matern32.py             |  0
 GPy/kern/{ => parts}/Matern52.py             |  0
 GPy/kern/{ => parts}/bias.py                 |  2 +-
 GPy/kern/{ => parts}/coregionalise.py        |  0
 GPy/kern/{ => parts}/exponential.py          |  2 +-
 GPy/kern/{ => parts}/finite_dimensional.py   |  4 +-
 GPy/kern/{ => parts}/fixed.py                |  0
 GPy/kern/{ => parts}/independent_outputs.py  |  0
 GPy/kern/{ => parts}/kernpart.py             |  0
 GPy/kern/{ => parts}/linear.py               |  4 +-
 GPy/kern/{ => parts}/periodic_Matern32.py    |  2 +-
 GPy/kern/{ => parts}/periodic_Matern52.py    |  2 +-
 GPy/kern/{ => parts}/periodic_exponential.py |  2 +-
 GPy/kern/{ => parts}/prod.py                 |  2 +-
 GPy/kern/{ => parts}/prod_orthogonal.py      |  0
 GPy/kern/{ => parts}/rational_quadratic.py   |  2 +-
 GPy/kern/{ => parts}/rbf.py                  |  4 +-
 GPy/kern/{ => parts}/rbfcos.py               |  2 +-
 GPy/kern/{ => parts}/spline.py               |  2 +-
 GPy/kern/{ => parts}/symmetric.py            |  2 +-
 GPy/kern/{ => parts}/sympy_helpers.cpp       |  0
 GPy/kern/{ => parts}/sympy_helpers.h         |  0
 GPy/kern/{ => parts}/sympykern.py            |  0
 GPy/kern/{ => parts}/white.py                |  4 +-
 GPy/testing/kernel_tests.py                  |  4 +-
 29 files changed, 47 insertions(+), 75 deletions(-)
 rename GPy/kern/{ => parts}/Brownian.py (100%)
 rename GPy/kern/{ => parts}/Matern32.py (100%)
 rename GPy/kern/{ => parts}/Matern52.py (100%)
 rename GPy/kern/{ => parts}/bias.py (99%)
 rename GPy/kern/{ => parts}/coregionalise.py (100%)
 rename GPy/kern/{ => parts}/exponential.py (99%)
 rename GPy/kern/{ => parts}/finite_dimensional.py (97%)
 rename GPy/kern/{ => parts}/fixed.py (100%)
 rename GPy/kern/{ => parts}/independent_outputs.py (100%)
 rename GPy/kern/{ => parts}/kernpart.py (100%)
 rename GPy/kern/{ => parts}/linear.py (99%)
 rename GPy/kern/{ => parts}/periodic_Matern32.py (99%)
 rename GPy/kern/{ => parts}/periodic_Matern52.py (99%)
 rename GPy/kern/{
=> parts}/periodic_exponential.py (99%) rename GPy/kern/{ => parts}/prod.py (99%) rename GPy/kern/{ => parts}/prod_orthogonal.py (100%) rename GPy/kern/{ => parts}/rational_quadratic.py (98%) rename GPy/kern/{ => parts}/rbf.py (99%) rename GPy/kern/{ => parts}/rbfcos.py (99%) rename GPy/kern/{ => parts}/spline.py (98%) rename GPy/kern/{ => parts}/symmetric.py (99%) rename GPy/kern/{ => parts}/sympy_helpers.cpp (100%) rename GPy/kern/{ => parts}/sympy_helpers.h (100%) rename GPy/kern/{ => parts}/sympykern.py (100%) rename GPy/kern/{ => parts}/white.py (98%) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 97c1d88f..6d9cf07a 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,8 +1,7 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) - -from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, Coregionalise, rational_quadratic, Fixed, rbfcos, IndependentOutputs +from constructors import * try: from constructors import rbf_sympy, sympykern # these depend on sympy except: diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index e2c21f15..697f3554 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -1,33 +1,9 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) - import numpy as np from kern import kern - -from rbf import rbf as rbfpart -from white import white as whitepart -from linear import linear as linearpart -from exponential import exponential as exponentialpart -from Matern32 import Matern32 as Matern32part -from Matern52 import Matern52 as Matern52part -from bias import bias as biaspart -from fixed import Fixed as fixedpart -from finite_dimensional import finite_dimensional as finite_dimensionalpart -from spline import spline as splinepart -from Brownian import Brownian as Brownianpart -from periodic_exponential import periodic_exponential as periodic_exponentialpart -from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part -from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part -from prod import prod as prodpart -from symmetric import symmetric as symmetric_part -from coregionalise import Coregionalise as coregionalise_part -from rational_quadratic import rational_quadratic as rational_quadraticpart -from rbfcos import rbfcos as rbfcospart -from independent_outputs import IndependentOutputs as independent_output_part -#TODO these s=constructors are not as clean as we'd like. Tidy the code up -#using meta-classes to make the objects construct properly wthout them. 
- +import parts def rbf(input_dim,variance=1., lengthscale=None,ARD=False): """ @@ -42,7 +18,7 @@ def rbf(input_dim,variance=1., lengthscale=None,ARD=False): :param ARD: Auto Relevance Determination (one lengthscale per dimension) :type ARD: Boolean """ - part = rbfpart(input_dim,variance,lengthscale,ARD) + part = parts.rbf.RBF(input_dim,variance,lengthscale,ARD) return kern(input_dim, [part]) def linear(input_dim,variances=None,ARD=False): @@ -55,7 +31,7 @@ def linear(input_dim,variances=None,ARD=False): variances (np.ndarray) ARD (boolean) """ - part = linearpart(input_dim,variances,ARD) + part = parts.linear.Linear(input_dim,variances,ARD) return kern(input_dim, [part]) def white(input_dim,variance=1.): @@ -67,7 +43,7 @@ def white(input_dim,variance=1.): input_dimD (int), obligatory variance (float) """ - part = whitepart(input_dim,variance) + part = parts.white.White(input_dim,variance) return kern(input_dim, [part]) def exponential(input_dim,variance=1., lengthscale=None, ARD=False): @@ -83,7 +59,7 @@ def exponential(input_dim,variance=1., lengthscale=None, ARD=False): :param ARD: Auto Relevance Determination (one lengthscale per dimension) :type ARD: Boolean """ - part = exponentialpart(input_dim,variance, lengthscale, ARD) + part = parts.exponential.Exponential(input_dim,variance, lengthscale, ARD) return kern(input_dim, [part]) def Matern32(input_dim,variance=1., lengthscale=None, ARD=False): @@ -99,7 +75,7 @@ def Matern32(input_dim,variance=1., lengthscale=None, ARD=False): :param ARD: Auto Relevance Determination (one lengthscale per dimension) :type ARD: Boolean """ - part = Matern32part(input_dim,variance, lengthscale, ARD) + part = parts.Matern32.Matern32(input_dim,variance, lengthscale, ARD) return kern(input_dim, [part]) def Matern52(input_dim, variance=1., lengthscale=None, ARD=False): @@ -115,7 +91,7 @@ def Matern52(input_dim, variance=1., lengthscale=None, ARD=False): :param ARD: Auto Relevance Determination (one lengthscale per dimension) :type ARD: Boolean """ - part = Matern52part(input_dim, variance, lengthscale, ARD) + part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD) return kern(input_dim, [part]) def bias(input_dim, variance=1.): @@ -127,7 +103,7 @@ def bias(input_dim, variance=1.): input_dim (int), obligatory variance (float) """ - part = biaspart(input_dim, variance) + part = parts.bias.Bias(input_dim, variance) return kern(input_dim, [part]) def finite_dimensional(input_dim, F, G, variances=1., weights=None): @@ -138,7 +114,7 @@ def finite_dimensional(input_dim, F, G, variances=1., weights=None): G: np.array with shape (n,n) - the Gram matrix associated to F variances : np.ndarray with shape (n,) """ - part = finite_dimensionalpart(input_dim, F, G, variances, weights) + part = parts.finite_dimensional.FiniteDimensional(input_dim, F, G, variances, weights) return kern(input_dim, [part]) def spline(input_dim, variance=1.): @@ -150,7 +126,7 @@ def spline(input_dim, variance=1.): :param variance: the variance of the kernel :type variance: float """ - part = splinepart(input_dim, variance) + part = parts.spline.Spline(input_dim, variance) return kern(input_dim, [part]) def Brownian(input_dim, variance=1.): @@ -162,7 +138,7 @@ def Brownian(input_dim, variance=1.): :param variance: the variance of the kernel :type variance: float """ - part = Brownianpart(input_dim, variance) + part = parts.Brownian.Brownian(input_dim, variance) return kern(input_dim, [part]) try: @@ -215,7 +191,7 @@ def periodic_exponential(input_dim=1, variance=1., 
lengthscale=None, period=2 *
     :param n_freq: the number of frequencies considered for the periodic subspace
     :type n_freq: int
     """
-    part = periodic_exponentialpart(input_dim, variance, lengthscale, period, n_freq, lower, upper)
+    part = parts.periodic_exponential.PeriodicExponential(input_dim, variance, lengthscale, period, n_freq, lower, upper)
     return kern(input_dim, [part])

 def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
@@ -233,7 +209,7 @@ def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi
     :param n_freq: the number of frequencies considered for the periodic subspace
     :type n_freq: int
     """
-    part = periodic_Matern32part(input_dim, variance, lengthscale, period, n_freq, lower, upper)
+    part = parts.periodic_Matern32.PeriodicMatern32(input_dim, variance, lengthscale, period, n_freq, lower, upper)
     return kern(input_dim, [part])

 def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
@@ -251,7 +227,7 @@ def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi
     :param n_freq: the number of frequencies considered for the periodic subspace
     :type n_freq: int
     """
-    part = periodic_Matern52part(input_dim, variance, lengthscale, period, n_freq, lower, upper)
+    part = parts.periodic_Matern52.PeriodicMatern52(input_dim, variance, lengthscale, period, n_freq, lower, upper)
     return kern(input_dim, [part])

 def prod(k1,k2,tensor=False):
@@ -262,7 +238,7 @@
     :type k1, k2: kernpart
     :rtype: kernel object
     """
-    part = prodpart(k1,k2,tensor)
+    part = parts.prod.Prod(k1,k2,tensor)
     return kern(part.input_dim, [part])

 def symmetric(k):
@@ -270,11 +246,11 @@
     Construct a symmetrical kernel from an existing kernel
     """
     k_ = k.copy()
-    k_.parts = [symmetric_part(p) for p in k.parts]
+    k_.parts = [parts.symmetric.Symmetric(p) for p in k.parts]
     return k_

-def Coregionalise(Nout,R=1, W=None, kappa=None):
-    p = coregionalise_part(Nout,R,W,kappa)
+def coregionalise(Nout,R=1, W=None, kappa=None):
+    p = parts.coregionalise.Coregionalise(Nout,R,W,kappa)
     return kern(1,[p])

@@ -291,10 +267,10 @@ def rational_quadratic(input_dim, variance=1., lengthscale=1., power=1.):

     :rtype: kern object
     """
-    part = rational_quadraticpart(input_dim, variance, lengthscale, power)
+    part = parts.rational_quadratic.RationalQuadratic(input_dim, variance, lengthscale, power)
     return kern(input_dim, [part])

-def Fixed(input_dim, K, variance=1.):
+def fixed(input_dim, K, variance=1.):
     """
     Construct a Fixed effect kernel.

     K (np.array), obligatory
     variance (float)
     """
-    part = fixedpart(input_dim, K, variance)
+    part = parts.fixed.Fixed(input_dim, K, variance)
     return kern(input_dim, [part])

 def rbfcos(input_dim, variance=1., frequencies=None, bandwidths=None, ARD=False):
     """
     construct a rbfcos kernel
     """
-    part = rbfcospart(input_dim, variance, frequencies, bandwidths, ARD)
+    part = parts.rbfcos.RBFCos(input_dim, variance, frequencies, bandwidths, ARD)
     return kern(input_dim, [part])

-def IndependentOutputs(k):
+def independent_outputs(k):
     """
     Construct a kernel with independent outputs from an existing kernel
     """
     for sl in k.input_slices:
         assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
-    parts = [independent_output_part(p) for p in k.parts]
-    return kern(k.input_dim+1,parts)
+    new_parts = [parts.independent_outputs.IndependentOutputs(p) for p in k.parts]
+    return kern(k.input_dim+1,new_parts)
-
-
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 76a0d99e..ad3ed3f9 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -1,13 +1,12 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-
 import numpy as np
 import pylab as pb
 from ..core.parameterised import Parameterised
-from kernpart import Kernpart
+from parts.kernpart import Kernpart
 import itertools
-from prod import prod
+from parts.prod import Prod as prod

 class kern(Parameterised):
     def __init__(self, input_dim, parts=[], input_slices=None):
diff --git a/GPy/kern/Brownian.py b/GPy/kern/parts/Brownian.py
similarity index 100%
rename from GPy/kern/Brownian.py
rename to GPy/kern/parts/Brownian.py
diff --git a/GPy/kern/Matern32.py b/GPy/kern/parts/Matern32.py
similarity index 100%
rename from GPy/kern/Matern32.py
rename to GPy/kern/parts/Matern32.py
diff --git a/GPy/kern/Matern52.py b/GPy/kern/parts/Matern52.py
similarity index 100%
rename from GPy/kern/Matern52.py
rename to GPy/kern/parts/Matern52.py
diff --git a/GPy/kern/bias.py b/GPy/kern/parts/bias.py
similarity index 99%
rename from GPy/kern/bias.py
rename to GPy/kern/parts/bias.py
index 8ec3741d..2b72e7c9 100644
--- a/GPy/kern/bias.py
+++ b/GPy/kern/parts/bias.py
@@ -6,7 +6,7 @@ from kernpart import Kernpart
 import numpy as np
 import hashlib

-class bias(Kernpart):
+class Bias(Kernpart):
     def __init__(self,input_dim,variance=1.):
         """
         :param input_dim: the number of input dimensions
diff --git a/GPy/kern/coregionalise.py b/GPy/kern/parts/coregionalise.py
similarity index 100%
rename from GPy/kern/coregionalise.py
rename to GPy/kern/parts/coregionalise.py
diff --git a/GPy/kern/exponential.py b/GPy/kern/parts/exponential.py
similarity index 99%
rename from GPy/kern/exponential.py
rename to GPy/kern/parts/exponential.py
index 5cb0f584..d8cf76f7 100644
--- a/GPy/kern/exponential.py
+++ b/GPy/kern/parts/exponential.py
@@ -6,7 +6,7 @@ from kernpart import Kernpart
 import numpy as np
 from scipy import integrate

-class exponential(Kernpart):
+class Exponential(Kernpart):
     """
     Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2)

diff --git a/GPy/kern/finite_dimensional.py b/GPy/kern/parts/finite_dimensional.py
similarity index 97%
rename from GPy/kern/finite_dimensional.py
rename to GPy/kern/parts/finite_dimensional.py
index b23ddb16..6cc2325f 100644
--- a/GPy/kern/finite_dimensional.py
+++ b/GPy/kern/parts/finite_dimensional.py
@@ -4,9 +4,9 @@
 from kernpart import Kernpart
 import numpy as np
-from ..util.linalg import pdinv,mdot
+from ...util.linalg import pdinv,mdot

-class finite_dimensional(Kernpart):
+class FiniteDimensional(Kernpart):
     def __init__(self, input_dim, F, G, variance=1., weights=None):
         """
         Argumnents
diff --git a/GPy/kern/fixed.py b/GPy/kern/parts/fixed.py
similarity index 100%
rename from GPy/kern/fixed.py
rename to GPy/kern/parts/fixed.py
diff --git a/GPy/kern/independent_outputs.py b/GPy/kern/parts/independent_outputs.py
similarity index 100%
rename from GPy/kern/independent_outputs.py
rename to GPy/kern/parts/independent_outputs.py
diff --git a/GPy/kern/kernpart.py b/GPy/kern/parts/kernpart.py
similarity index 100%
rename from GPy/kern/kernpart.py
rename to GPy/kern/parts/kernpart.py
diff --git a/GPy/kern/linear.py b/GPy/kern/parts/linear.py
similarity index 99%
rename from GPy/kern/linear.py
rename to GPy/kern/parts/linear.py
index 0ed887d2..c1da3944 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/parts/linear.py @@ -4,10 +4,10 @@ from kernpart import Kernpart import numpy as np -from ..util.linalg import tdot +from ...util.linalg import tdot from scipy import weave -class linear(Kernpart): +class Linear(Kernpart): """ Linear kernel diff --git a/GPy/kern/periodic_Matern32.py b/GPy/kern/parts/periodic_Matern32.py similarity index 99% rename from GPy/kern/periodic_Matern32.py rename to GPy/kern/parts/periodic_Matern32.py index 664a5183..5693085d 100644 --- a/GPy/kern/periodic_Matern32.py +++ b/GPy/kern/parts/periodic_Matern32.py @@ -7,7 +7,7 @@ import numpy as np from GPy.util.linalg import mdot from GPy.util.decorators import silence_errors -class periodic_Matern32(Kernpart): +class PeriodicMatern32(Kernpart): """ Kernel of the periodic subspace (up to a given frequency) of a Matern 3/2 RKHS. Only defined for input_dim=1. diff --git a/GPy/kern/periodic_Matern52.py b/GPy/kern/parts/periodic_Matern52.py similarity index 99% rename from GPy/kern/periodic_Matern52.py rename to GPy/kern/parts/periodic_Matern52.py index c01d2d26..7b5ae846 100644 --- a/GPy/kern/periodic_Matern52.py +++ b/GPy/kern/parts/periodic_Matern52.py @@ -7,7 +7,7 @@ import numpy as np from GPy.util.linalg import mdot from GPy.util.decorators import silence_errors -class periodic_Matern52(Kernpart): +class PeriodicMatern52(Kernpart): """ Kernel of the periodic subspace (up to a given frequency) of a Matern 5/2 RKHS. Only defined for input_dim=1. diff --git a/GPy/kern/periodic_exponential.py b/GPy/kern/parts/periodic_exponential.py similarity index 99% rename from GPy/kern/periodic_exponential.py rename to GPy/kern/parts/periodic_exponential.py index fcaf6420..36b7b9ac 100644 --- a/GPy/kern/periodic_exponential.py +++ b/GPy/kern/parts/periodic_exponential.py @@ -7,7 +7,7 @@ import numpy as np from GPy.util.linalg import mdot from GPy.util.decorators import silence_errors -class periodic_exponential(Kernpart): +class PeriodicExponential(Kernpart): """ Kernel of the periodic subspace (up to a given frequency) of a exponential (Matern 1/2) RKHS. Only defined for input_dim=1. 
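
Between the file moves, note the convention the rewritten constructors settle on: a lowercase factory function in GPy.kern wraps a CamelCase Kernpart class from the new GPy.kern.parts package into a kern object. A schematic sketch, not part of the diff, with names taken from the hunks above:

from GPy.kern import parts
from GPy.kern.kern import kern

def matern52(input_dim, variance=1., lengthscale=None, ARD=False):
    # build the raw covariance part...
    part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD)
    # ...then wrap it in a kern, so it can be combined with other parts
    # via the usual k1 + k2 and k1.prod(k2) interface
    return kern(input_dim, [part])

User code is largely untouched by the move: only Coregionalise, Fixed and IndependentOutputs are renamed, to coregionalise, fixed and independent_outputs.
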
diff --git a/GPy/kern/prod.py b/GPy/kern/parts/prod.py similarity index 99% rename from GPy/kern/prod.py rename to GPy/kern/parts/prod.py index 493129c6..db31c626 100644 --- a/GPy/kern/prod.py +++ b/GPy/kern/parts/prod.py @@ -5,7 +5,7 @@ from kernpart import Kernpart import numpy as np import hashlib -class prod(Kernpart): +class Prod(Kernpart): """ Computes the product of 2 kernels diff --git a/GPy/kern/prod_orthogonal.py b/GPy/kern/parts/prod_orthogonal.py similarity index 100% rename from GPy/kern/prod_orthogonal.py rename to GPy/kern/parts/prod_orthogonal.py diff --git a/GPy/kern/rational_quadratic.py b/GPy/kern/parts/rational_quadratic.py similarity index 98% rename from GPy/kern/rational_quadratic.py rename to GPy/kern/parts/rational_quadratic.py index d1e7a7e3..d92b43db 100644 --- a/GPy/kern/rational_quadratic.py +++ b/GPy/kern/parts/rational_quadratic.py @@ -5,7 +5,7 @@ from kernpart import Kernpart import numpy as np -class rational_quadratic(Kernpart): +class RationalQuadratic(Kernpart): """ rational quadratic kernel diff --git a/GPy/kern/rbf.py b/GPy/kern/parts/rbf.py similarity index 99% rename from GPy/kern/rbf.py rename to GPy/kern/parts/rbf.py index 5686d7bd..aa07e15d 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/parts/rbf.py @@ -6,9 +6,9 @@ from kernpart import Kernpart import numpy as np import hashlib from scipy import weave -from ..util.linalg import tdot +from ...util.linalg import tdot -class rbf(Kernpart): +class RBF(Kernpart): """ Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel: diff --git a/GPy/kern/rbfcos.py b/GPy/kern/parts/rbfcos.py similarity index 99% rename from GPy/kern/rbfcos.py rename to GPy/kern/parts/rbfcos.py index b1e99d3c..4d09dfdb 100644 --- a/GPy/kern/rbfcos.py +++ b/GPy/kern/parts/rbfcos.py @@ -6,7 +6,7 @@ from kernpart import Kernpart import numpy as np -class rbfcos(Kernpart): +class RBFCos(Kernpart): def __init__(self,input_dim,variance=1.,frequencies=None,bandwidths=None,ARD=False): self.input_dim = input_dim self.name = 'rbfcos' diff --git a/GPy/kern/spline.py b/GPy/kern/parts/spline.py similarity index 98% rename from GPy/kern/spline.py rename to GPy/kern/parts/spline.py index f2802180..e675420c 100644 --- a/GPy/kern/spline.py +++ b/GPy/kern/parts/spline.py @@ -9,7 +9,7 @@ def theta(x): """Heaviside step function""" return np.where(x>=0.,1.,0.) 
-class spline(Kernpart): +class Spline(Kernpart): """ Spline kernel diff --git a/GPy/kern/symmetric.py b/GPy/kern/parts/symmetric.py similarity index 99% rename from GPy/kern/symmetric.py rename to GPy/kern/parts/symmetric.py index c7099a6f..bbdd5ac0 100644 --- a/GPy/kern/symmetric.py +++ b/GPy/kern/parts/symmetric.py @@ -4,7 +4,7 @@ from kernpart import Kernpart import numpy as np -class symmetric(Kernpart): +class Symmetric(Kernpart): """ Symmetrical kernels diff --git a/GPy/kern/sympy_helpers.cpp b/GPy/kern/parts/sympy_helpers.cpp similarity index 100% rename from GPy/kern/sympy_helpers.cpp rename to GPy/kern/parts/sympy_helpers.cpp diff --git a/GPy/kern/sympy_helpers.h b/GPy/kern/parts/sympy_helpers.h similarity index 100% rename from GPy/kern/sympy_helpers.h rename to GPy/kern/parts/sympy_helpers.h diff --git a/GPy/kern/sympykern.py b/GPy/kern/parts/sympykern.py similarity index 100% rename from GPy/kern/sympykern.py rename to GPy/kern/parts/sympykern.py diff --git a/GPy/kern/white.py b/GPy/kern/parts/white.py similarity index 98% rename from GPy/kern/white.py rename to GPy/kern/parts/white.py index 41f075c3..8241bf55 100644 --- a/GPy/kern/white.py +++ b/GPy/kern/parts/white.py @@ -1,10 +1,10 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) - from kernpart import Kernpart import numpy as np -class white(Kernpart): + +class White(Kernpart): """ White noise kernel. diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 98c75827..8a4f8b16 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -21,7 +21,7 @@ class KernelTests(unittest.TestCase): """ X = np.random.rand(30, 4) K = np.dot(X, X.T) - kernel = GPy.kern.Fixed(4, K) + kernel = GPy.kern.fixed(4, K) Y = np.ones((30,1)) m = GPy.models.GPRegression(X,Y,kernel=kernel) self.assertTrue(m.checkgrad()) @@ -36,7 +36,7 @@ class KernelTests(unittest.TestCase): Y = np.vstack((Y1,Y2)) k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) - k2 = GPy.kern.Coregionalise(2,1) + k2 = GPy.kern.coregionalise(2,1) k = k1.prod(k2,tensor=True) m = GPy.models.GPRegression(X,Y,kernel=k) self.assertTrue(m.checkgrad()) From 2cab558f38839cdedf3cb25e5b7881942e87e43e Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Mon, 17 Jun 2013 16:59:10 +0100 Subject: [PATCH 13/19] removed unnecessary gitignore line --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6c03ec1b..d494630f 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,6 @@ dist build eggs -parts bin var sdist From 13d8ff2f20a795e6a5d02aa2edf8069bab0fc6a5 Mon Sep 17 00:00:00 2001 From: Nicolo Fusi Date: Mon, 17 Jun 2013 16:59:23 +0100 Subject: [PATCH 14/19] added init --- GPy/kern/parts/__init__.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 GPy/kern/parts/__init__.py diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py new file mode 100644 index 00000000..b1f3e17b --- /dev/null +++ b/GPy/kern/parts/__init__.py @@ -0,0 +1,24 @@ +import bias +import Brownian +import coregionalise +import exponential +import finite_dimensional +import fixed +import independent_outputs +import linear +import Matern32 +import Matern52 +import periodic_exponential +import periodic_Matern32 +import periodic_Matern52 +import prod_orthogonal +import prod +import rational_quadratic +import rbfcos +import rbf +import spline +import symmetric +import sympy_helpers.cpp +import sympy_helpers.h +import sympykern +import white From 
ab82e4b196402785abb29ee4a37edf06b0b2f2db Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Mon, 17 Jun 2013 17:06:46 +0100
Subject: [PATCH 15/19] removed sympy helpers from init

---
 GPy/kern/parts/__init__.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py
index b1f3e17b..1a01882e 100644
--- a/GPy/kern/parts/__init__.py
+++ b/GPy/kern/parts/__init__.py
@@ -18,7 +18,5 @@ import rbfcos
 import rbf
 import spline
 import symmetric
-import sympy_helpers.cpp
-import sympy_helpers.h
 import sympykern
 import white

From a8c82eac0252129478e117e75a96a2843aed593d Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Mon, 17 Jun 2013 17:11:19 +0100
Subject: [PATCH 16/19] added missing import in util.linalg

---
 GPy/likelihoods/gaussian.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index 8dbd8863..ef70d414 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -1,5 +1,6 @@
 import numpy as np
 from likelihood import likelihood
+from GPy.util.linalg import jitchol

 class Gaussian(likelihood):
     """
@@ -7,7 +8,7 @@ class Gaussian(likelihood):
     :param Y: observed output (Nx1 numpy.darray)
     ..Note:: Y values allowed depend on the likelihood_function used
-    :param variance :
+    :param variance :
     :param normalize: whether to normalize the data before computing (predictions will be in original scales)
     :type normalize: False|True
     """

From d61d379f5daf908ee1fcd3634bc3a1bca4f1db91 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Wed, 19 Jun 2013 15:01:33 +0100
Subject: [PATCH 17/19] added another simple subplotting function

---
 GPy/util/plot.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/GPy/util/plot.py b/GPy/util/plot.py
index 309c440e..f44864f3 100644
--- a/GPy/util/plot.py
+++ b/GPy/util/plot.py
@@ -70,6 +70,36 @@ def align_subplots(N,M,xlim=None, ylim=None):
         else:
             removeUpperTicks()

+def align_subplot_array(axes,xlim=None, ylim=None):
+    """make all of the axes in the array have the same limits, turn off unnecessary ticks
+
+    use pb.subplots() to get an array of axes
+    """
+    #find sensible xlim,ylim
+    if xlim is None:
+        xlim = [np.inf,-np.inf]
+        for ax in axes.flatten():
+            xlim[0] = min(xlim[0],ax.get_xlim()[0])
+            xlim[1] = max(xlim[1],ax.get_xlim()[1])
+    if ylim is None:
+        ylim = [np.inf,-np.inf]
+        for ax in axes.flatten():
+            ylim[0] = min(ylim[0],ax.get_ylim()[0])
+            ylim[1] = max(ylim[1],ax.get_ylim()[1])
+
+    N,M = axes.shape
+    for i,ax in enumerate(axes.flatten()):
+        ax.set_xlim(xlim)
+        ax.set_ylim(ylim)
+        if (i)%M:
+            ax.set_yticks([])
+        else:
+            removeRightTicks(ax)
+        if i<(M*(N-1)):
+            ax.set_xticks([])
+        else:
+            removeUpperTicks(ax)
+
 def x_frame1D(X,plot_limits=None,resolution=None):
     """
     Internal helper function for making plots, returns a set of input values to plot as well as lower and upper limits

From 18285bd01ce9d54d658f9ba290851429bad0bf44 Mon Sep 17 00:00:00 2001
From: Nicolo Fusi
Date: Wed, 19 Jun 2013 16:46:05 +0100
Subject: [PATCH 18/19] removed sympy import

---
 GPy/kern/parts/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py
index 1a01882e..68762afc 100644
--- a/GPy/kern/parts/__init__.py
+++ b/GPy/kern/parts/__init__.py
@@ -18,5 +18,4 @@ import rbfcos
 import rbf
 import spline
 import symmetric
-import sympykern
 import white
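
A quick usage sketch for the align_subplot_array helper added in PATCH 17 above, assuming pylab is imported as pb as elsewhere in GPy:

import numpy as np
import pylab as pb
from GPy.util.plot import align_subplot_array

fig, axes = pb.subplots(2, 3)    # pb.subplots() gives the array of axes the docstring asks for
for ax in axes.flatten():
    ax.plot(np.random.randn(20))
align_subplot_array(axes)        # common x/y limits, ticks only on the outer edges
pb.show()
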
From cb7ac1dee1cbaca6a5b83c71840e0acc90b70102 Mon Sep 17 00:00:00 2001
From: Teo de Campos
Date: Thu, 20 Jun 2013 17:08:03 +0100
Subject: [PATCH 19/19] added an include

---
 GPy/likelihoods/gaussian.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index 8dbd8863..fed48563 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -1,5 +1,6 @@
 import numpy as np
 from likelihood import likelihood
+from ..util.linalg import jitchol

 class Gaussian(likelihood):
     """