diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index 93605427..e6296dab 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -9,21 +9,224 @@ import pylab as pb
 import numpy as np
 import GPy
 
+def coregionalisation_toy2(max_iters=100):
+    """
+    A simple demonstration of coregionalisation on two sinusoidal functions.
+    """
+    X1 = np.random.rand(50, 1) * 8
+    X2 = np.random.rand(30, 1) * 5
+    index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
+    X = np.hstack((np.vstack((X1, X2)), index))
+    Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
+    Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2.
+    Y = np.vstack((Y1, Y2))
 
-def toy_rbf_1d(optimizer='tnc', max_nb_eval_optim=100):
-    """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
-    data = GPy.util.datasets.toy_rbf_1d()
+    k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
+    k2 = GPy.kern.coregionalise(2, 1)
+    k = k1**k2
+    m = GPy.models.GPRegression(X, Y, kernel=k)
+    m.constrain_fixed('.*rbf_var', 1.)
+    # m.constrain_positive('.*kappa')
+    m.optimize('sim', messages=1, max_iters=max_iters)
 
-    # create simple GP Model
-    m = GPy.models.GPRegression(data['X'], data['Y'])
-
-    # optimize
-    m.optimize(optimizer, max_f_eval=max_nb_eval_optim)
-    # plot
-    m.plot()
-    print(m)
+    pb.figure()
+    Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
+    Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
+    mean, var, low, up = m.predict(Xtest1)
+    GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
+    mean, var, low, up = m.predict(Xtest2)
+    GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
+    pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
+    pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
     return m
+
+def coregionalisation_toy(max_iters=100):
+    """
+    A simple demonstration of coregionalisation on two sinusoidal functions.
+    """
+    X1 = np.random.rand(50, 1) * 8
+    X2 = np.random.rand(30, 1) * 5
+    index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
+    X = np.hstack((np.vstack((X1, X2)), index))
+    Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
+    Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
+    Y = np.vstack((Y1, Y2))
+
+    k1 = GPy.kern.rbf(1)
+    k2 = GPy.kern.coregionalise(2, 2)
+    k = k1**k2  # k1.prod(k2, tensor=True)
+    m = GPy.models.GPRegression(X, Y, kernel=k)
+    m.constrain_fixed('.*rbf_var', 1.)
+    # m.constrain_positive('kappa')
+    m.optimize(max_iters=max_iters)
+
+    pb.figure()
+    Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
+    Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
+    mean, var, low, up = m.predict(Xtest1)
+    GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
+    mean, var, low, up = m.predict(Xtest2)
+    GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
+    pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
+    pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
+    return m
+
+
+def coregionalisation_sparse(max_iters=100):
+    """
+    A simple demonstration of coregionalisation on two sinusoidal functions using sparse approximations.
+ """ + X1 = np.random.rand(500, 1) * 8 + X2 = np.random.rand(300, 1) * 5 + index = np.vstack((np.zeros_like(X1), np.ones_like(X2))) + X = np.hstack((np.vstack((X1, X2)), index)) + Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05 + Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + Y = np.vstack((Y1, Y2)) + + num_inducing = 40 + Z = np.hstack((np.random.rand(num_inducing, 1) * 8, np.random.randint(0, 2, num_inducing)[:, None])) + + k1 = GPy.kern.rbf(1) + k2 = GPy.kern.coregionalise(2, 2) + k = k1**k2 #.prod(k2, tensor=True) # + GPy.kern.white(2,0.001) + + m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z) + m.constrain_fixed('.*rbf_var', 1.) + m.constrain_fixed('iip') + m.constrain_bounded('noise_variance', 1e-3, 1e-1) +# m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs') + m.optimize(max_iters=max_iters) + + # plotting: + pb.figure() + Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1)))) + Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1)))) + mean, var, low, up = m.predict(Xtest1) + GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up) + mean, var, low, up = m.predict(Xtest2) + GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up) + pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2) + pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2) + y = pb.ylim()[0] + pb.plot(Z[:, 0][Z[:, 1] == 0], np.zeros(np.sum(Z[:, 1] == 0)) + y, 'r|', mew=2) + pb.plot(Z[:, 0][Z[:, 1] == 1], np.zeros(np.sum(Z[:, 1] == 1)) + y, 'g|', mew=2) + return m + +def epomeo_gpx(max_iters=100): + """Perform Gaussian process regression on the GPX data from the Mount Epomeo runs. Requires gpxpy to be installed on your system.""" + data = GPy.util.datasets.epomeo_gpx() + num_data_list = [] + for Xpart in data['X']: + num_data_list.append(Xpart.shape[0]) + + num_data_array = np.array(num_data_list) + num_data = num_data_array.sum() + Y = np.zeros((num_data, 3)) + t = np.zeros((num_data, 2)) + start = 0 + for Xpart, index in zip(data['X'], range(len(data['X']))): + end = start+Xpart.shape[0] + t[start:end, :] = np.hstack((Xpart[:, 0:1], + index*np.ones((Xpart.shape[0], 1)))) + Y[start:end, :] = Xpart[:, 1:4] + + num_inducing = 40 + Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None], + np.random.randint(0, 4, num_inducing)[:, None])) + + k1 = GPy.kern.rbf(1) + k2 = GPy.kern.coregionalise(output_dim=5, rank=5) + k = k1**k2 + + m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z) + m.constrain_fixed('.*rbf_var', 1.) + m.constrain_fixed('iip') + m.constrain_bounded('noise_variance', 1e-3, 1e-1) +# m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs') +# m.optimize(max_iters=max_iters) + + return m + + +def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=10000, max_iters=300): + """Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisy mode is higher.""" + + # Contour over a range of length scales and signal/noise ratios. 
+    length_scales = np.linspace(0.1, 60., resolution)
+    log_SNRs = np.linspace(-3., 4., resolution)
+
+    data = GPy.util.datasets.della_gatta_TRP63_gene_expression(gene_number)
+    # data['Y'] = data['Y'][0::2, :]
+    # data['X'] = data['X'][0::2, :]
+
+    data['Y'] = data['Y'] - np.mean(data['Y'])
+
+    lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
+    pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
+    ax = pb.gca()
+    pb.xlabel('length scale')
+    pb.ylabel('log_10 SNR')
+
+    xlim = ax.get_xlim()
+    ylim = ax.get_ylim()
+
+    # Now run a few optimizations
+    models = []
+    optim_point_x = np.empty(2)
+    optim_point_y = np.empty(2)
+    np.random.seed(seed=seed)
+    for i in range(0, model_restarts):
+        # kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
+        kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
+
+        m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern)
+        m['noise_variance'] = np.random.uniform(1e-3, 1)
+        optim_point_x[0] = m['rbf_lengthscale']
+        optim_point_y[0] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance'])
+
+        # optimize
+        m.optimize('scg', xtol=1e-6, ftol=1e-6, max_iters=max_iters)
+
+        optim_point_x[1] = m['rbf_lengthscale']
+        optim_point_y[1] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance'])
+
+        pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1] - optim_point_x[0], optim_point_y[1] - optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
+        models.append(m)
+
+    ax.set_xlim(xlim)
+    ax.set_ylim(ylim)
+    return m  # (models, lls)
+
+def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
+    """Evaluate the GP objective function for a given data set over a range of signal-to-noise ratios and a range of lengthscales.
+
+    :data: a data set from the utils.datasets directory.
+    :length_scales: a list of length scales to explore for the contour plot.
+    :log_SNRs: a list of base 10 logarithm signal-to-noise ratios to explore for the contour plot.
+    :kernel_call: a kernel constructor to use for the 'signal' portion of the data."""
+
+    lls = []
+    total_var = np.var(data['Y'])
+    kernel = kernel_call(1, variance=1., lengthscale=1.)
+    model = GPy.models.GPRegression(data['X'], data['Y'], kernel=kernel)
+    for log_SNR in log_SNRs:
+        SNR = 10.**log_SNR
+        noise_var = total_var / (1. + SNR)
+        signal_var = total_var - noise_var
+        model.kern['.*variance'] = signal_var
+        model['noise_variance'] = noise_var
+        length_scale_lls = []
+
+        for length_scale in length_scales:
+            model['.*lengthscale'] = length_scale
+            length_scale_lls.append(model.log_likelihood())
+
+        lls.append(length_scale_lls)
+
+    return np.array(lls)
+
+
 def olympic_100m_men(max_iters=100, kernel=None):
     """Run a standard Gaussian process regression on the Rogers and Girolami olympics data."""
     data = GPy.util.datasets.olympic_100m_men()
@@ -62,6 +265,20 @@ def olympic_marathon_men(max_iters=100, kernel=None):
     print(m)
     return m
 
+def toy_rbf_1d(optimizer='tnc', max_nb_eval_optim=100):
+    """Run a simple demonstration of fitting a standard Gaussian process to data sampled from an RBF covariance."""
+    data = GPy.util.datasets.toy_rbf_1d()
+
+    # create simple GP Model
+    m = GPy.models.GPRegression(data['X'], data['Y'])
+
+    # optimize
+    m.optimize(optimizer, max_f_eval=max_nb_eval_optim)
+    # plot
+    m.plot()
+    print(m)
+    return m
+
 def toy_rbf_1d_50(max_iters=100):
     """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
     data = GPy.util.datasets.toy_rbf_1d_50()
@@ -150,203 +367,8 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4):
     print(m)
     return m
 
-def silhouette(max_iters=100):
-    """Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper."""
-    data = GPy.util.datasets.silhouette()
-
-    # create simple GP Model
-    m = GPy.models.GPRegression(data['X'], data['Y'])
-
-    # optimize
-    m.optimize(messages=True, max_iters=max_iters)
-
-    print(m)
-    return m
-
-def coregionalisation_toy2(max_iters=100):
-    """
-    A simple demonstration of coregionalisation on two sinusoidal functions.
-    """
-    X1 = np.random.rand(50, 1) * 8
-    X2 = np.random.rand(30, 1) * 5
-    index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
-    X = np.hstack((np.vstack((X1, X2)), index))
-    Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
-    Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2.
-    Y = np.vstack((Y1, Y2))
-
-    k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
-    k2 = GPy.kern.coregionalise(2, 1)
-    k = k1.prod(k2, tensor=True)
-    m = GPy.models.GPRegression(X, Y, kernel=k)
-    m.constrain_fixed('.*rbf_var', 1.)
-    # m.constrain_positive('.*kappa')
-    m.optimize('sim', messages=1, max_iters=max_iters)
-
-    pb.figure()
-    Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
-    Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
-    mean, var, low, up = m.predict(Xtest1)
-    GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
-    mean, var, low, up = m.predict(Xtest2)
-    GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
-    pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
-    pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
-    return m
-
-def coregionalisation_toy(max_iters=100):
-    """
-    A simple demonstration of coregionalisation on two sinusoidal functions.
-    """
-    X1 = np.random.rand(50, 1) * 8
-    X2 = np.random.rand(30, 1) * 5
-    index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
-    X = np.hstack((np.vstack((X1, X2)), index))
-    Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
-    Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
-    Y = np.vstack((Y1, Y2))
-
-    k1 = GPy.kern.rbf(1)
-    k2 = GPy.kern.coregionalise(2, 2)
-    k = k1.prod(k2, tensor=True)
-    m = GPy.models.GPRegression(X, Y, kernel=k)
-    m.constrain_fixed('.*rbf_var', 1.)
-    # m.constrain_positive('kappa')
-    m.optimize(max_iters=max_iters)
-
-    pb.figure()
-    Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
-    Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
-    mean, var, low, up = m.predict(Xtest1)
-    GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
-    mean, var, low, up = m.predict(Xtest2)
-    GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
-    pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
-    pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
-    return m
-
-
-def coregionalisation_sparse(max_iters=100):
-    """
-    A simple demonstration of coregionalisation on two sinusoidal functions using sparse approximations.
-    """
-    X1 = np.random.rand(500, 1) * 8
-    X2 = np.random.rand(300, 1) * 5
-    index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
-    X = np.hstack((np.vstack((X1, X2)), index))
-    Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
-    Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
-    Y = np.vstack((Y1, Y2))
-
-    num_inducing = 40
-    Z = np.hstack((np.random.rand(num_inducing, 1) * 8, np.random.randint(0, 2, num_inducing)[:, None]))
-
-    k1 = GPy.kern.rbf(1)
-    k2 = GPy.kern.coregionalise(2, 2)
-    k = k1.prod(k2, tensor=True) # + GPy.kern.white(2,0.001)
-
-    m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
-    m.constrain_fixed('.*rbf_var', 1.)
-    m.constrain_fixed('iip')
-    m.constrain_bounded('noise_variance', 1e-3, 1e-1)
-# m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
-    m.optimize('bfgs', messages=1, max_iters=max_iters)
-
-    # plotting:
-    pb.figure()
-    Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1))))
-    Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1))))
-    mean, var, low, up = m.predict(Xtest1)
-    GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
-    mean, var, low, up = m.predict(Xtest2)
-    GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
-    pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
-    pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
-    y = pb.ylim()[0]
-    pb.plot(Z[:, 0][Z[:, 1] == 0], np.zeros(np.sum(Z[:, 1] == 0)) + y, 'r|', mew=2)
-    pb.plot(Z[:, 0][Z[:, 1] == 1], np.zeros(np.sum(Z[:, 1] == 1)) + y, 'g|', mew=2)
-    return m
-
-
-def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=10000, max_iters=300):
-    """Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisey mode is higher."""
-
-    # Contour over a range of length scales and signal/noise ratios.
-    length_scales = np.linspace(0.1, 60., resolution)
-    log_SNRs = np.linspace(-3., 4., resolution)
-
-    data = GPy.util.datasets.della_gatta_TRP63_gene_expression(gene_number)
-    # data['Y'] = data['Y'][0::2, :]
-    # data['X'] = data['X'][0::2, :]
-
-    data['Y'] = data['Y'] - np.mean(data['Y'])
-
-    lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
-    pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
-    ax = pb.gca()
-    pb.xlabel('length scale')
-    pb.ylabel('log_10 SNR')
-
-    xlim = ax.get_xlim()
-    ylim = ax.get_ylim()
-
-    # Now run a few optimizations
-    models = []
-    optim_point_x = np.empty(2)
-    optim_point_y = np.empty(2)
-    np.random.seed(seed=seed)
-    for i in range(0, model_restarts):
-        # kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
-        kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
-
-        m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern)
-        m['noise_variance'] = np.random.uniform(1e-3, 1)
-        optim_point_x[0] = m['rbf_lengthscale']
-        optim_point_y[0] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']);
-
-        # optimize
-        m.optimize('scg', xtol=1e-6, ftol=1e-6, max_iters=max_iters)
-
-        optim_point_x[1] = m['rbf_lengthscale']
-        optim_point_y[1] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']);
-
-        pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1] - optim_point_x[0], optim_point_y[1] - optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
-        models.append(m)
-
-    ax.set_xlim(xlim)
-    ax.set_ylim(ylim)
-    return m # (models, lls)
-
-def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
-    """Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales.
-
-    :data_set: A data set from the utils.datasets director.
-    :length_scales: a list of length scales to explore for the contour plot.
-    :log_SNRs: a list of base 10 logarithm signal to noise ratios to explore for the contour plot.
-    :kernel: a kernel to use for the 'signal' portion of the data."""
-
-    lls = []
-    total_var = np.var(data['Y'])
-    kernel = kernel_call(1, variance=1., lengthscale=1.)
-    model = GPy.models.GPRegression(data['X'], data['Y'], kernel=kernel)
-    for log_SNR in log_SNRs:
-        SNR = 10.**log_SNR
-        noise_var = total_var / (1. + SNR)
-        signal_var = total_var - noise_var
-        model.kern['.*variance'] = signal_var
-        model['noise_variance'] = noise_var
-        length_scale_lls = []
-
-        for length_scale in length_scales:
-            model['.*lengthscale'] = length_scale
-            length_scale_lls.append(model.log_likelihood())
-
-        lls.append(length_scale_lls)
-
-    return np.array(lls)
-
 def robot_wireless(max_iters=100, kernel=None):
-    """Predict the location of a robot given wirelss signal strengthr readings."""
+    """Predict the location of a robot given wireless signal strength readings."""
     data = GPy.util.datasets.robot_wireless()
 
     # create simple GP Model
@@ -366,6 +388,21 @@ def robot_wireless(max_iters=100, kernel=None):
     print('Sum of squares error on test data: ' + str(sse))
     return m
 
+def silhouette(max_iters=100):
+    """Predict the pose of a figure given a silhouette. This is a task from the Agarwal and Triggs 2004 ICML paper."""
+    data = GPy.util.datasets.silhouette()
+
+    # create simple GP Model
+    m = GPy.models.GPRegression(data['X'], data['Y'])
+
+    # optimize
+    m.optimize(messages=True, max_iters=max_iters)
+
+    print(m)
+    return m
+
+
 def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100):
     """Run a 1D example of a sparse GP regression."""
     # sample inputs and outputs
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 6fd3aa51..03cf43c1 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -306,8 +306,29 @@ def symmetric(k):
     k_.parts = [symmetric.Symmetric(p) for p in k.parts]
     return k_
 
-def coregionalise(Nout, R=1, W=None, kappa=None):
-    p = parts.coregionalise.Coregionalise(Nout,R,W,kappa)
+def coregionalise(output_dim, rank=1, W=None, kappa=None):
+    """
+    Coregionalisation kernel.
+
+    Used for computing covariance functions of the form
+    .. math::
+       k_2(x, y) = B k(x, y)
+    where
+    .. math::
+       B = WW^\top + \text{diag}(\kappa)
+
+    :param output_dim: the number of output dimensions
+    :type output_dim: int
+    :param rank: the rank of the coregionalisation matrix
+    :type rank: int
+    :param W: a low-rank matrix that determines the correlations between the different outputs; together with kappa it forms the coregionalisation matrix B
+    :type W: ndarray
+    :param kappa: a diagonal term which allows the outputs to behave independently
+    :type kappa: ndarray
+    :rtype: kernel object
+
+    .. note:: see the coregionalisation examples in GPy.examples.regression for example usage.
+    """
+    p = parts.coregionalise.Coregionalise(output_dim, rank, W, kappa)
     return kern(1,[p])
diff --git a/GPy/kern/parts/coregionalise.py b/GPy/kern/parts/coregionalise.py
index 8faceafe..94179088 100644
--- a/GPy/kern/parts/coregionalise.py
+++ b/GPy/kern/parts/coregionalise.py
@@ -9,24 +9,42 @@ from scipy import weave
 
 class Coregionalise(Kernpart):
     """
-    Kernel for Intrinsic Corregionalization Models
+    Coregionalisation kernel.
+
+    Used for computing covariance functions of the form
+    .. math::
+       k_2(x, y) = B k(x, y)
+    where
+    .. math::
+       B = WW^\top + \text{diag}(\kappa)
+
+    :param output_dim: the number of output dimensions
+    :type output_dim: int
+    :param rank: the rank of the coregionalisation matrix
+    :type rank: int
+    :param W: a low-rank matrix that determines the correlations between the different outputs; together with kappa it forms the coregionalisation matrix B
+    :type W: ndarray
+    :param kappa: a diagonal term which allows the outputs to behave independently
+    :type kappa: ndarray
+
+    .. note:: see the coregionalisation examples in GPy.examples.regression for example usage.
""" - def __init__(self,Nout,R=1, W=None, kappa=None): + def __init__(self,output_dim,rank=1, W=None, kappa=None): self.input_dim = 1 self.name = 'coregion' - self.Nout = Nout - self.R = R + self.output_dim = output_dim + self.rank = rank if W is None: - self.W = np.ones((self.Nout,self.R)) + self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank) else: - assert W.shape==(self.Nout,self.R) + assert W.shape==(self.output_dim,self.rank) self.W = W if kappa is None: - kappa = np.ones(self.Nout) + kappa = 0.5*np.ones(self.output_dim) else: - assert kappa.shape==(self.Nout,) + assert kappa.shape==(self.output_dim,) self.kappa = kappa - self.num_params = self.Nout*(self.R + 1) + self.num_params = self.output_dim*(self.rank + 1) self._set_params(np.hstack([self.W.flatten(),self.kappa])) def _get_params(self): @@ -34,12 +52,12 @@ class Coregionalise(Kernpart): def _set_params(self,x): assert x.size == self.num_params - self.kappa = x[-self.Nout:] - self.W = x[:-self.Nout].reshape(self.Nout,self.R) + self.kappa = x[-self.output_dim:] + self.W = x[:-self.output_dim].reshape(self.output_dim,self.rank) self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa) def _get_param_names(self): - return sum([['W%i_%i'%(i,j) for j in range(self.R)] for i in range(self.Nout)],[]) + ['kappa_%i'%i for i in range(self.Nout)] + return sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[]) + ['kappa_%i'%i for i in range(self.output_dim)] def K(self,index,index2,target): index = np.asarray(index,dtype=np.int) @@ -57,26 +75,26 @@ class Coregionalise(Kernpart): if index2 is None: code=""" for(int i=0;i