diff --git a/GPy/examples/BGPLVM_demo.py b/GPy/examples/BGPLVM_demo.py
deleted file mode 100644
index e92856ab..00000000
--- a/GPy/examples/BGPLVM_demo.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-import numpy as np
-import pylab as pb
-import GPy
-np.random.seed(123344)
-
-N = 10
-M = 3
-Q = 2
-D = 4
-#generate GPLVM-like data
-X = np.random.rand(N, Q)
-k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
-K = k.K(X)
-Y = np.random.multivariate_normal(np.zeros(N),K,D).T
-
-k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
-# k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
-# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
-# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
-
-m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
-m.constrain_positive('(rbf|bias|noise|white|S)')
-# m.constrain_fixed('S', 1)
-
-# pb.figure()
-# m.plot()
-# pb.title('PCA initialisation')
-# pb.figure()
-# m.optimize(messages = 1)
-# m.plot()
-# pb.title('After optimisation')
-m.ensure_default_constraints()
-m.randomize()
-m.checkgrad(verbose = 1)
diff --git a/GPy/examples/__init__.py b/GPy/examples/__init__.py
index ce4618ac..551bff54 100644
--- a/GPy/examples/__init__.py
+++ b/GPy/examples/__init__.py
@@ -1,9 +1,8 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-# Please don't delete this without explaining to Neil the right way of doing this. I want to be able to run:
-# GPy.examples.regression.toy_rbf_1D() from ipython having imported GPy, and this seems to be the way to do it!
 import classification
 import regression
-import unsupervised
+import dimensionality_reduction
+import non_gaussian
 import tutorials
 
diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index 031cc915..77bd0b79 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -107,3 +107,80 @@ def toy_linear_1d_classification(seed=default_seed):
     print(m)
 
     return m
+
+def sparse_toy_linear_1d_classification(seed=default_seed):
+    """
+    Sparse GP classification on a simple 1D example, using EP with a probit likelihood.
+    :param seed: seed value for data generation.
+    :type seed: int
+    """
+
+    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
+    Y = data['Y'][:, 0:1]
+    Y[Y == -1] = 0
+
+    # Kernel object
+    kernel = GPy.kern.rbf(1)
+
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(Y,distribution)
+
+    Z = np.random.uniform(data['X'].min(),data['X'].max(),(10,1))
+
+    # Model definition
+    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z,normalize_X=True)
+    m.set('len',.5)
+
+    m.ensure_default_constraints()
+    # Optimize
+    m.update_likelihood_approximation()
+    # Parameters optimization:
+    m.optimize()
+    #m.EPEM() #FIXME
+
+    # Plot
+    pb.subplot(211)
+    m.plot_f()
+    pb.subplot(212)
+    m.plot()
+    print(m)
+
+    return m
+
+def sparse_crescent_data(inducing=10, seed=default_seed):
+    """Run a sparse Gaussian process classification on the crescent data. The demonstration builds a sparse GP over a set of inducing inputs and uses EP to approximate the probit likelihood.
+
+    :param seed: seed value for data generation.
+    :type seed: int
+    :param inducing: number of inducing variables.
+    :type inducing: int
+    """
+
+    data = GPy.util.datasets.crescent_data(seed=seed)
+
+    # Kernel object
+    kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
+
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(data['Y'],distribution)
+
+    sample = np.random.randint(0,data['X'].shape[0],inducing)
+    Z = data['X'][sample,:]
+    #Z = (np.random.random_sample(2*inducing)*(data['X'].max()-data['X'].min())+data['X'].min()).reshape(inducing,-1)
+
+    # create sparse GP EP model
+    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
+    m.ensure_default_constraints()
+    m.set('len',10.)
+
+    m.update_likelihood_approximation()
+
+    # optimize
+    m.optimize()
+    print(m)
+
+    # plot
+    m.plot()
+    return m
diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
new file mode 100644
index 00000000..513d30d1
--- /dev/null
+++ b/GPy/examples/dimensionality_reduction.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+import pylab as pb
+import GPy
+
+default_seed = 123344
+
+def BGPLVM(seed = default_seed):
+    np.random.seed(seed)
+    N = 10
+    M = 3
+    Q = 2
+    D = 4
+    #generate GPLVM-like data
+    X = np.random.rand(N, Q)
+    k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
+    K = k.K(X)
+    Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+
+    k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
+    # k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
+    # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+    # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
+
+    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
+    m.constrain_positive('(rbf|bias|noise|white|S)')
+    # m.constrain_fixed('S', 1)
+
+    # pb.figure()
+    # m.plot()
+    # pb.title('PCA initialisation')
+    # pb.figure()
+    # m.optimize(messages = 1)
+    # m.plot()
+    # pb.title('After optimisation')
+    m.ensure_default_constraints()
+    m.randomize()
+    m.checkgrad(verbose = 1)
+
+    return m
+
+def GPLVM_oil_100():
+    data = GPy.util.datasets.oil_100()
+
+    # create simple GP model
+    m = GPy.models.GPLVM(data['X'], 2)
+
+    # optimize
+    m.ensure_default_constraints()
+    m.optimize()
+
+    # plot
+    print(m)
+    return m
diff --git a/GPy/examples/poisson.py b/GPy/examples/non_gaussian.py
similarity index 96%
rename from GPy/examples/poisson.py
rename to GPy/examples/non_gaussian.py
index ce68e921..e893ec2c 100644
--- a/GPy/examples/poisson.py
+++ b/GPy/examples/non_gaussian.py
@@ -11,7 +11,7 @@ import GPy
 
 default_seed=10000
 
-def toy_1d(seed=default_seed):
+def toy_poisson_1d(seed=default_seed):
     """
-    Simple 1D classification example
-    :param seed : seed value for data generation (default is 4).
+    Simple 1D Poisson regression example
+    :param seed : seed value for data generation (default is 10000).
diff --git a/GPy/examples/oil_flow_demo.py b/GPy/examples/oil_flow_demo.py
deleted file mode 100644
index 1e9f4f5a..00000000
--- a/GPy/examples/oil_flow_demo.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import cPickle as pickle
-import numpy as np
-import pylab as pb
-import GPy
-import pylab as plt
-np.random.seed(3)
-
-def plot_oil(X, theta, labels, label):
-    plt.figure()
-    X = X[:,np.argsort(theta)[:2]]
-    flow_type = (X[labels[:,0]==1])
-    plt.plot(flow_type[:,0], flow_type[:,1], 'rx')
-    flow_type = (X[labels[:,1]==1])
-    plt.plot(flow_type[:,0], flow_type[:,1], 'gx')
-    flow_type = (X[labels[:,2]==1])
-    plt.plot(flow_type[:,0], flow_type[:,1], 'bx')
-    plt.title(label)
-
-data = pickle.load(open('../../../GPy_assembla/datasets/oil_flow_3classes.pickle', 'r'))
-
-Y = data['DataTrn']
-N, D = Y.shape
-selected = np.random.permutation(N)[:350]
-labels = data['DataTrnLbls'][selected]
-Y = Y[selected]
-N, D = Y.shape
-Y -= Y.mean(axis=0)
-# Y /= Y.std(axis=0)
-
-Q = 5
-k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
-m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 20)
-m.constrain_positive('(rbf|bias|S|linear|white|noise)')
-
-# m.unconstrain('noise')
-# m.constrain_fixed('noise_precision', 50.0)
-# m.unconstrain('white')
-# m.constrain_bounded('white', 1e-6, 10.0)
-# plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
-#m.optimize(messages = True)
-# m.optimize('tnc', messages = True)
-# plot_oil(m.X, m.kern.parts[0].lengthscale, labels, 'B-GPLVM')
-# # pb.figure()
-# m.plot()
-# pb.title('PCA initialisation')
-# pb.figure()
-# m.optimize(messages = 1)
-# m.plot()
-# pb.title('After optimisation')
-# m = GPy.models.GPLVM(Y, Q)
-# m.constrain_positive('(white|rbf|bias|noise)')
-# m.optimize()
-# plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')
diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index d3442504..6c22b68e 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -41,10 +41,6 @@ def rogers_girolami_olympics():
     print(m)
     return m
 
-def della_gatta_TRP63_gene_expression(number=942):
-    """Run a standard Gaussian process regression on the della Gatta et al TRP63 Gene Expression data set for a given gene number."""
-
-
 def toy_rbf_1d_50():
     """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
     data = GPy.util.datasets.toy_rbf_1d_50()
@@ -108,9 +104,6 @@ def coregionalisation_toy2():
     pb.plot(X2[:,0],Y2[:,0],'gx',mew=2)
     return m
 
-
-
-
 def coregionalisation_toy():
     """
     A simple demonstration of coregionalisation on two sinusoidal functions
@@ -130,7 +123,7 @@ def coregionalisation_toy():
     m.constrain_fixed('rbf_var',1.)
     m.constrain_positive('kappa')
     m.ensure_default_constraints()
-    #m.optimize()
+    m.optimize()
 
     pb.figure()
     Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))
@@ -158,7 +151,6 @@ def coregionalisation_sparse():
     M = 40
     Z = np.hstack((np.random.rand(M,1)*8,np.random.randint(0,2,M)[:,None]))
-    #Z = X.copy()
 
     k1 = GPy.kern.rbf(1)
     k2 = GPy.kern.coregionalise(2,2)
 
@@ -184,7 +176,6 @@ def coregionalisation_sparse():
     y = pb.ylim()[0]
     pb.plot(Z[:,0][Z[:,1]==0],np.zeros(np.sum(Z[:,1]==0))+y,'r|',mew=2)
     pb.plot(Z[:,0][Z[:,1]==1],np.zeros(np.sum(Z[:,1]==1))+y,'g|',mew=2)
-    print Z
 
     return m
 
@@ -211,7 +202,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
     xlim = ax.get_xlim()
     ylim = ax.get_ylim()
 
-    
+
     # Now run a few optimizations
     models = []
     optim_point_x = np.empty(2)
@@ -219,18 +210,18 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
     np.random.seed(seed=seed)
 
     for i in range(0, model_restarts):
        kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + GPy.kern.white(1,variance=np.random.exponential(1.))
-        
+
        m = GPy.models.GP_regression(data['X'],data['Y'], kernel=kern)
        optim_point_x[0] = m.get('rbf_lengthscale')
        optim_point_y[0] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance'));
-        
+
        # optimize
        m.ensure_default_constraints()
        m.optimize(xtol=1e-6,ftol=1e-6)
        optim_point_x[1] = m.get('rbf_lengthscale')
        optim_point_y[1] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance'));
-        
+
        pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1]-optim_point_x[0], optim_point_y[1]-optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
        models.append(m)
 
@@ -264,7 +255,7 @@ def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf)
         total_var = (np.dot(np.dot(data['Y'].T,GPy.util.linalg.pdinv(K)[0]), data['Y'])/data['Y'].shape[0])[0,0]
         noise_var *= total_var
         signal_var *= total_var
-        
+
         kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale) + GPy.kern.white(1, variance=noise_var)
         model = GPy.models.GP_regression(data['X'], data['Y'], kernel=kernel)
 
@@ -273,3 +264,70 @@ def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf)
         lls.append(length_scale_lls)
     return np.array(lls)
 
+def sparse_GP_regression_1D(N = 400, M = 5):
+    """Run a 1D example of a sparse GP regression."""
+    # sample inputs and outputs
+    X = np.random.uniform(-3.,3.,(N,1))
+    Y = np.sin(X)+np.random.randn(N,1)*0.05
+    # construct kernel
+    rbf = GPy.kern.rbf(1)
+    noise = GPy.kern.white(1)
+    kernel = rbf + noise
+    # create simple GP model
+    m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
+
+    m.constrain_positive('(variance|lengthscale|precision)')
+
+    m.checkgrad(verbose=1)
+    m.optimize('tnc', messages = 1)
+    m.plot()
+    return m
+
+def sparse_GP_regression_2D(N = 400, M = 50):
+    """Run a 2D example of a sparse GP regression."""
+    X = np.random.uniform(-3.,3.,(N,2))
+    Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(N,1)*0.05
+
+    # construct kernel
+    rbf = GPy.kern.rbf(2)
+    noise = GPy.kern.white(2)
+    kernel = rbf + noise
+
+    # create simple GP model
+    m = GPy.models.sparse_GP_regression(X,Y,kernel, M = M)
+
+    # constrain all parameters to be positive (but not the inducing inputs)
+    m.constrain_positive('(variance|lengthscale|precision)')
+    m.set('len',2.)
+
+    m.checkgrad()
+
+    # optimize and plot
+    pb.figure()
+    m.optimize('tnc', messages = 1)
+    m.plot()
+    print(m)
+    return m
+
+def uncertain_inputs_sparse_regression():
+    """Run a 1D example of a sparse GP regression with uncertain inputs."""
+    # sample inputs and outputs
+    S = np.ones((20,1))
+    X = np.random.uniform(-3.,3.,(20,1))
+    Y = np.sin(X)+np.random.randn(20,1)*0.05
+    likelihood = GPy.likelihoods.Gaussian(Y)
+    Z = np.random.uniform(-3.,3.,(7,1))
+
+    k = GPy.kern.rbf(1) + GPy.kern.white(1)
+
+    # create simple GP model
+    m = GPy.models.sparse_GP(X, likelihood, kernel=k, Z=Z, X_uncertainty=S)
+
+    # constrain all parameters to be positive
+    m.constrain_positive('(variance|prec)')
+
+    # optimize and plot
+    m.optimize('tnc', max_f_eval = 1000, messages=1)
+    m.plot()
+    print(m)
+    return m
diff --git a/GPy/examples/sparse_GPLVM_demo.py b/GPy/examples/sparse_GPLVM_demo.py
deleted file mode 100644
index 5df72b8d..00000000
--- a/GPy/examples/sparse_GPLVM_demo.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-import pylab as pb
-import GPy
-np.random.seed(1)
-print "sparse GPLVM with RBF kernel"
-
-N = 100
-M = 8
-Q = 1
-D = 2
-#generate GPLVM-like data
-X = np.random.rand(N, Q)
-k = GPy.kern.rbf(Q, 1.0, 2.0) + GPy.kern.white(Q, 0.00001)
-K = k.K(X)
-Y = np.random.multivariate_normal(np.zeros(N),K,D).T
-
-m = GPy.models.sparse_GPLVM(Y, Q, M=M)
-m.constrain_positive('(rbf|bias|noise|white)')
-
-pb.figure()
-m.plot()
-pb.title('PCA initialisation')
-pb.figure()
-m.optimize(messages = 1)
-m.plot()
-pb.title('After optimisation')
diff --git a/GPy/examples/sparse_GP_regression_demo.py b/GPy/examples/sparse_GP_regression_demo.py
deleted file mode 100644
index 808d943f..00000000
--- a/GPy/examples/sparse_GP_regression_demo.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-"""
-Sparse Gaussian Processes regression with an RBF kernel
-"""
-import pylab as pb
-import numpy as np
-import GPy
-np.random.seed(2)
-pb.ion()
-N = 400
-M = 5
-
-######################################
-## 1 dimensional example
-
-# sample inputs and outputs
-X = np.random.uniform(-3.,3.,(N,1))
-Y = np.sin(X)+np.random.randn(N,1)*0.05
-
-# construct kernel
-rbf = GPy.kern.rbf(1)
-noise = GPy.kern.white(1)
-kernel = rbf + noise
-
-# create simple GP model
-m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
-
-m.constrain_positive('(variance|lengthscale|precision)')
-
-m.checkgrad(verbose=1)
-m.optimize('tnc', messages = 1)
-m.plot()
-
-######################################
-## 2 dimensional example
-
-# # sample inputs and outputs
-# X = np.random.uniform(-3.,3.,(N,2))
-# Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(N,1)*0.05
-
-# # construct kernel
-# rbf = GPy.kern.rbf(2)
-# noise = GPy.kern.white(2)
-# kernel = rbf + noise
-
-# # create simple GP model
-# m2 = GPy.models.sparse_GP_regression(X,Y,kernel, M = 50)
-# create simple GP model
-
-# # contrain all parameters to be positive (but not inducing inputs)
-# m2.constrain_positive('(variance|lengthscale|precision)')
-
-# #check gradient FIXME unit test please
-# m2.checkgrad()
-
-# # optimize and plot
-# pb.figure()
-# m2.optimize('tnc', messages = 1)
-# m2.plot()
-# print(m2)
diff --git a/GPy/examples/sparse_ep_fix.py b/GPy/examples/sparse_ep_fix.py
deleted file mode 100644
index acbd506c..00000000
--- a/GPy/examples/sparse_ep_fix.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-"""
-Sparse Gaussian Processes regression with an RBF kernel
-"""
-import pylab as pb
-import numpy as np
-import GPy
-np.random.seed(2)
-N = 500
-M = 5
-
-default_seed=10000
-
-def crescent_data(inducing=10, seed=default_seed):
-    """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
-
-    :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
-    :param seed : seed value for data generation.
-    :type seed: int
-    :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
-    :type inducing: int
-    """
-
-    data = GPy.util.datasets.crescent_data(seed=seed)
-
-    # Kernel object
-    kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
-
-    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
-    likelihood = GPy.likelihoods.EP(data['Y'],distribution)
-
-    sample = np.random.randint(0,data['X'].shape[0],inducing)
-    Z = data['X'][sample,:]
-    #Z = (np.random.random_sample(2*inducing)*(data['X'].max()-data['X'].min())+data['X'].min()).reshape(inducing,-1)
-
-    # create sparse GP EP model
-    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
-    m.ensure_default_constraints()
-
-    m.update_likelihood_approximation()
-    print(m)
-
-    # optimize
-    m.optimize()
-    print(m)
-
-    # plot
-    m.plot()
-    return m
-
-
-def toy_linear_1d_classification(seed=default_seed):
-    """
-    Simple 1D classification example
-    :param seed : seed value for data generation (default is 4).
-    :type seed: int
-    """
-
-    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
-    Y = data['Y'][:, 0:1]
-    Y[Y == -1] = 0
-
-    # Kernel object
-    kernel = GPy.kern.rbf(1)
-
-    # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
-    likelihood = GPy.likelihoods.EP(Y,distribution)
-
-    Z = np.random.uniform(data['X'].min(),data['X'].max(),(10,1))
-
-    # Model definition
-    m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
-
-    m.ensure_default_constraints()
-    # Optimize
-    m.update_likelihood_approximation()
-    # Parameters optimization:
-    m.optimize()
-    #m.EPEM() #FIXME
-
-    # Plot
-    pb.subplot(211)
-    m.plot_f()
-    pb.subplot(212)
-    m.plot()
-    print(m)
-
-    return m
-
diff --git a/GPy/examples/tutorials.py b/GPy/examples/tutorials.py
index be550e01..9d892b8e 100644
--- a/GPy/examples/tutorials.py
+++ b/GPy/examples/tutorials.py
@@ -90,7 +90,7 @@ def tuto_kernel_overview():
 
     # Sum of kernels
     k_add = k1.add(k2)
-    k_addorth = k1.add_orthogonal(k2)    
+    k_addorth = k1.add_orthogonal(k2)
 
     pb.figure(figsize=(8,8))
     pb.subplot(2,2,1)
@@ -199,3 +199,11 @@ def tuto_kernel_overview():
         WN[100] = 1.
         pb.subplot(3,4,i+1)
         pb.plot(X,WN,'b')
+
+def model_interaction():
+    """Build a small GP regression model for the 'interacting with models' tutorial."""
+    X = np.random.randn(20,1)
+    Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5.
+    k = GPy.kern.rbf(1) + GPy.kern.bias(1)
+    return GPy.models.GP_regression(X,Y,kernel=k)
+
diff --git a/GPy/examples/uncertain_input_GP_regression_demo.py b/GPy/examples/uncertain_input_GP_regression_demo.py
deleted file mode 100644
index f0be5fe2..00000000
--- a/GPy/examples/uncertain_input_GP_regression_demo.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-import pylab as pb
-import numpy as np
-import GPy
-pb.ion()
-pb.close('all')
-
-
-# sample inputs and outputs
-S = np.ones((20,1))
-X = np.random.uniform(-3.,3.,(20,1))
-Y = np.sin(X)+np.random.randn(20,1)*0.05
-
-k = GPy.kern.rbf(1) + GPy.kern.white(1)
-
-# create simple GP model
-m = GPy.models.sparse_GP_regression(X,Y,X_uncertainty=S,kernel=k)
-
-# contrain all parameters to be positive
-m.constrain_positive('(variance|prec)')
-
-# optimize and plot
-m.optimize('tnc', max_f_eval = 1000, messages=1)
-m.plot()
-print(m)
diff --git a/GPy/examples/uncollapsed_GP_demo.py b/GPy/examples/uncollapsed_GP_demo.py
deleted file mode 100644
index 5dc1ae1d..00000000
--- a/GPy/examples/uncollapsed_GP_demo.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-
-import numpy as np
-"""
-Sparse Gaussian Processes regression with an RBF kernel,
-using the uncollapsed sparse GP (where the distribution of the
-inducing points is explicitley represented)
-"""
-import pylab as pb
-import numpy as np
-import GPy
-np.random.seed(2)
-pb.ion()
-N = 500
-M = 20
-
-# sample inputs and outputs
-X = np.random.uniform(-3.,3.,(N,1))
-Y = np.sin(X)+np.random.randn(N,1)*0.05
-
-kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
-
-# create simple GP model
-m = GPy.models.uncollapsed_sparse_GP(X, Y, kernel=kernel, M=M)#, X_uncertainty=np.zeros_like(X)+0.01)
-
-# contrain all parameters to be positive
-m.ensure_default_constraints()
-m.checkgrad()
-# optimize and plot
-m.plot()
diff --git a/GPy/examples/unsupervised.py b/GPy/examples/unsupervised.py
deleted file mode 100644
index 08d81e05..00000000
--- a/GPy/examples/unsupervised.py
+++ /dev/null
@@ -1,25 +0,0 @@
-"""
-Usupervised learning with Gaussian Processes.
-"""
-import pylab as pb
-import numpy as np
-import GPy
-
-
-######################################
-## Oil data subsampled to 100 points.
-def oil_100():
-    data = GPy.util.datasets.oil_100()
-
-    # create simple GP model
-    m = GPy.models.GPLVM(data['X'], 2)
-
-
-    # optimize
-    m.ensure_default_constraints()
-    m.optimize()
-
-    # plot
-    print(m)
-    return m
-
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 87e67f33..b2970674 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -389,6 +389,11 @@ class kern(parameterised):
                 target += p1.variance*(p2._psi1[:,:,None]+p2._psi1[:,None,:])
             elif p2.name=='bias' and p1.name=='rbf':
                 target += p2.variance*(p1._psi1[:,:,None]+p1._psi1[:,None,:])
+            #linear X bias
+            elif p1.name=='bias' and p2.name=='linear':
+                raise NotImplementedError
+            elif p2.name=='bias' and p1.name=='linear':
+                raise NotImplementedError
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
@@ -396,7 +401,6 @@ class kern(parameterised):
                 raise NotImplementedError #TODO
             else:
                 raise NotImplementedError, "psi2 cannot be computed for this kernel"
-
         return target
 
     def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
@@ -417,11 +421,11 @@ class kern(parameterised):
                 pass
             #rbf X bias
             elif p1.name=='bias' and p2.name=='rbf':
-                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance,Z,mu,S,target[ps2])
-                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2._psi1,Z,mu,S,target[ps1])
+                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance*2.,Z,mu,S,target[ps2])
+                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2._psi1*2.,Z,mu,S,target[ps1])
             elif p2.name=='bias' and p1.name=='rbf':
-                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance,Z,mu,S,target[ps1])
-                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1._psi1,Z,mu,S,target[ps2])
+                p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance*2.,Z,mu,S,target[ps1])
+                p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1._psi1*2.,Z,mu,S,target[ps2])
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
@@ -444,9 +448,9 @@ class kern(parameterised):
                 pass
             #rbf X bias
             elif p1.name=='bias' and p2.name=='rbf':
-                target += p2.dpsi1_dX(dL_dpsi2.sum(1)*p1.variance,Z,mu,S,target)
+                p2.dpsi1_dX(dL_dpsi2.sum(1).T*p1.variance,Z,mu,S,target)
             elif p2.name=='bias' and p1.name=='rbf':
-                target += p1.dpsi1_dZ(dL_dpsi2.sum(2)*p2.variance,Z,mu,S,target)
+                p1.dpsi1_dZ(dL_dpsi2.sum(1).T*p2.variance,Z,mu,S,target)
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
@@ -471,9 +475,9 @@ class kern(parameterised):
                 pass
             #rbf X bias
             elif p1.name=='bias' and p2.name=='rbf':
-                target += p2.dpsi1_dmuS(partial.sum(1)*p1.variance,Z,mu,S,target_mu,target_S)
+                p2.dpsi1_dmuS(dL_dpsi2.sum(1).T*p1.variance*2.,Z,mu,S,target_mu,target_S)
             elif p2.name=='bias' and p1.name=='rbf':
-                target += p1.dpsi1_dmuS(partial.sum(2)*p2.variance,Z,mu,S,target_mu,target_S)
+                p1.dpsi1_dmuS(dL_dpsi2.sum(1).T*p2.variance*2.,Z,mu,S,target_mu,target_S)
             #rbf X linear
             elif p1.name=='linear' and p2.name=='rbf':
                 raise NotImplementedError #TODO
diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py
index 7d817f62..ef6b72bb 100644
--- a/GPy/kern/linear.py
+++ b/GPy/kern/linear.py
@@ -81,6 +81,13 @@ class linear(kernpart):
             self._K_computations(X, X2)
             target += np.sum(self._dot_product*dL_dK)
 
+    def dKdiag_dtheta(self,dL_dKdiag, X, target):
+        tmp = dL_dKdiag[:,None]*X**2
+        if self.ARD:
+            target += tmp.sum(0)
+        else:
+            target += tmp.sum()
+
     def dK_dX(self,dL_dK,X,X2,target):
         target += (((X2[:, None, :] * self.variances)) * dL_dK[:,:, None]).sum(0)
 
@@ -92,13 +99,6 @@ class linear(kernpart):
     def psi0(self,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         target += np.sum(self.variances*self.mu2_S,1)
 
-    def dKdiag_dtheta(self,dL_dKdiag, X, target):
-        tmp = dL_dKdiag[:,None]*X**2
-        if self.ARD:
-            target += tmp.sum(0)
-        else:
-            target += tmp.sum()
-
     def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         tmp = dL_dpsi0[:, None] * self.mu2_S
@@ -134,6 +134,7 @@ class linear(kernpart):
         self._psi_computations(Z,mu,S)
         psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
         target += psi2.sum(-1)
+        #TODO: this could be faster using np.tensordot
 
     def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py
index f1439f76..f70938c9 100644
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@@ -103,8 +103,12 @@ class sparse_GP(GP):
             self.psi1V = np.dot(self.psi1, self.V)
         self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
 
-        self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
-        self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
+        tmp = np.dot(self.Lmi.T, self.LBi.T)
+        self.C = np.dot(tmp,tmp.T)
+        #self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
+        #self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
+        tmp = np.dot(self.C,self.psi1V/sf)
+        self.E = np.dot(tmp,tmp.T)
         # Compute dL_dpsi
-        # FIXME: this is untested for the heterscedastic + uncertin inputs case
+        # FIXME: this is untested for the heteroscedastic + uncertain inputs case
         self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py
index e3bd2b36..dda92b90 100644
--- a/GPy/testing/bgplvm_tests.py
+++ b/GPy/testing/bgplvm_tests.py
@@ -45,6 +45,33 @@ class BGPLVMTests(unittest.TestCase):
         m.randomize()
         self.assertTrue(m.checkgrad())
 
+    def test_rbf_bias_kern(self):
+        N, M, Q, D = 10, 3, 2, 4
+        X = np.random.rand(N, Q)
+        k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+        Y -= Y.mean(axis=0)
+        k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
+        m.constrain_positive('(rbf|bias|noise|white|S)')
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
+    @unittest.skip('psi2 cross terms are NotImplemented for this combination')
+    def test_linear_bias_kern(self):
+        N, M, Q, D = 10, 3, 2, 4
+        X = np.random.rand(N, Q)
+        k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        K = k.K(X)
+        Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+        Y -= Y.mean(axis=0)
+        k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+        m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
+        m.constrain_positive('(linear|bias|noise|white|S)')
+        m.randomize()
+        self.assertTrue(m.checkgrad())
+
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
 
diff --git a/doc/GPy.examples.rst b/doc/GPy.examples.rst
index ec283d21..d369de41 100644
--- a/doc/GPy.examples.rst
+++ b/doc/GPy.examples.rst
@@ -73,18 +73,10 @@ examples Package
     :undoc-members:
     :show-inheritance:
 
-:mod:`tuto_GP_regression` Module
---------------------------------
+:mod:`tutorials` Module
+-----------------------
 
-.. automodule:: GPy.examples.tuto_GP_regression
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-:mod:`tuto_kernel_overview` Module
-----------------------------------
-
-.. automodule:: GPy.examples.tuto_kernel_overview
+.. automodule:: GPy.examples.tutorials
     :members:
     :undoc-members:
     :show-inheritance:
diff --git a/doc/GPy.rst b/doc/GPy.rst
index 3fd4bcfd..242a22bc 100644
--- a/doc/GPy.rst
+++ b/doc/GPy.rst
@@ -9,6 +9,14 @@ GPy Package
     :undoc-members:
     :show-inheritance:
 
+:mod:`test_coreg` Module
+------------------------
+
+.. automodule:: GPy.test_coreg
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 Subpackages
 -----------
 
@@ -20,5 +28,6 @@
     GPy.kern
     GPy.likelihoods
     GPy.models
+    GPy.testing
     GPy.util
 
diff --git a/doc/index.rst b/doc/index.rst
index b62ff6a7..5066278f 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -8,9 +8,10 @@ Welcome to GPy's documentation!
 
 For a quick start, you can have a look at one of the tutorials:
 
 * `Basic Gaussian process regression `_
+* `Interacting with models `_
 * `A kernel overview `_
 * Advanced GP regression (Forthcoming)
-* Writting kernels (Forthcoming)
+* Writing kernels (Forthcoming)
 
-You may also be interested by some examples in the GPy/examples folder.
+You may also be interested in some examples in the GPy/examples folder.
diff --git a/setup.py b/setup.py
index b701b74d..b14c907e 100644
--- a/setup.py
+++ b/setup.py
@@ -25,14 +25,12 @@ setup(name = 'GPy',
       long_description=read('README.md'),
       #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py',
      #                         sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
-      install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'],
+      install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1', 'nose'],
      extras_require = {
        'docs':['Sphinx', 'ipython'],
      },
      #setup_requires=['sphinx'],
      #cmdclass = {'build_sphinx': BuildDoc},
      classifiers=[
-      "Development Status :: 1 - Alpha",
-      "Topic :: Machine Learning",
       "License :: OSI Approved :: BSD License"],
      )
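
Usage sketch: after this reorganisation the demos are plain functions in the GPy.examples subpackage rather than standalone scripts, which is exactly the intent stated in the old __init__.py comment this patch removes. A minimal sketch of the new entry points, assuming the top-level GPy import exposes GPy.examples as before:

    import GPy

    # sparse classification demos now live next to their dense counterparts
    m = GPy.examples.classification.sparse_crescent_data(inducing=10)

    # the deleted BGPLVM_demo.py script survives as a function returning its model
    m = GPy.examples.dimensionality_reduction.BGPLVM()

    # poisson.py was renamed to non_gaussian.py; the demo name now says Poisson
    m = GPy.examples.non_gaussian.toy_poisson_1d()

Each function returns the fitted (or constructed) model, so results can be inspected interactively, e.g. print(m) or m.plot().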