diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py
index 05b6af74..f9aaddd1 100644
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@@ -6,12 +6,11 @@
 Gaussian Processes classification
 """
 import pylab as pb
-import numpy as np
 import GPy
 
 default_seed = 10000
 
-def oil(num_inducing=50, max_iters=100, kernel=None):
+def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
     """
     Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
 
@@ -25,7 +24,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None):
     Ytest[Ytest.flatten()==-1] = 0
 
     # Create GP model
-    m = GPy.models.SparseGPClassification(X, Y,kernel=kernel,num_inducing=num_inducing)
+    m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, num_inducing=num_inducing)
 
     # Contrain all parameters to be positive
     m.tie_params('.*len')
@@ -33,15 +32,16 @@ def oil(num_inducing=50, max_iters=100, kernel=None):
     m.update_likelihood_approximation()
 
     # Optimize
-    m.optimize(max_iters=max_iters)
+    if optimize:
+        m.optimize(max_iters=max_iters)
     print(m)
 
     #Test
     probs = m.predict(Xtest)[0]
-    GPy.util.classification.conf_matrix(probs,Ytest)
+    GPy.util.classification.conf_matrix(probs, Ytest)
     return m
 
-def toy_linear_1d_classification(seed=default_seed):
+def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
     """
     Simple 1D classification example using EP approximation
 
@@ -58,21 +58,23 @@ def toy_linear_1d_classification(seed=default_seed):
     m = GPy.models.GPClassification(data['X'], Y)
 
     # Optimize
-    #m.update_likelihood_approximation()
-    # Parameters optimization:
-    #m.optimize()
-    #m.update_likelihood_approximation()
-    m.pseudo_EM()
+    if optimize:
+        #m.update_likelihood_approximation()
+        # Parameters optimization:
+        #m.optimize()
+        #m.update_likelihood_approximation()
+        m.pseudo_EM()
 
     # Plot
-    fig, axes = pb.subplots(2,1)
-    m.plot_f(ax=axes[0])
-    m.plot(ax=axes[1])
-    print(m)
+    if plot:
+        fig, axes = pb.subplots(2, 1)
+        m.plot_f(ax=axes[0])
+        m.plot(ax=axes[1])
 
+    print(m)
     return m
 
-def toy_linear_1d_classification_laplace(seed=default_seed):
+def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=True):
     """
     Simple 1D classification example using Laplace approximation
 
@@ -90,24 +92,25 @@ def toy_linear_1d_classification_laplace(seed=default_seed):
 
     # Model definition
     m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood)
-
     print m
+
     # Optimize
-    #m.update_likelihood_approximation()
-    # Parameters optimization:
-    m.optimize('bfgs', messages=1)
-    #m.pseudo_EM()
+    if optimize:
+        #m.update_likelihood_approximation()
+        # Parameters optimization:
+        m.optimize('bfgs', messages=1)
+        #m.pseudo_EM()
 
     # Plot
-    fig, axes = pb.subplots(2,1)
-    m.plot_f(ax=axes[0])
-    m.plot(ax=axes[1])
-    print(m)
+    if plot:
+        fig, axes = pb.subplots(2, 1)
+        m.plot_f(ax=axes[0])
+        m.plot(ax=axes[1])
 
+    print(m)
     return m
 
-
-def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
+def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, optimize=True, plot=True):
     """
     Sparse 1D classification example
 
@@ -121,24 +124,26 @@ def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
     Y[Y.flatten() == -1] = 0
 
     # Model definition
-    m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing)
-    m['.*len']= 4.
+    m = GPy.models.SparseGPClassification(data['X'], Y, num_inducing=num_inducing)
+    m['.*len'] = 4.
 
     # Optimize
-    #m.update_likelihood_approximation()
-    # Parameters optimization:
-    #m.optimize()
-    m.pseudo_EM()
+    if optimize:
+        #m.update_likelihood_approximation()
+        # Parameters optimization:
+        #m.optimize()
+        m.pseudo_EM()
 
     # Plot
-    fig, axes = pb.subplots(2,1)
-    m.plot_f(ax=axes[0])
-    m.plot(ax=axes[1])
-    print(m)
+    if plot:
+        fig, axes = pb.subplots(2, 1)
+        m.plot_f(ax=axes[0])
+        m.plot(ax=axes[1])
 
+    print(m)
     return m
 
-def toy_heaviside(seed=default_seed):
+def toy_heaviside(seed=default_seed, optimize=True, plot=True):
     """
     Simple 1D classification example using a heavy side gp transformation
 
@@ -153,24 +158,26 @@ def toy_heaviside(seed=default_seed):
 
     # Model definition
     noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
-    likelihood = GPy.likelihoods.EP(Y,noise_model)
+    likelihood = GPy.likelihoods.EP(Y, noise_model)
     m = GPy.models.GPClassification(data['X'], likelihood=likelihood)
 
     # Optimize
-    m.update_likelihood_approximation()
-    # Parameters optimization:
-    m.optimize()
-    #m.pseudo_EM()
+    if optimize:
+        m.update_likelihood_approximation()
+        # Parameters optimization:
+        m.optimize()
+        #m.pseudo_EM()
 
     # Plot
-    fig, axes = pb.subplots(2,1)
-    m.plot_f(ax=axes[0])
-    m.plot(ax=axes[1])
-    print(m)
+    if plot:
+        fig, axes = pb.subplots(2, 1)
+        m.plot_f(ax=axes[0])
+        m.plot(ax=axes[1])
 
+    print(m)
     return m
 
-def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None):
+def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None, optimize=True, plot=True):
     """
     Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
 
@@ -187,7 +194,7 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
     Y[Y.flatten()==-1] = 0
 
     if model_type == 'Full':
-        m = GPy.models.GPClassification(data['X'], Y,kernel=kernel)
+        m = GPy.models.GPClassification(data['X'], Y, kernel=kernel)
 
     elif model_type == 'DTC':
         m = GPy.models.SparseGPClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
@@ -197,8 +204,11 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
         m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
         m['.*len'] = 3.
 
-    m.pseudo_EM()
-    print(m)
-    m.plot()
+    if optimize:
+        m.pseudo_EM()
 
+    if plot:
+        m.plot()
+
+    print(m)
     return m
diff --git a/GPy/examples/non_gaussian.py b/GPy/examples/non_gaussian.py
index 620efc5f..46849e01 100644
--- a/GPy/examples/non_gaussian.py
+++ b/GPy/examples/non_gaussian.py
@@ -114,7 +114,7 @@ def student_t_approx(optimize=True, plot=True):
 
     return m1, m2, m3, m4
 
-def boston_example():
+def boston_example(optimize=True, plot=True):
     import sklearn
     from sklearn.cross_validation import KFold
     optimizer='bfgs'
@@ -143,7 +143,6 @@ def boston_example():
         noise = 1e-1 #np.exp(-2)
         rbf_len = 0.5
         data_axis_plot = 4
-        plot = False
         kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
         kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
 
@@ -158,17 +157,13 @@ def boston_example():
         mgp['rbf_len'] = rbf_len
         mgp['noise'] = noise
         print mgp
-        mgp.optimize(optimizer=optimizer, messages=messages)
+        if optimize:
+            mgp.optimize(optimizer=optimizer, messages=messages)
         Y_test_pred = mgp.predict(X_test)
         score_folds[1, n] = rmse(Y_test, Y_test_pred[0])
         pred_density[1, n] = np.mean(mgp.log_predictive_density(X_test, Y_test))
         print mgp
         print pred_density
-        if plot:
-            plt.figure()
-            plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0])
-            plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
-            plt.title('GP gauss')
 
         print "Gaussian Laplace GP"
         N, D = Y_train.shape
@@ -181,20 +176,13 @@ def boston_example():
         mg['rbf_len'] = rbf_len
         mg['noise'] = noise
         print mg
-        try:
+        if optimize:
             mg.optimize(optimizer=optimizer, messages=messages)
-        except Exception:
-            print "Blew up"
         Y_test_pred = mg.predict(X_test)
         score_folds[2, n] = rmse(Y_test, Y_test_pred[0])
         pred_density[2, n] = np.mean(mg.log_predictive_density(X_test, Y_test))
         print pred_density
         print mg
-        if plot:
-            plt.figure()
-            plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0])
-            plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
-            plt.title('Lap gauss')
 
         for stu_num, df in enumerate(degrees_freedoms):
             #Student T
@@ -208,61 +196,68 @@ def boston_example():
             mstu_t['rbf_len'] = rbf_len
             mstu_t['t_noise'] = noise
             print mstu_t
-            try:
+            if optimize:
                 mstu_t.optimize(optimizer=optimizer, messages=messages)
-            except Exception:
-                print "Blew up"
             Y_test_pred = mstu_t.predict(X_test)
             score_folds[3+stu_num, n] = rmse(Y_test, Y_test_pred[0])
             pred_density[3+stu_num, n] = np.mean(mstu_t.log_predictive_density(X_test, Y_test))
             print pred_density
             print mstu_t
-            if plot:
-                plt.figure()
-                plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0])
-                plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
-                plt.title('Stu t {}df'.format(df))
+
+    if plot:
+        # After the loops Y_test_pred only holds the last Student-t
+        # prediction, so predict again with each model from the final
+        # fold; otherwise all three figures would show the same points.
+        final_models = [(mgp, 'GP gauss'),
+                        (mg, 'Lap gauss'),
+                        (mstu_t, 'Stu t {}df'.format(df))]
+        for model, title in final_models:
+            plt.figure()
+            plt.scatter(X_test[:, data_axis_plot], model.predict(X_test)[0])
+            plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
+            plt.title(title)
 
     print "Average scores: {}".format(np.mean(score_folds, 1))
     print "Average pred density: {}".format(np.mean(pred_density, 1))
 
-    #Plotting
-    stu_t_legends = ['Student T, df={}'.format(df) for df in degrees_freedoms]
-    legends = ['Baseline', 'Gaussian', 'Laplace Approx Gaussian'] + stu_t_legends
+    if plot:
+        #Plotting
+        stu_t_legends = ['Student T, df={}'.format(df) for df in degrees_freedoms]
+        legends = ['Baseline', 'Gaussian', 'Laplace Approx Gaussian'] + stu_t_legends
 
-    #Plot boxplots for RMSE density
-    fig = plt.figure()
-    ax=fig.add_subplot(111)
-    plt.title('RMSE')
-    bp = ax.boxplot(score_folds.T, notch=0, sym='+', vert=1, whis=1.5)
-    plt.setp(bp['boxes'], color='black')
-    plt.setp(bp['whiskers'], color='black')
-    plt.setp(bp['fliers'], color='red', marker='+')
-    xtickNames = plt.setp(ax, xticklabels=legends)
-    plt.setp(xtickNames, rotation=45, fontsize=8)
-    ax.set_ylabel('RMSE')
-    ax.set_xlabel('Distribution')
-    #Make grid and put it below boxes
-    ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
-              alpha=0.5)
-    ax.set_axisbelow(True)
+        #Plot boxplots for RMSE density
+        fig = plt.figure()
+        ax=fig.add_subplot(111)
+        plt.title('RMSE')
+        bp = ax.boxplot(score_folds.T, notch=0, sym='+', vert=1, whis=1.5)
+        plt.setp(bp['boxes'], color='black')
+        plt.setp(bp['whiskers'], color='black')
+        plt.setp(bp['fliers'], color='red', marker='+')
+        xtickNames = plt.setp(ax, xticklabels=legends)
+        plt.setp(xtickNames, rotation=45, fontsize=8)
+        ax.set_ylabel('RMSE')
+        ax.set_xlabel('Distribution')
+        #Make grid and put it below boxes
+        ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
+                alpha=0.5)
+        ax.set_axisbelow(True)
 
-    #Plot boxplots for predictive density
-    fig = plt.figure()
-    ax=fig.add_subplot(111)
-    plt.title('Predictive density')
-    bp = ax.boxplot(pred_density[1:,:].T, notch=0, sym='+', vert=1, whis=1.5)
-    plt.setp(bp['boxes'], color='black')
-    plt.setp(bp['whiskers'], color='black')
-    plt.setp(bp['fliers'], color='red', marker='+')
-    xtickNames = plt.setp(ax, xticklabels=legends[1:])
-    plt.setp(xtickNames, rotation=45, fontsize=8)
-    ax.set_ylabel('Mean Log probability P(Y*|Y)')
-    ax.set_xlabel('Distribution')
-    #Make grid and put it below boxes
-    ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
-              alpha=0.5)
-    ax.set_axisbelow(True)
+        #Plot boxplots for predictive density
+        fig = plt.figure()
+        ax=fig.add_subplot(111)
+        plt.title('Predictive density')
+        bp = ax.boxplot(pred_density[1:,:].T, notch=0, sym='+', vert=1, whis=1.5)
+        plt.setp(bp['boxes'], color='black')
+        plt.setp(bp['whiskers'], color='black')
+        plt.setp(bp['fliers'], color='red', marker='+')
+        xtickNames = plt.setp(ax, xticklabels=legends[1:])
+        plt.setp(xtickNames, rotation=45, fontsize=8)
+        ax.set_ylabel('Mean Log probability P(Y*|Y)')
+        ax.set_xlabel('Distribution')
+        #Make grid and put it below boxes
+        ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
+                alpha=0.5)
+        ax.set_axisbelow(True)
     return mstu_t
 
 def precipitation_example():
diff --git a/GPy/examples/stochastic.py b/GPy/examples/stochastic.py
index 73daef36..c302ec7d 100644
--- a/GPy/examples/stochastic.py
+++ b/GPy/examples/stochastic.py
@@ -5,7 +5,7 @@ import pylab as pb
 import numpy as np
 import GPy
 
-def toy_1d():
+def toy_1d(optimize=True, plot=True):
     N = 2000
     M = 20
 
@@ -20,15 +20,23 @@ def toy_1d():
 
     m.param_steplength = 1e-4
 
-    fig = pb.figure()
-    ax = fig.add_subplot(111)
-    def cb():
-        ax.cla()
-        m.plot(ax=ax,Z_height=-3)
-        ax.set_ylim(-3,3)
-        fig.canvas.draw()
+    if plot:
+        fig = pb.figure()
+        ax = fig.add_subplot(111)
+        def cb(foo):
+            ax.cla()
+            m.plot(ax=ax,Z_height=-3)
+            ax.set_ylim(-3,3)
+            fig.canvas.draw()
+    else:
+        # optimize() below is always handed `cb`, so keep the name bound
+        # to a do-nothing callback when there is no figure to refresh.
+        def cb(foo):
+            pass
 
-    m.optimize(500, callback=cb, callback_interval=1)
+    if optimize:
+        m.optimize(500, callback=cb, callback_interval=1)
 
-    m.plot_traces()
+    if plot:
+        m.plot_traces()
     return m