diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 441d05a5..5e3eb964 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -14,20 +14,20 @@ default_seed = np.random.seed(123344)
 def BGPLVM(seed=default_seed):
     N = 10
     M = 3
-    input_dim = 2
+    Q = 2
     D = 4
     # generate GPLVM-like data
-    X = np.random.rand(N, input_dim)
-    k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
+    X = np.random.rand(N, Q)
+    k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
     K = k.K(X)
     Y = np.random.multivariate_normal(np.zeros(N), K, D).T

-    k = GPy.kern.rbf(input_dim, ARD=True) + GPy.kern.linear(input_dim, ARD=True) + GPy.kern.rbf(input_dim, ARD=True) + GPy.kern.white(input_dim)
-    # k = GPy.kern.rbf(input_dim) + GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim)
-    # k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
-    # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
+    k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True) + GPy.kern.white(Q)
+    # k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
+    # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+    # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)

-    m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, M=M)
+    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
     m.constrain_positive('(rbf|bias|noise|white|S)')
     # m.constrain_fixed('S', 1)
@@ -63,7 +63,7 @@
     m.plot_latent(labels=m.data_labels)
     return m

-def swiss_roll(optimize=True, N=1000, M=15, input_dim=4, sigma=.2, plot=False):
+def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False):
     from GPy.util.datasets import swiss_roll
     from GPy.core.transformations import logexp_clipped
@@ -79,10 +79,10 @@ def swiss_roll(optimize=True, N=1000, M=15, input_dim=4, sigma=.2, plot=False):
         from sklearn.manifold.isomap import Isomap
         iso = Isomap().fit(Y)
         X = iso.embedding_
-        if input_dim > 2:
-            X = np.hstack((X, np.random.randn(N, input_dim - 2)))
+        if Q > 2:
+            X = np.hstack((X, np.random.randn(N, Q - 2)))
     except ImportError:
-        X = np.random.randn(N, input_dim)
+        X = np.random.randn(N, Q)

     if plot:
         from mpl_toolkits import mplot3d
@@ -98,14 +98,14 @@ def swiss_roll(optimize=True, N=1000, M=15, input_dim=4, sigma=.2, plot=False):
     var = .5
-    S = (var * np.ones_like(X) + np.clip(np.random.randn(N, input_dim) * var ** 2,
+    S = (var * np.ones_like(X) + np.clip(np.random.randn(N, Q) * var ** 2,
                                          - (1 - var), (1 - var))) + .001
     Z = np.random.permutation(X)[:M]

-    kernel = GPy.kern.rbf(input_dim, ARD=True) + GPy.kern.bias(input_dim, np.exp(-2)) + GPy.kern.white(input_dim, np.exp(-2))
+    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))

-    m = Bayesian_GPLVM(Y, input_dim, X=X, X_variance=S, M=M, Z=Z, kernel=kernel)
+    m = Bayesian_GPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel)
     m.data_colors = c
     m.data_t = t
@@ -118,18 +118,18 @@
-def BGPLVM_oil(optimize=True, N=100, input_dim=5, M=25, max_f_eval=4e3, plot=False, **k):
+def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k):
     np.random.seed(0)
     data = GPy.util.datasets.oil()
     from GPy.core.transformations import logexp_clipped

     # create simple GP model
-    kernel = GPy.kern.rbf(input_dim, ARD=True) + GPy.kern.bias(input_dim, np.exp(-2)) + GPy.kern.white(input_dim, np.exp(-2))
+    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))

     Y = data['X'][:N]
     Yn = Y - Y.mean(0)
     Yn /= Yn.std(0)

-    m = GPy.models.Bayesian_GPLVM(Yn, input_dim, kernel=kernel, M=M, **k)
+    m = GPy.models.Bayesian_GPLVM(Yn, Q, kernel=kernel, M=M, **k)
     m.data_labels = data['Y'][:N].argmax(axis=1)

     # m.constrain('variance|leng', logexp_clipped())
@@ -168,7 +168,7 @@


-def _simulate_sincos(D1, D2, D3, N, M, input_dim, plot_sim=False):
+def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
     x = np.linspace(0, 4 * np.pi, N)[:, None]
     s1 = np.vectorize(lambda x: np.sin(x))
     s2 = np.vectorize(lambda x: np.cos(x))
@@ -228,13 +228,13 @@ def bgplvm_simulation_matlab_compare():
     Y = sim_data['Y']
     S = sim_data['S']
     mu = sim_data['mu']
-    M, [_, input_dim] = 3, mu.shape
+    M, [_, Q] = 3, mu.shape

     from GPy.models import mrd
     from GPy import kern
     reload(mrd); reload(kern)

-    k = kern.linear(input_dim, ARD=True) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2))
-    m = Bayesian_GPLVM(Y, input_dim, init="PCA", M=M, kernel=k,
+    k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
+    m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
                        # X=mu,
                        # X_variance=S,
                        _debug=False)
@@ -248,8 +248,8 @@ def bgplvm_simulation(optimize='scg',
                       plot=True, max_f_eval=2e4):
     # from GPy.core.transformations import logexp_clipped
-    D1, D2, D3, N, M, input_dim = 15, 8, 8, 100, 3, 5
-    slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, input_dim, plot)
+    D1, D2, D3, N, M, Q = 15, 8, 8, 100, 3, 5
+    slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot)

     from GPy.models import mrd
     from GPy import kern
@@ -258,12 +258,12 @@
     Y = Ylist[0]

-    k = kern.linear(input_dim, ARD=True) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2)) # + kern.bias(input_dim)
-    m = Bayesian_GPLVM(Y, input_dim, init="PCA", M=M, kernel=k, _debug=True)
+    k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
+    m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
     # m.constrain('variance|noise', logexp_clipped())
     m.ensure_default_constraints()
     m['noise'] = Y.var() / 100.
-    m['linear_variance'] = .001
+    m['linear_variance'] = .01

     if optimize:
         print "Optimizing model:"
@@ -271,24 +271,21 @@
                    max_f_eval=max_f_eval, messages=True, gtol=1e-6)
     if plot:
-        import pylab
-        m.plot_X_1d()
-        pylab.figure('BGPLVM Simulation ARD Parameters');
-        pylab.axis();
-        m.kern.plot_ARD()
+        m.plot_X_1d("BGPLVM Latent Space 1D")
+        m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
     return m

 def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw):
-    D1, D2, D3, N, M, input_dim = 150, 200, 400, 500, 3, 7
-    slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, input_dim, plot_sim)
+    D1, D2, D3, N, M, Q = 150, 200, 400, 500, 3, 7
+    slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)

     from GPy.models import mrd
     from GPy import kern
     reload(mrd); reload(kern)

-    k = kern.linear(input_dim, [.05] * input_dim, ARD=True) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2))
-    m = mrd.MRD(Ylist, input_dim=input_dim, M=M, kernels=k, initx="", initz='permute', **kw)
+    k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
+    m = mrd.MRD(Ylist, Q=Q, M=M, kernels=k, initx="", initz='permute', **kw)

     for i, Y in enumerate(Ylist):
         m['{}_noise'.format(i + 1)] = Y.var() / 100.
@@ -302,21 +299,21 @@
         print "Optimizing Model:"
         m.optimize('scg', messages=1, max_iters=5e4, max_f_eval=5e4)
     if plot:
-        m.plot_X_1d()
-        m.plot_scales()
+        m.plot_X_1d("MRD Latent Space 1D")
+        m.plot_scales("MRD Scales")
     return m

 def brendan_faces():
     from GPy import kern
     data = GPy.util.datasets.brendan_faces()
-    input_dim = 2
+    Q = 2
     Y = data['Y'][0:-1:10, :]
     # Y = data['Y']
     Yn = Y - Y.mean()
     Yn /= Yn.std()

-    m = GPy.models.GPLVM(Yn, input_dim)
-    # m = GPy.models.Bayesian_GPLVM(Yn, input_dim, M=100)
+    m = GPy.models.GPLVM(Yn, Q)
+    # m = GPy.models.Bayesian_GPLVM(Yn, Q, M=100)

     # optimize
     m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
@@ -379,11 +376,11 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True):
 #     X -= X.mean(axis=0)
 #     X /= X.std(axis=0)
 #
-#     input_dim = 10
+#     Q = 10
 #     M = 30
 #
-#     kernel = GPy.kern.rbf(input_dim, ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
-#     m = GPy.models.Bayesian_GPLVM(X, input_dim, kernel=kernel, M=M)
+#     kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
+#     m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
 #
 #     m.scale_factor = 100.0
 #     m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
 #     from sklearn import cluster
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index ce8b89a3..e69c4840 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -241,22 +241,22 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         x = np.arange(self.X.shape[0])
         for i in range(self.X.shape[1]):
             if ax is None:
-                ax = fig.add_subplot(self.X.shape[1], 1, i + 1)
+                a = fig.add_subplot(self.X.shape[1], 1, i + 1)
             elif isinstance(ax, (tuple, list)):
-                ax = ax[i]
+                a = ax[i]
             else:
                 raise ValueError("Need one ax per latent dimnesion input_dim")
-            ax.plot(self.X, c='k', alpha=.3)
-            plots.extend(ax.plot(x, self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
-            ax.fill_between(x,
+            a.plot(self.X, c='k', alpha=.3)
+            plots.extend(a.plot(x, self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
+            a.fill_between(x,
                             self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]),
                             self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]),
                             facecolor=plots[-1].get_color(),
                             alpha=.3)
-            ax.legend(borderaxespad=0.)
-            ax.set_xlim(x.min(), x.max())
+            a.legend(borderaxespad=0.)
+            a.set_xlim(x.min(), x.max())
             if i < self.X.shape[1] - 1:
-                ax.set_xticklabels('')
+                a.set_xticklabels('')
         pylab.draw()
         fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
         return fig
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index 641438b0..e91298aa 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -261,12 +261,12 @@ class MRD(model):
         fig = pylab.figure(num=fignum, figsize=(4 * len(self.bgplvms), 3))
         for i, g in enumerate(self.bgplvms):
             if axes is None:
-                axes = fig.add_subplot(1, len(self.bgplvms), i + 1)
+                ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
             elif isinstance(axes, (tuple, list)):
-                axes = axes[i]
+                ax = axes[i]
             else:
                 raise ValueError("Need one axes per latent dimension input_dim")
-            plotf(i, g, axes)
+            plotf(i, g, ax)
         pylab.draw()
         if axes is None:
             fig.tight_layout()
@@ -277,20 +277,20 @@ class MRD(model):
     def plot_X_1d(self):
         return self.gref.plot_X_1d()

-    def plot_X(self, fignum="MRD Predictions", ax=None):
+    def plot_X(self, fignum=None, ax=None):
         fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X))
         return fig

-    def plot_predict(self, fignum="MRD Predictions", ax=None, **kwargs):
-        fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.predict(g.X)[0], **kwargs))
+    def plot_predict(self, fignum=None, ax=None, **kwargs):
+        fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.predict(g.X)[0], **kwargs))
         return fig

-    def plot_scales(self, fignum="MRD Scales", ax=None, *args, **kwargs):
-        fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.kern.plot_ARD(axes=ax, *args, **kwargs))
+    def plot_scales(self, fignum=None, ax=None, *args, **kwargs):
+        fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.kern.plot_ARD(ax=ax, *args, **kwargs))
         return fig

-    def plot_latent(self, fignum="MRD Latent Spaces", ax=None, *args, **kwargs):
-        fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(axes=ax, *args, **kwargs))
+    def plot_latent(self, fignum=None, ax=None, *args, **kwargs):
+        fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs))
         return fig

     def _debug_plot(self):
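
Note on call sites: the rename from input_dim to Q touches the public constructors of Bayesian_GPLVM and MRD, and the MRD plotting helpers now default fignum=None and forward ax= (rather than axes=) to kern.plot_ARD and plot_latent. Below is a minimal sketch of the updated usage, assuming the API exactly as it appears in this diff; the toy data (Y = np.random.randn(N, D)) and the sizes N, D, Q, M are illustrative only and not part of the patch:

    import numpy as np
    import GPy
    from GPy.models import mrd

    N, D, Q, M = 100, 4, 2, 10       # samples, observed dims, latent dims, inducing points
    Y = np.random.randn(N, D)        # stand-in for real observations

    # latent dimensionality is now passed as Q (was input_dim)
    k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
    m.ensure_default_constraints()

    # MRD takes the keyword Q=... (was input_dim=...)
    k2 = GPy.kern.linear(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
    m2 = mrd.MRD([Y, Y], Q=Q, M=M, kernels=k2, initz='permute')
    m2.plot_scales()                 # fignum defaults to None; pass ax=... to draw into existing axes

External code that called plot_scales or plot_latent with the old axes= keyword, or relied on the old default figure names ("MRD Predictions", "MRD Scales", "MRD Latent Spaces"), needs the corresponding update.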