diff --git a/GPy/core/GP.py b/GPy/core/GP.py index 01648189..04ea7af1 100644 --- a/GPy/core/GP.py +++ b/GPy/core/GP.py @@ -126,7 +126,7 @@ class GP(GPBase): Arguments --------- :param Xnew: The points at which to make a prediction - :type Xnew: np.ndarray, Nnew x self.Q + :type Xnew: np.ndarray, Nnew x self.input_dim :param which_parts: specifies which outputs kernel(s) to use in prediction :type which_parts: ('all', list of bools) :param full_cov: whether to return the folll covariance matrix, or just the diagonal diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index 26bed691..aa71b550 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -13,7 +13,7 @@ class GPBase(model.model): def __init__(self, X, likelihood, kernel, normalize_X=False): self.X = X assert len(self.X.shape) == 2 - self.N, self.Q = self.X.shape + self.N, self.input_dim = self.X.shape assert isinstance(kernel, kern.kern) self.kern = kernel self.likelihood = likelihood @@ -25,8 +25,8 @@ class GPBase(model.model): self._Xstd = X.std(0)[None, :] self.X = (X.copy() - self._Xmean) / self._Xstd else: - self._Xmean = np.zeros((1,self.Q)) - self._Xstd = np.ones((1,self.Q)) + self._Xmean = np.zeros((1,self.input_dim)) + self._Xstd = np.ones((1,self.input_dim)) model.model.__init__(self) diff --git a/GPy/core/model.py b/GPy/core/model.py index 2acb9963..f9dcaae4 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -392,7 +392,11 @@ class model(parameterised): if target_param is None: param_list = range(len(x)) else: - param_list = self.grep_param_names(target_param) + param_list = self.grep_param_names(target_param, transformed=True, search=True) + if not param_list: + print "No free parameters to check" + return + for i in param_list: xx = x.copy() diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py index d1abb9c3..c8d8ce4d 100644 --- a/GPy/core/parameterised.py +++ b/GPy/core/parameterised.py @@ -119,7 +119,7 @@ class parameterised(object): """Unties all parameters by 
setting tied_indices to an empty list.""" self.tied_indices = [] - def grep_param_names(self, regexp): + def grep_param_names(self, regexp, transformed=False, search=False): """ :param regexp: regular expression to select parameter names :type regexp: re | str | int @@ -129,13 +129,21 @@ class parameterised(object): Other objects are passed through - i.e. integers which weren't meant for grepping """ + if transformed: + names = self._get_param_names_transformed() + else: + names = self._get_param_names() + if type(regexp) in [str, np.string_, np.str]: regexp = re.compile(regexp) - return np.nonzero([regexp.match(name) for name in self._get_param_names()])[0] elif type(regexp) is re._pattern_type: - return np.nonzero([regexp.match(name) for name in self._get_param_names()])[0] + pass else: return regexp + if search: + return np.nonzero([regexp.search(name) for name in names])[0] + else: + return np.nonzero([regexp.match(name) for name in names])[0] def Nparam_transformed(self): removed = 0 @@ -223,7 +231,14 @@ class parameterised(object): To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes """ matches = self.grep_param_names(regexp) - assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained" + overlap = set(matches).intersection(set(self.all_constrained_indices())) + if overlap: + self.unconstrain(np.asarray(list(overlap))) + print 'Warning: re-constraining these parameters' + pn = self._get_param_names() + for i in overlap: + print pn[i] + self.fixed_indices.append(matches) if value != None: self.fixed_values.append(value) diff --git a/GPy/core/sparse_GP.py b/GPy/core/sparse_GP.py index 55cff38f..c4fe6763 100644 --- a/GPy/core/sparse_GP.py +++ b/GPy/core/sparse_GP.py @@ -13,15 +13,15 @@ class sparse_GP(GPBase): Variational sparse GP model :param X: inputs - :type X: np.ndarray (N x Q) + :type X: np.ndarray (N x input_dim) 
:param likelihood: a likelihood instance, containing the observed data :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) :param kernel : the kernel (covariance function). See link kernels :type kernel: a GPy.kern.kern instance :param X_variance: The uncertainty in the measurements of X (Gaussian variance) - :type X_variance: np.ndarray (N x Q) | None + :type X_variance: np.ndarray (N x input_dim) | None :param Z: inducing inputs (optional, see note) - :type Z: np.ndarray (M x Q) | None + :type Z: np.ndarray (M x input_dim) | None :param M : Number of inducing points (optional, default 10. Ignored if Z is not None) :type M: int :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) @@ -152,7 +152,7 @@ class sparse_GP(GPBase): return A + B + C + D + self.likelihood.Z def _set_params(self, p): - self.Z = p[:self.M * self.Q].reshape(self.M, self.Q) + self.Z = p[:self.M * self.input_dim].reshape(self.M, self.input_dim) self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam]) self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:]) self._compute_kernel_matrices() @@ -252,9 +252,9 @@ class sparse_GP(GPBase): Arguments --------- :param Xnew: The points at which to make a prediction - :type Xnew: np.ndarray, Nnew x self.Q + :type Xnew: np.ndarray, Nnew x self.input_dim :param X_variance_new: The uncertainty in the prediction points - :type X_variance_new: np.ndarray, Nnew x self.Q + :type X_variance_new: np.ndarray, Nnew x self.input_dim :param which_parts: specifies which outputs kernel(s) to use in prediction :type which_parts: ('all', list of bools) :param full_cov: whether to return the folll covariance matrix, or just the diagonal @@ -286,6 +286,9 @@ class sparse_GP(GPBase): fig = pb.figure(num=fignum) ax = fig.add_subplot(111) + if which_data is 'all': + which_data = slice(None) + GPBase.plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, 
levels=20, ax=ax) if self.X.shape[1] == 1: if self.has_uncertain_inputs: diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 22feb28d..5e3eb964 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -263,7 +263,7 @@ def bgplvm_simulation(optimize='scg', # m.constrain('variance|noise', logexp_clipped()) m.ensure_default_constraints() m['noise'] = Y.var() / 100. - m['linear_variance'] = .001 + m['linear_variance'] = .01 if optimize: print "Optimizing model:" @@ -271,11 +271,8 @@ def bgplvm_simulation(optimize='scg', max_f_eval=max_f_eval, messages=True, gtol=1e-6) if plot: - import pylab - m.plot_X_1d() - pylab.figure('BGPLVM Simulation ARD Parameters'); - pylab.axis(); - m.kern.plot_ARD() + m.plot_X_1d("BGPLVM Latent Space 1D") + m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') return m def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): @@ -302,8 +299,8 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): print "Optimizing Model:" m.optimize('scg', messages=1, max_iters=5e4, max_f_eval=5e4) if plot: - m.plot_X_1d() - m.plot_scales() + m.plot_X_1d("MRD Latent Space 1D") + m.plot_scales("MRD Scales") return m def brendan_faces(): diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index e26c7afe..310a0691 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -10,7 +10,7 @@ import numpy as np import GPy -def toy_rbf_1d(max_nb_eval_optim=100): +def toy_rbf_1d(optim_iters=100): """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" data = GPy.util.datasets.toy_rbf_1d() @@ -19,29 +19,32 @@ def toy_rbf_1d(max_nb_eval_optim=100): # optimize m.ensure_default_constraints() - m.optimize(max_f_eval=max_nb_eval_optim) + m.optimize(max_f_eval=optim_iters) # plot m.plot() print(m) return m -def rogers_girolami_olympics(max_nb_eval_optim=100): +def 
rogers_girolami_olympics(optim_iters=100): """Run a standard Gaussian process regression on the Rogers and Girolami olympics data.""" data = GPy.util.datasets.rogers_girolami_olympics() # create simple GP model m = GPy.models.GP_regression(data['X'],data['Y']) + #set the lengthscale to be something sensible (defaults to 1) + m['rbf_lengthscale'] = 10 + # optimize m.ensure_default_constraints() - m.optimize(max_f_eval=max_nb_eval_optim) + m.optimize(max_f_eval=optim_iters) # plot m.plot(plot_limits = (1850, 2050)) print(m) return m -def toy_rbf_1d_50(max_nb_eval_optim=100): +def toy_rbf_1d_50(optim_iters=100): """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" data = GPy.util.datasets.toy_rbf_1d_50() @@ -50,14 +53,14 @@ def toy_rbf_1d_50(max_nb_eval_optim=100): # optimize m.ensure_default_constraints() - m.optimize(max_f_eval=max_nb_eval_optim) + m.optimize(max_f_eval=optim_iters) # plot m.plot() print(m) return m -def silhouette(max_nb_eval_optim=100): +def silhouette(optim_iters=100): """Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper.""" data = GPy.util.datasets.silhouette() @@ -66,12 +69,12 @@ def silhouette(max_nb_eval_optim=100): # optimize m.ensure_default_constraints() - m.optimize(messages=True,max_f_eval=max_nb_eval_optim) + m.optimize(messages=True,max_f_eval=optim_iters) print(m) return m -def coregionalisation_toy2(max_nb_eval_optim=100): +def coregionalisation_toy2(optim_iters=100): """ A simple demonstration of coregionalisation on two sinusoidal functions. """ @@ -90,7 +93,7 @@ def coregionalisation_toy2(max_nb_eval_optim=100): m.constrain_fixed('.*rbf_var',1.) 
#m.constrain_positive('.*kappa') m.ensure_default_constraints() - m.optimize('sim',messages=1,max_f_eval=max_nb_eval_optim) + m.optimize('sim',messages=1,max_f_eval=optim_iters) pb.figure() Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1)))) @@ -103,7 +106,7 @@ def coregionalisation_toy2(max_nb_eval_optim=100): pb.plot(X2[:,0],Y2[:,0],'gx',mew=2) return m -def coregionalisation_toy(max_nb_eval_optim=100): +def coregionalisation_toy(optim_iters=100): """ A simple demonstration of coregionalisation on two sinusoidal functions. """ @@ -122,7 +125,7 @@ def coregionalisation_toy(max_nb_eval_optim=100): m.constrain_fixed('.*rbf_var',1.) #m.constrain_positive('kappa') m.ensure_default_constraints() - m.optimize(max_f_eval=max_nb_eval_optim) + m.optimize(max_f_eval=optim_iters) pb.figure() Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1)))) @@ -136,7 +139,7 @@ def coregionalisation_toy(max_nb_eval_optim=100): return m -def coregionalisation_sparse(max_nb_eval_optim=100): +def coregionalisation_sparse(optim_iters=100): """ A simple demonstration of coregionalisation on two sinusoidal functions using sparse approximations. """ @@ -161,7 +164,7 @@ def coregionalisation_sparse(max_nb_eval_optim=100): #m.constrain_positive('kappa') m.constrain_fixed('iip') m.ensure_default_constraints() - m.optimize_restarts(5, robust=True, messages=1, max_f_eval=max_nb_eval_optim) + m.optimize_restarts(5, robust=True, messages=1, max_f_eval=optim_iters) pb.figure() Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1)))) @@ -178,7 +181,7 @@ def coregionalisation_sparse(max_nb_eval_optim=100): return m -def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000, max_nb_eval_optim=100): +def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000, optim_iters=100): """Show an example of a multimodal error surface for Gaussian process regression. 
Gene 939 has bimodal behaviour where the noisey mode is higher.""" # Contour over a range of length scales and signal/noise ratios. @@ -216,7 +219,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000 # optimize m.ensure_default_constraints() - m.optimize(xtol=1e-6, ftol=1e-6, max_f_eval=max_nb_eval_optim) + m.optimize(xtol=1e-6, ftol=1e-6, max_f_eval=optim_iters) optim_point_x[1] = m.get('rbf_lengthscale') optim_point_y[1] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance')); @@ -263,7 +266,7 @@ def _contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf lls.append(length_scale_lls) return np.array(lls) -def sparse_GP_regression_1D(N = 400, M = 5, max_nb_eval_optim=100): +def sparse_GP_regression_1D(N = 400, M = 5, optim_iters=100): """Run a 1D example of a sparse GP regression.""" # sample inputs and outputs X = np.random.uniform(-3.,3.,(N,1)) @@ -278,11 +281,11 @@ def sparse_GP_regression_1D(N = 400, M = 5, max_nb_eval_optim=100): m.ensure_default_constraints() m.checkgrad(verbose=1) - m.optimize('tnc', messages = 1, max_f_eval=max_nb_eval_optim) + m.optimize('tnc', messages = 1, max_f_eval=optim_iters) m.plot() return m -def sparse_GP_regression_2D(N = 400, M = 50, max_nb_eval_optim=100): +def sparse_GP_regression_2D(N = 400, M = 50, optim_iters=100): """Run a 2D example of a sparse GP regression.""" X = np.random.uniform(-3.,3.,(N,2)) Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(N,1)*0.05 @@ -303,13 +306,15 @@ def sparse_GP_regression_2D(N = 400, M = 50, max_nb_eval_optim=100): # optimize and plot pb.figure() - m.optimize('tnc', messages = 1, max_f_eval=max_nb_eval_optim) + m.optimize('tnc', messages = 1, max_f_eval=optim_iters) m.plot() print(m) return m -def uncertain_inputs_sparse_regression(max_nb_eval_optim=100): +def uncertain_inputs_sparse_regression(optim_iters=100): """Run a 1D example of a sparse GP regression with uncertain inputs.""" + fig, axes = 
pb.subplots(1,2,figsize=(12,5)) + # sample inputs and outputs S = np.ones((20,1)) X = np.random.uniform(-3.,3.,(20,1)) @@ -319,14 +324,22 @@ def uncertain_inputs_sparse_regression(max_nb_eval_optim=100): k = GPy.kern.rbf(1) + GPy.kern.white(1) - # create simple GP model - m = GPy.models.sparse_GP_regression(X, Y, kernel=k, Z=Z, X_variance=S) - - # contrain all parameters to be positive + # create simple GP model - no input uncertainty on this one + m = GPy.models.sparse_GP_regression(X, Y, kernel=k, Z=Z) m.ensure_default_constraints() + m.optimize('scg', messages=1, max_f_eval=optim_iters) + m.plot(ax=axes[0]) + axes[0].set_title('no input uncertainty') - # optimize and plot - m.optimize('tnc', messages=1, max_f_eval=max_nb_eval_optim) - m.plot() + + #the same model with uncertainty + m = GPy.models.sparse_GP_regression(X, Y, kernel=k, Z=Z, X_variance=S) + m.ensure_default_constraints() + m.optimize('scg', messages=1, max_f_eval=optim_iters) + m.plot(ax=axes[1]) + axes[1].set_title('with input uncertainty') print(m) + + fig.canvas.draw() + return m diff --git a/GPy/inference/SGD.py b/GPy/inference/SGD.py index 7543a819..c2a77e40 100644 --- a/GPy/inference/SGD.py +++ b/GPy/inference/SGD.py @@ -95,11 +95,11 @@ class opt_SGD(Optimizer): i = 0 for s in param_shapes: - N, Q = s - X = x[i:i+N*Q].reshape(N, Q) + N, input_dim = s + X = x[i:i+N*input_dim].reshape(N, input_dim) X = X[samples] subset = np.append(subset, X.flatten()) - i += N*Q + i += N*input_dim subset = np.append(subset, x[i:]) @@ -167,17 +167,17 @@ class opt_SGD(Optimizer): # self.model.constrained_positive_indices = p self.model.constrained_indices = c - def get_param_shapes(self, N = None, Q = None): + def get_param_shapes(self, N = None, input_dim = None): model_name = self.model.__class__.__name__ if model_name == 'GPLVM': - return [(N, Q)] + return [(N, input_dim)] if model_name == 'Bayesian_GPLVM': - return [(N, Q), (N, Q)] + return [(N, input_dim), (N, input_dim)] else: raise NotImplementedError def 
step_with_missing_data(self, f_fp, X, step, shapes): - N, Q = X.shape + N, input_dim = X.shape if not sp.sparse.issparse(self.model.likelihood.Y): Y = self.model.likelihood.Y @@ -269,7 +269,7 @@ class opt_SGD(Optimizer): self.model.likelihood._offset = 0.0 self.model.likelihood._scale = 1.0 - N, Q = self.model.X.shape + N, input_dim = self.model.X.shape D = self.model.likelihood.Y.shape[1] num_params = self.model._get_params() self.trace = [] @@ -302,7 +302,7 @@ class opt_SGD(Optimizer): self.model.likelihood._set_params(sigma) if missing_data: - shapes = self.get_param_shapes(N, Q) + shapes = self.get_param_shapes(N, input_dim) f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes) else: self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 28e33b4b..0d3b4ccf 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -300,15 +300,15 @@ class kern(parameterised): return target def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): - """return shapes are N,M,Q""" + """return shapes are N,M,input_dim""" target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] return target_mu, target_S def psi2(self, Z, mu, S): """ - :param Z: np.ndarray of inducing inputs (M x Q) - :param mu, S: np.ndarrays of means and variances (each N x Q) + :param Z: np.ndarray of inducing inputs (M x input_dim) + :param mu, S: np.ndarrays of means and variances (each N x input_dim) :returns psi2: np.ndarray (N,M,M) """ target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py index af3e60ea..8744eda0 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/linear.py @@ -168,7 +168,7 @@ class linear(kernpart): target += tmp.sum() def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - """Think N,M,M,Q """ + """Think 
N,M,M,input_dim """ self._psi_computations(Z, mu, S) AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :] AZZA = AZZA + AZZA.swapaxes(1, 2) @@ -192,11 +192,11 @@ class linear(kernpart): else factor = 2.0*dL_dpsi2(n,m,mm); - for(q=0;q """ weave.inline(code, support_code=support_code, libraries=['gomp'], - arg_names=['N','M','Q','mu','Zhat','mudist_sq','mudist','lengthscale2','_psi2_denom','psi2_Zdist_sq','psi2_exponent','half_log_psi2_denom','psi2','variance_sq'], + arg_names=['N','M','input_dim','mu','Zhat','mudist_sq','mudist','lengthscale2','_psi2_denom','psi2_Zdist_sq','psi2_exponent','half_log_psi2_denom','psi2','variance_sq'], type_converters=weave.converters.blitz,**self.weave_options) return mudist,mudist_sq, psi2_exponent, psi2 diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py index b7f7b42b..e69c4840 100644 --- a/GPy/models/Bayesian_GPLVM.py +++ b/GPy/models/Bayesian_GPLVM.py @@ -22,13 +22,13 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): :param Y: observed data (np.ndarray) or GPy.likelihood :type Y: np.ndarray| GPy.likelihood instance - :param Q: latent dimensionality - :type Q: int + :param input_dim: latent dimensionality + :type input_dim: int :param init: initialisation method for the latent space :type init: 'PCA'|'random' """ - def __init__(self, likelihood_or_Y, Q, X=None, X_variance=None, init='PCA', M=10, + def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, oldpsave=10, _debug=False, **kwargs): if type(likelihood_or_Y) is np.ndarray: @@ -37,7 +37,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): likelihood = likelihood_or_Y if X == None: - X = self.initialise_latent(init, Q, likelihood.Y) + X = self.initialise_latent(init, input_dim, likelihood.Y) self.init = init if X_variance is None: @@ -48,7 +48,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): assert Z.shape[1] == X.shape[1] if kernel is None: - kernel = kern.rbf(Q) + kern.white(Q) + kernel = kern.rbf(input_dim) 
+ kern.white(input_dim) self.oldpsave = oldpsave self._oldps = [] @@ -78,8 +78,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): self._oldps.insert(0, p.copy()) def _get_param_names(self): - X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], []) - S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], []) + X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) + S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) return (X_names + S_names + sparse_GP._get_param_names(self)) def _get_params(self): @@ -101,10 +101,10 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): def _set_params(self, x, save_old=True, save_count=0): # try: x = self._clipped(x) - N, Q = self.N, self.Q - self.X = x[:self.X.size].reshape(N, Q).copy() - self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy() - sparse_GP._set_params(self, x[(2 * N * Q):]) + N, input_dim = self.N, self.input_dim + self.X = x[:self.X.size].reshape(N, input_dim).copy() + self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy() + sparse_GP._set_params(self, x[(2 * N * input_dim):]) # self.oldps = x # except (LinAlgError, FloatingPointError, ZeroDivisionError): # print "\rWARNING: Caught LinAlgError, continueing without setting " @@ -131,7 +131,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): def KL_divergence(self): var_mean = np.square(self.X).sum() var_S = np.sum(self.X_variance - np.log(self.X_variance)) - return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N + return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.N def log_likelihood(self): ll = sparse_GP.log_likelihood(self) @@ -196,16 +196,16 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): """ assert not self.likelihood.is_heteroscedastic N_test = Y.shape[0] - Q = self.Z.shape[1] - means = np.zeros((N_test, Q)) - covars = np.zeros((N_test, Q)) + input_dim = self.Z.shape[1] + 
means = np.zeros((N_test, input_dim)) + covars = np.zeros((N_test, input_dim)) dpsi0 = -0.5 * self.D * self.likelihood.precision dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods V = self.likelihood.precision * Y dpsi1 = np.dot(self.Cpsi1V, V.T) - start = np.zeros(self.Q * 2) + start = np.zeros(self.input_dim * 2) for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]): args = (self.kern, self.Z, dpsi0, dpsi1_n, dpsi2) @@ -222,12 +222,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): """ Plot latent space X in 1D: - -if fig is given, create Q subplots in fig and plot in these - -if ax is given plot Q 1D latent space plots of X into each `axis` + -if fig is given, create input_dim subplots in fig and plot in these + -if ax is given plot input_dim 1D latent space plots of X into each `axis` -if neither fig nor ax is given create a figure with fignum and plot in there colors: - colors of different latent space dimensions Q + colors of different latent space dimensions input_dim """ import pylab if ax is None: @@ -241,28 +241,28 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): x = np.arange(self.X.shape[0]) for i in range(self.X.shape[1]): if ax is None: - ax = fig.add_subplot(self.X.shape[1], 1, i + 1) + a = fig.add_subplot(self.X.shape[1], 1, i + 1) elif isinstance(ax, (tuple, list)): - ax = ax[i] + a = ax[i] else: - raise ValueError("Need one ax per latent dimnesion Q") - ax.plot(self.X, c='k', alpha=.3) - plots.extend(ax.plot(x, self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i))) - ax.fill_between(x, + raise ValueError("Need one ax per latent dimnesion input_dim") + a.plot(self.X, c='k', alpha=.3) + plots.extend(a.plot(x, self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i))) + a.fill_between(x, self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]), self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]), facecolor=plots[-1].get_color(), alpha=.3) - ax.legend(borderaxespad=0.) 
- ax.set_xlim(x.min(), x.max()) + a.legend(borderaxespad=0.) + a.set_xlim(x.min(), x.max()) if i < self.X.shape[1] - 1: - ax.set_xticklabels('') + a.set_xticklabels('') pylab.draw() fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95)) return fig def __getstate__(self): - return (self.likelihood, self.Q, self.X, self.X_variance, + return (self.likelihood, self.input_dim, self.X, self.X_variance, self.init, self.M, self.Z, self.kern, self.oldpsave, self._debug) @@ -271,12 +271,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): def _debug_filter_params(self, x): start, end = 0, self.X.size, - X = x[start:end].reshape(self.N, self.Q) + X = x[start:end].reshape(self.N, self.input_dim) start, end = end, end + self.X_variance.size - X_v = x[start:end].reshape(self.N, self.Q) - start, end = end, end + (self.M * self.Q) - Z = x[start:end].reshape(self.M, self.Q) - start, end = end, end + self.Q + X_v = x[start:end].reshape(self.N, self.input_dim) + start, end = end, end + (self.M * self.input_dim) + Z = x[start:end].reshape(self.M, self.input_dim) + start, end = end, end + self.input_dim theta = x[start:] return X, X_v, Z, theta @@ -414,24 +414,24 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): # caxkmmdl = divider.append_axes("right", "5%", pad="1%") # cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl) -# Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)], -# loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15), +# input_dimleg = ax1.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], +# loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.15, 1, 1.15), # borderaxespad=0, mode="expand") - ax2.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)], - loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1), + ax2.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], + loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1), borderaxespad=0, mode="expand") - ax3.legend(Xlatentplts, 
[r"$Q_{}$".format(i + 1) for i in range(self.Q)], - loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1), + ax3.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], + loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1), borderaxespad=0, mode="expand") - ax4.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)], - loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1), + ax4.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], + loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1), borderaxespad=0, mode="expand") - ax5.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)], - loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1), + ax5.legend(Xlatentplts, [r"$input_dim_{}$".format(i + 1) for i in range(self.input_dim)], + loc=3, ncol=self.input_dim, bbox_to_anchor=(0, 1.1, 1, 1.1), borderaxespad=0, mode="expand") Lleg = ax1.legend() Lleg.draggable() -# ax1.add_artist(Qleg) +# ax1.add_artist(input_dimleg) indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color()) indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color()) diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index bfe422c8..5589304a 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -20,35 +20,35 @@ class GPLVM(GP): :param Y: observed data :type Y: np.ndarray - :param Q: latent dimensionality - :type Q: int + :param input_dim: latent dimensionality + :type input_dim: int :param init: initialisation method for the latent space :type init: 'PCA'|'random' """ - def __init__(self, Y, Q, init='PCA', X = None, kernel=None, normalize_Y=False): + def __init__(self, Y, input_dim, init='PCA', X = None, kernel=None, normalize_Y=False): if X is None: - X = self.initialise_latent(init, Q, Y) + X = self.initialise_latent(init, input_dim, Y) if kernel is None: - kernel = kern.rbf(Q, ARD=Q>1) + kern.bias(Q, 
np.exp(-2)) + kern.white(Q, np.exp(-2)) + kernel = kern.rbf(input_dim, ARD=input_dim>1) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2)) likelihood = Gaussian(Y, normalize=normalize_Y) GP.__init__(self, X, likelihood, kernel, normalize_X=False) self._set_params(self._get_params()) - def initialise_latent(self, init, Q, Y): + def initialise_latent(self, init, input_dim, Y): if init == 'PCA': - return PCA(Y, Q)[0] + return PCA(Y, input_dim)[0] else: - return np.random.randn(Y.shape[0], Q) + return np.random.randn(Y.shape[0], input_dim) def _get_param_names(self): - return sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[]) + GP._get_param_names(self) + return sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.N)],[]) + GP._get_param_names(self) def _get_params(self): return np.hstack((self.X.flatten(), GP._get_params(self))) def _set_params(self,x): - self.X = x[:self.N*self.Q].reshape(self.N,self.Q).copy() + self.X = x[:self.N*self.input_dim].reshape(self.N,self.input_dim).copy() GP._set_params(self, x[self.X.size:]) def _log_likelihood_gradients(self): diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_FITC.py index b0620bf5..a0bb508c 100644 --- a/GPy/models/generalized_FITC.py +++ b/GPy/models/generalized_FITC.py @@ -20,15 +20,15 @@ class generalized_FITC(sparse_GP): Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC. :param X: inputs - :type X: np.ndarray (N x Q) + :type X: np.ndarray (N x input_dim) :param likelihood: a likelihood instance, containing the observed data :type likelihood: GPy.likelihood.(Gaussian | EP) :param kernel : the kernel/covariance function. 
See link kernels :type kernel: a GPy kernel :param X_variance: The variance in the measurements of X (Gaussian variance) - :type X_variance: np.ndarray (N x Q) | None + :type X_variance: np.ndarray (N x input_dim) | None :param Z: inducing inputs (optional, see note) - :type Z: np.ndarray (M x Q) | None + :type Z: np.ndarray (M x input_dim) | None :param M : Number of inducing points (optional, default 10. Ignored if Z is not None) :type M: int :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) @@ -44,7 +44,7 @@ class generalized_FITC(sparse_GP): sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False) def _set_params(self, p): - self.Z = p[:self.M*self.Q].reshape(self.M, self.Q) + self.Z = p[:self.M*self.input_dim].reshape(self.M, self.input_dim) self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam]) self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:]) self._compute_kernel_matrices() diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index e1bfa947..e91298aa 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -23,8 +23,8 @@ class MRD(model): :type likelihood_list: [GPy.likelihood] | [Y1..Yy] :param names: names for different gplvm models :type names: [str] - :param Q: latent dimensionality (will raise - :type Q: int + :param input_dim: latent dimensionality (will raise + :type input_dim: int :param initx: initialisation method for the latent space :type initx: 'PCA'|'random' :param initz: initialisation method for inducing inputs @@ -45,7 +45,7 @@ class MRD(model): :param kernels: list of kernels or kernel shared for all BGPLVMS :type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default) """ - def __init__(self, likelihood_or_Y_list, Q, M=10, names=None, + def __init__(self, likelihood_or_Y_list, input_dim, M=10, names=None, kernels=None, initx='PCA', initz='permute', _debug=False, **kw): if names is None: @@ -61,14 +61,14 @@ 
class MRD(model): assert all([isinstance(k, kern) for k in kernels]), "invalid kernel object detected!" assert not ('kernel' in kw), "pass kernels through `kernels` argument" - self.Q = Q + self.input_dim = input_dim self.M = M self._debug = _debug self._init = True X = self._init_X(initx, likelihood_or_Y_list) Z = self._init_Z(initz, X) - self.bgplvms = [Bayesian_GPLVM(l, Q=Q, kernel=k, X=X, Z=Z, M=self.M, **kw) for l, k in zip(likelihood_or_Y_list, kernels)] + self.bgplvms = [Bayesian_GPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.M, **kw) for l, k in zip(likelihood_or_Y_list, kernels)] del self._init self.gref = self.bgplvms[0] @@ -76,8 +76,8 @@ class MRD(model): self.nparams = nparams.cumsum() self.N = self.gref.N - self.NQ = self.N * self.Q - self.MQ = self.M * self.Q + self.NQ = self.N * self.input_dim + self.MQ = self.M * self.input_dim model.__init__(self) # @UndefinedVariable self._set_params(self._get_params()) @@ -143,8 +143,8 @@ class MRD(model): self._init_Z(initz, self.X) def _get_param_names(self): - # X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], []) - # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], []) + # X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) + # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) n1 = self.gref._get_param_names() n1var = n1[:self.NQ * 2 + self.MQ] map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x), @@ -170,9 +170,9 @@ class MRD(model): return params # def _set_var_params(self, g, X, X_var, Z): -# g.X = X.reshape(self.N, self.Q) -# g.X_variance = X_var.reshape(self.N, self.Q) -# g.Z = Z.reshape(self.M, self.Q) +# g.X = X.reshape(self.N, self.input_dim) +# g.X_variance = X_var.reshape(self.N, self.input_dim) +# g.Z = Z.reshape(self.M, self.input_dim) # # def _set_kern_params(self, g, p): # 
g.kern._set_params(p[:g.kern.Nparam]) @@ -235,13 +235,13 @@ class MRD(model): Ylist.append(likelihood_or_Y.Y) del likelihood_list if init in "PCA_concat": - X = PCA(numpy.hstack(Ylist), self.Q)[0] + X = PCA(numpy.hstack(Ylist), self.input_dim)[0] elif init in "PCA_single": - X = numpy.zeros((Ylist[0].shape[0], self.Q)) - for qs, Y in itertools.izip(numpy.array_split(numpy.arange(self.Q), len(Ylist)), Ylist): + X = numpy.zeros((Ylist[0].shape[0], self.input_dim)) + for qs, Y in itertools.izip(numpy.array_split(numpy.arange(self.input_dim), len(Ylist)), Ylist): X[:, qs] = PCA(Y, len(qs))[0] else: # init == 'random': - X = numpy.random.randn(Ylist[0].shape[0], self.Q) + X = numpy.random.randn(Ylist[0].shape[0], self.input_dim) self.X = X return X @@ -252,7 +252,7 @@ class MRD(model): if init in "permute": Z = numpy.random.permutation(X.copy())[:self.M] elif init in "random": - Z = numpy.random.randn(self.M, self.Q) * X.var() + Z = numpy.random.randn(self.M, self.input_dim) * X.var() self.Z = Z return Z @@ -261,12 +261,12 @@ class MRD(model): fig = pylab.figure(num=fignum, figsize=(4 * len(self.bgplvms), 3)) for i, g in enumerate(self.bgplvms): if axes is None: - axes = fig.add_subplot(1, len(self.bgplvms), i + 1) + ax = fig.add_subplot(1, len(self.bgplvms), i + 1) elif isinstance(axes, (tuple, list)): - axes = axes[i] + ax = axes[i] else: - raise ValueError("Need one axes per latent dimension Q") - plotf(i, g, axes) + raise ValueError("Need one axes per latent dimension input_dim") + plotf(i, g, ax) pylab.draw() if axes is None: fig.tight_layout() @@ -277,20 +277,20 @@ class MRD(model): def plot_X_1d(self): return self.gref.plot_X_1d() - def plot_X(self, fignum="MRD Predictions", ax=None): + def plot_X(self, fignum=None, ax=None): fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X)) return fig - def plot_predict(self, fignum="MRD Predictions", ax=None, **kwargs): - fig = self._handle_plotting(fignum, ax, lambda i, g, ax: 
ax.imshow(g.predict(g.X)[0], **kwargs)) + def plot_predict(self, fignum=None, ax=None, **kwargs): + fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g. predict(g.X)[0], **kwargs)) return fig - def plot_scales(self, fignum="MRD Scales", ax=None, *args, **kwargs): - fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.kern.plot_ARD(axes=ax, *args, **kwargs)) + def plot_scales(self, fignum=None, ax=None, *args, **kwargs): + fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.kern.plot_ARD(ax=ax, *args, **kwargs)) return fig - def plot_latent(self, fignum="MRD Latent Spaces", ax=None, *args, **kwargs): - fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(axes=ax, *args, **kwargs)) + def plot_latent(self, fignum=None, ax=None, *args, **kwargs): + fig = self._handle_plotting(fignum, ax, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs)) return fig def _debug_plot(self): diff --git a/GPy/models/sparse_GPLVM.py b/GPy/models/sparse_GPLVM.py index 8760ed2e..ce71cd9b 100644 --- a/GPy/models/sparse_GPLVM.py +++ b/GPy/models/sparse_GPLVM.py @@ -17,25 +17,25 @@ class sparse_GPLVM(sparse_GP_regression, GPLVM): :param Y: observed data :type Y: np.ndarray - :param Q: latent dimensionality - :type Q: int + :param input_dim: latent dimensionality + :type input_dim: int :param init: initialisation method for the latent space :type init: 'PCA'|'random' """ - def __init__(self, Y, Q, kernel=None, init='PCA', M=10): - X = self.initialise_latent(init, Q, Y) + def __init__(self, Y, input_dim, kernel=None, init='PCA', M=10): + X = self.initialise_latent(init, input_dim, Y) sparse_GP_regression.__init__(self, X, Y, kernel=kernel,M=M) def _get_param_names(self): - return (sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[]) + return (sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.N)],[]) + sparse_GP_regression._get_param_names(self)) def _get_params(self): return 
np.hstack((self.X.flatten(), sparse_GP_regression._get_params(self))) def _set_params(self,x): - self.X = x[:self.X.size].reshape(self.N,self.Q).copy() + self.X = x[:self.X.size].reshape(self.N,self.input_dim).copy() sparse_GP_regression._set_params(self, x[self.X.size:]) def log_likelihood(self): diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py index 5396e175..ae72983a 100644 --- a/GPy/testing/bgplvm_tests.py +++ b/GPy/testing/bgplvm_tests.py @@ -7,67 +7,67 @@ import GPy class BGPLVMTests(unittest.TestCase): def test_bias_kern(self): - N, M, Q, D = 10, 3, 2, 4 - X = np.random.rand(N, Q) - k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) + N, M, input_dim, D = 10, 3, 2, 4 + X = np.random.rand(N, input_dim) + k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y -= Y.mean(axis=0) - k = GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M) + k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) + m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) def test_linear_kern(self): - N, M, Q, D = 10, 3, 2, 4 - X = np.random.rand(N, Q) - k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) + N, M, input_dim, D = 10, 3, 2, 4 + X = np.random.rand(N, input_dim) + k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y -= Y.mean(axis=0) - k = GPy.kern.linear(Q) + GPy.kern.white(Q, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M) + k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) + m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) def test_rbf_kern(self): - N, M, Q, D = 10, 3, 2, 4 - X = np.random.rand(N, Q) - k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 
0.00001) + N, M, input_dim, D = 10, 3, 2, 4 + X = np.random.rand(N, input_dim) + k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y -= Y.mean(axis=0) - k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M) + k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) + m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) def test_rbf_bias_kern(self): - N, M, Q, D = 10, 3, 2, 4 - X = np.random.rand(N, Q) - k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) + N, M, input_dim, D = 10, 3, 2, 4 + X = np.random.rand(N, input_dim) + k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y -= Y.mean(axis=0) - k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M) + k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) + m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) #@unittest.skip('psi2 cross terms are NotImplemented for this combination') def test_linear_bias_kern(self): - N, M, Q, D = 30, 5, 4, 30 - X = np.random.rand(N, Q) - k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) + N, M, input_dim, D = 30, 5, 4, 30 + X = np.random.rand(N, input_dim) + k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y -= Y.mean(axis=0) - k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M) + k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + 
GPy.kern.white(input_dim, 0.00001) + m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/gplvm_tests.py b/GPy/testing/gplvm_tests.py index 51828768..b1721a3c 100644 --- a/GPy/testing/gplvm_tests.py +++ b/GPy/testing/gplvm_tests.py @@ -7,37 +7,37 @@ import GPy class GPLVMTests(unittest.TestCase): def test_bias_kern(self): - N, M, Q, D = 10, 3, 2, 4 - X = np.random.rand(N, Q) - k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) + N, M, input_dim, D = 10, 3, 2, 4 + X = np.random.rand(N, input_dim) + k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T - k = GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) - m = GPy.models.GPLVM(Y, Q, kernel = k) + k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) + m = GPy.models.GPLVM(Y, input_dim, kernel = k) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) def test_linear_kern(self): - N, M, Q, D = 10, 3, 2, 4 - X = np.random.rand(N, Q) - k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) + N, M, input_dim, D = 10, 3, 2, 4 + X = np.random.rand(N, input_dim) + k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T - k = GPy.kern.linear(Q) + GPy.kern.white(Q, 0.00001) - m = GPy.models.GPLVM(Y, Q, kernel = k) + k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) + m = GPy.models.GPLVM(Y, input_dim, kernel = k) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) def test_rbf_kern(self): - N, M, Q, D = 10, 3, 2, 4 - X = np.random.rand(N, Q) - k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) + N, M, input_dim, D = 10, 3, 2, 4 + X = np.random.rand(N, input_dim) + k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T - k = 
GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) - m = GPy.models.GPLVM(Y, Q, kernel = k) + k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) + m = GPy.models.GPLVM(Y, input_dim, kernel = k) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/mrd_tests.py b/GPy/testing/mrd_tests.py index 903cddfc..25adbca6 100644 --- a/GPy/testing/mrd_tests.py +++ b/GPy/testing/mrd_tests.py @@ -14,16 +14,16 @@ class MRDTests(unittest.TestCase): def test_gradients(self): num_m = 3 - N, M, Q, D = 20, 8, 6, 20 - X = np.random.rand(N, Q) + N, M, input_dim, D = 20, 8, 6, 20 + X = np.random.rand(N, input_dim) - k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q) + k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) K = k.K(X) Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)] likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist] - m = GPy.models.MRD(likelihood_list, Q=Q, kernels=k, M=M) + m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, M=M) m.ensure_default_constraints() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/psi_stat_gradient_tests.py b/GPy/testing/psi_stat_gradient_tests.py index 7c41098f..23f841b5 100644 --- a/GPy/testing/psi_stat_gradient_tests.py +++ b/GPy/testing/psi_stat_gradient_tests.py @@ -16,25 +16,25 @@ class PsiStatModel(model): self.X = X self.X_variance = X_variance self.Z = Z - self.N, self.Q = X.shape - self.M, Q = Z.shape - assert self.Q == Q, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape) + self.N, self.input_dim = X.shape + self.M, input_dim = Z.shape + assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape) self.kern = kernel super(PsiStatModel, self).__init__() self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance) def _get_param_names(self): - Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in 
itertools.product(['X', 'X_variance'], range(self.N), range(self.Q))] - Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.M), range(self.Q))] + Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.input_dim))] + Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.M), range(self.input_dim))] return Xnames + Znames + self.kern._get_param_names() def _get_params(self): return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()]) def _set_params(self, x, save_old=True, save_count=0): start, end = 0, self.X.size - self.X = x[start:end].reshape(self.N, self.Q) + self.X = x[start:end].reshape(self.N, self.input_dim) start, end = end, end + self.X_variance.size - self.X_variance = x[start: end].reshape(self.N, self.Q) + self.X_variance = x[start: end].reshape(self.N, self.input_dim) start, end = end, end + self.Z.size - self.Z = x[start: end].reshape(self.M, self.Q) + self.Z = x[start: end].reshape(self.M, self.input_dim) self.kern._set_params(x[end:]) def log_likelihood(self): return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum() @@ -43,24 +43,24 @@ class PsiStatModel(model): try: psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance) except AttributeError: - psiZ = numpy.zeros(self.M * self.Q) + psiZ = numpy.zeros(self.M * self.input_dim) thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten() return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad)) class DPsiStatTest(unittest.TestCase): - Q = 5 + input_dim = 5 N = 50 M = 10 D = 20 - X = numpy.random.randn(N, Q) + X = numpy.random.randn(N, input_dim) X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1) Z = 
numpy.random.permutation(X)[:M] - Y = X.dot(numpy.random.randn(Q, D)) -# kernels = [GPy.kern.linear(Q, ARD=True, variances=numpy.random.rand(Q)), GPy.kern.rbf(Q, ARD=True), GPy.kern.bias(Q)] + Y = X.dot(numpy.random.randn(input_dim, D)) +# kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)] - kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q), - GPy.kern.linear(Q) + GPy.kern.bias(Q), - GPy.kern.rbf(Q) + GPy.kern.bias(Q)] + kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim), + GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim), + GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)] def testPsi0(self): for k in self.kernels: @@ -108,31 +108,31 @@ if __name__ == "__main__": import sys interactive = 'i' in sys.argv if interactive: -# N, M, Q, D = 30, 5, 4, 30 -# X = numpy.random.rand(N, Q) -# k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) +# N, M, input_dim, D = 30, 5, 4, 30 +# X = numpy.random.rand(N, input_dim) +# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) # K = k.K(X) # Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T # Y -= Y.mean(axis=0) -# k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) -# m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M) +# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) +# m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, M=M) # m.ensure_default_constraints() # m.randomize() # # self.assertTrue(m.checkgrad()) numpy.random.seed(0) - Q = 5 + input_dim = 5 N = 50 M = 10 D = 15 - X = numpy.random.randn(N, Q) + X = numpy.random.randn(N, input_dim) X_var = .5 * numpy.ones_like(X) + .1 * numpy.clip(numpy.random.randn(*X.shape), 0, 1) Z = numpy.random.permutation(X)[:M] - Y = X.dot(numpy.random.randn(Q, D)) -# kernel = GPy.kern.bias(Q) + Y = 
X.dot(numpy.random.randn(input_dim, D)) +# kernel = GPy.kern.bias(input_dim) # -# kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q), -# GPy.kern.linear(Q) + GPy.kern.bias(Q), -# GPy.kern.rbf(Q) + GPy.kern.bias(Q)] +# kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim), +# GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim), +# GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)] # for k in kernels: # m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, @@ -140,18 +140,18 @@ if __name__ == "__main__": # assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts))) # # m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=GPy.kern.linear(Q)) +# M=M, kernel=GPy.kern.linear(input_dim)) # m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, # M=M, kernel=kernel) # m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, # M=M, kernel=kernel) # m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=GPy.kern.rbf(Q)) +# M=M, kernel=GPy.kern.rbf(input_dim)) m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, - M=M, kernel=GPy.kern.linear(Q, ARD=True, variances=numpy.random.rand(Q))) + M=M, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim))) m3.ensure_default_constraints() - # + GPy.kern.bias(Q)) + # + GPy.kern.bias(input_dim)) # m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q)) +# M=M, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)) else: unittest.main() diff --git a/GPy/testing/sparse_gplvm_tests.py b/GPy/testing/sparse_gplvm_tests.py index 35fa4fcf..a790ce54 100644 --- a/GPy/testing/sparse_gplvm_tests.py +++ b/GPy/testing/sparse_gplvm_tests.py @@ -7,38 +7,38 @@ import GPy class sparse_GPLVMTests(unittest.TestCase): def test_bias_kern(self): - N, M, Q, D = 10, 3, 2, 4 - X = np.random.rand(N, Q) - k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) + N, M, input_dim, D = 10, 3, 
2, 4 + X = np.random.rand(N, input_dim) + k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T - k = GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) - m = GPy.models.sparse_GPLVM(Y, Q, kernel = k, M=M) + k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) + m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @unittest.skip('linear kernels do not have dKdiag_dX') def test_linear_kern(self): - N, M, Q, D = 10, 3, 2, 4 - X = np.random.rand(N, Q) - k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) + N, M, input_dim, D = 10, 3, 2, 4 + X = np.random.rand(N, input_dim) + k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T - k = GPy.kern.linear(Q) + GPy.kern.white(Q, 0.00001) - m = GPy.models.sparse_GPLVM(Y, Q, kernel = k, M=M) + k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) + m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) def test_rbf_kern(self): - N, M, Q, D = 10, 3, 2, 4 - X = np.random.rand(N, Q) - k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) + N, M, input_dim, D = 10, 3, 2, 4 + X = np.random.rand(N, input_dim) + k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T - k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) - m = GPy.models.sparse_GPLVM(Y, Q, kernel = k, M=M) + k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) + m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index 61e18e1c..bb92fe27 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -144,23 
+144,23 @@ class GradientTests(unittest.TestCase): def test_GPLVM_rbf_bias_white_kern_2D(self): """ Testing GPLVM with rbf + bias and white kernel """ - N, Q, D = 50, 1, 2 - X = np.random.rand(N, Q) - k = GPy.kern.rbf(Q, 0.5, 0.9*np.ones((1,))) + GPy.kern.bias(Q, 0.1) + GPy.kern.white(Q, 0.05) + N, input_dim, D = 50, 1, 2 + X = np.random.rand(N, input_dim) + k = GPy.kern.rbf(input_dim, 0.5, 0.9*np.ones((1,))) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T - m = GPy.models.GPLVM(Y, Q, kernel = k) + m = GPy.models.GPLVM(Y, input_dim, kernel = k) m.ensure_default_constraints() self.assertTrue(m.checkgrad()) def test_GPLVM_rbf_linear_white_kern_2D(self): """ Testing GPLVM with rbf + bias and white kernel """ - N, Q, D = 50, 1, 2 - X = np.random.rand(N, Q) - k = GPy.kern.linear(Q) + GPy.kern.bias(Q, 0.1) + GPy.kern.white(Q, 0.05) + N, input_dim, D = 50, 1, 2 + X = np.random.rand(N, input_dim) + k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,D).T - m = GPy.models.GPLVM(Y, Q, init = 'PCA', kernel = k) + m = GPy.models.GPLVM(Y, input_dim, init = 'PCA', kernel = k) m.ensure_default_constraints() self.assertTrue(m.checkgrad()) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index da23a0a8..6e7d26d8 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -162,19 +162,19 @@ def multiple_pdinv(A): return np.dstack(invs),np.array(halflogdets) -def PCA(Y, Q): +def PCA(Y, input_dim): """ Principal component analysis: maximum likelihood solution by SVD Arguments --------- :param Y: NxD np.array of data - :param Q: int, dimension of projection + :param input_dim: int, dimension of projection Returns ------- - :rval X: - NxQ np.array of dimensionality reduced data - W - QxD mapping from X to Y + :rval X: - Nxinput_dim np.array of dimensionality reduced data + W - input_dimxD mapping 
from X to Y """ if not np.allclose(Y.mean(axis=0), 0.0): print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)" @@ -182,7 +182,7 @@ def PCA(Y, Q): #Y -= Y.mean(axis=0) Z = linalg.svd(Y-Y.mean(axis=0), full_matrices = False) - [X, W] = [Z[0][:,0:Q], np.dot(np.diag(Z[1]), Z[2]).T[:,0:Q]] + [X, W] = [Z[0][:,0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:,0:input_dim]] v = X.std(axis=0) X /= v; W *= v; diff --git a/GPy/util/plot_latent.py b/GPy/util/plot_latent.py index 47896e48..c09b5c93 100644 --- a/GPy/util/plot_latent.py +++ b/GPy/util/plot_latent.py @@ -14,10 +14,10 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, if labels is None: labels = np.ones(model.N) if which_indices is None: - if model.Q==1: + if model.input_dim==1: input_1 = 0 input_2 = None - if model.Q==2: + if model.input_dim==2: input_1, input_2 = 0,1 else: try: @@ -55,7 +55,7 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, m = marker index = np.nonzero(labels==ul)[0] - if model.Q==1: + if model.input_dim==1: x = model.X[index,input_1] y = np.zeros(index.size) else: diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py index b3429850..47e632bc 100644 --- a/GPy/util/visualize.py +++ b/GPy/util/visualize.py @@ -74,7 +74,7 @@ class lvm(data_show): self.called = False self.move_on = False self.latent_index = latent_index - self.latent_dim = model.Q + self.latent_dim = model.input_dim # The red cross which shows current latent point. self.latent_values = vals @@ -113,7 +113,7 @@ class lvm(data_show): # A click in the bar chart axis for selection a dimension. 
if self.sense_axes != None: self.sense_axes.cla() - self.sense_axes.bar(np.arange(self.model.Q),1./self.model.input_sensitivity(),color='b') + self.sense_axes.bar(np.arange(self.model.input_dim),1./self.model.input_sensitivity(),color='b') if self.latent_index[1] == self.latent_index[0]: self.sense_axes.bar(np.array(self.latent_index[0]),1./self.model.input_sensitivity()[self.latent_index[0]],color='y') @@ -128,11 +128,11 @@ class lvm(data_show): class lvm_subplots(lvm): """ - latent_axes is a np array of dimension np.ceil(Q/2), + latent_axes is a np array of dimension np.ceil(input_dim/2), one for each pair of the latent dimensions. """ def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None): - self.nplots = int(np.ceil(model.Q/2.))+1 + self.nplots = int(np.ceil(model.input_dim/2.))+1 assert len(latent_axes)==self.nplots if vals==None: vals = model.X[0, :] @@ -140,7 +140,7 @@ class lvm_subplots(lvm): for i, axis in enumerate(latent_axes): if i == self.nplots-1: - if self.nplots*2!=model.Q: + if self.nplots*2!=model.input_dim: latent_index = [i*2, i*2] lvm.__init__(self, self.latent_vals, model, data_visualize, axis, sense_axes, latent_index=latent_index) else: @@ -174,7 +174,7 @@ class lvm_dimselect(lvm): def on_click(self, event): if event.inaxes==self.sense_axes: - new_index = max(0,min(int(np.round(event.xdata-0.5)),self.model.Q-1)) + new_index = max(0,min(int(np.round(event.xdata-0.5)),self.model.input_dim-1)) if event.button == 1: # Make it red if and y-axis (red=port=left) if it is a left button click self.latent_index[1] = new_index diff --git a/doc/tuto_GP_regression.rst b/doc/tuto_GP_regression.rst index 24e10528..87744c85 100644 --- a/doc/tuto_GP_regression.rst +++ b/doc/tuto_GP_regression.rst @@ -23,9 +23,9 @@ Note that the observations Y include some noise. The first step is to define the covariance kernel we want to use for the model. We choose here a kernel based on Gaussian kernel (i.e. 
rbf or square exponential):: - kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.) + kernel = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.) -The parameter ``D`` stands for the dimension of the input space. The parameters ``variance`` and ``lengthscale`` are optional. Note that many other kernels are implemented such as: +The parameter ``D`` stands for the dimension of the input space. The parameters ``variance`` and ``lengthscale`` are optional. Many other kernels are implemented such as: * linear (``GPy.kern.linear``) * exponential kernel (``GPy.kern.exponential``) @@ -50,7 +50,7 @@ gives the following output: :: ----------------------------------------------------------------- rbf_variance | 1.0000 | | | rbf_lengthscale | 1.0000 | | | - noise variance | 1.0000 | | | + noise_variance | 1.0000 | | | .. figure:: Figures/tuto_GP_regression_m1.png :align: center @@ -65,14 +65,14 @@ The default values of the kernel parameters may not be relevant for the current There are various ways to constrain the parameters of the kernel. The most basic is to constrain all the parameters to be positive:: - m.constrain_positive('') + m.ensure_default_constraints() # or similarly m.constrain_positive('') but it is also possible to set a range on to constrain one parameter to be fixed. The parameter of ``m.constrain_positive`` is a regular expression that matches the name of the parameters to be constrained (as seen in ``print m``). For example, if we want the variance to be positive, the lengthscale to be in [1,10] and the noise variance to be fixed we can write:: m.unconstrain('') # Required to remove the previous constrains - m.constrain_positive('rbf_variance') - m.constrain_bounded('lengthscale',1.,10. ) - m.constrain_fixed('noise',0.0025) + m.constrain_positive('.*rbf_variance') + m.constrain_bounded('.*lengthscale',1.,10. 
) + m.constrain_fixed('.*noise',0.0025) Once the constrains have been imposed, the model can be optimized:: @@ -80,7 +80,7 @@ Once the constrains have been imposed, the model can be optimized:: If we want to perform some restarts to try to improve the result of the optimization, we can use the ``optimize_restart`` function:: - m.optimize_restarts(Nrestarts = 10) + m.optimize_restarts(num_restarts = 10) Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting model resulting model:: @@ -89,7 +89,7 @@ Once again, we can use ``print(m)`` and ``m.plot()`` to look at the resulting mo ----------------------------------------------------------------- rbf_variance | 0.8151 | (+ve) | | rbf_lengthscale | 1.8037 | (1.0, 10.0) | | - noise variance | 0.0025 | Fixed | | + noise_variance | 0.0025 | Fixed | | .. figure:: Figures/tuto_GP_regression_m2.png :align: center @@ -122,9 +122,7 @@ Here is a 2 dimensional example:: m.constrain_positive('') # optimize and plot - pb.figure() m.optimize('tnc', max_f_eval = 1000) - m.plot() print(m) diff --git a/doc/tuto_creating_new_kernels.rst b/doc/tuto_creating_new_kernels.rst index 24003ba2..6d30fe05 100644 --- a/doc/tuto_creating_new_kernels.rst +++ b/doc/tuto_creating_new_kernels.rst @@ -29,18 +29,18 @@ The header is similar to all kernels: :: class rational_quadratic(kernpart): -**__init__(self,D, param1, param2, ...)** +**__init__(self,input_dim, param1, param2, ...)** The implementation of this function in mandatory. -For all kernparts the first parameter ``D`` corresponds to the dimension of the input space, and the following parameters stand for the parameterization of the kernel. +For all kernparts the first parameter ``input_dim`` corresponds to the dimension of the input space, and the following parameters stand for the parameterization of the kernel. 
-The following attributes are compulsory: ``self.D`` (the dimension, integer), ``self.name`` (name of the kernel, string), ``self.Nparam`` (number of parameters, integer). :: +The following attributes are compulsory: ``self.input_dim`` (the dimension, integer), ``self.name`` (name of the kernel, string), ``self.num_params`` (number of parameters, integer). :: - def __init__(self,D,variance=1.,lengthscale=1.,power=1.): - assert D == 1, "For this kernel we assume D=1" - self.D = D - self.Nparam = 3 + def __init__(self,input_dim,variance=1.,lengthscale=1.,power=1.): + assert input_dim == 1, "For this kernel we assume input_dim=1" + self.input_dim = input_dim + self.num_params = 3 self.name = 'rat_quad' self.variance = variance self.lengthscale = lengthscale @@ -50,7 +50,7 @@ The following attributes are compulsory: ``self.D`` (the dimension, integer), `` The implementation of this function in mandatory. -This function returns a one dimensional array of length ``self.Nparam`` containing the value of the parameters. :: +This function returns a one dimensional array of length ``self.num_params`` containing the value of the parameters. :: def _get_params(self): return np.hstack((self.variance,self.lengthscale,self.power)) @@ -59,7 +59,7 @@ This function returns a one dimensional array of length ``self.Nparam`` containi The implementation of this function in mandatory. -The input is a one dimensional array of length ``self.Nparam`` containing the value of the parameters. The function has no output but it updates the values of the attribute associated to the parameters (such as ``self.variance``, ``self.lengthscale``, ...). :: +The input is a one dimensional array of length ``self.num_params`` containing the value of the parameters. The function has no output but it updates the values of the attribute associated to the parameters (such as ``self.variance``, ``self.lengthscale``, ...). 
:: def _set_params(self,x): self.variance = x[0] @@ -70,7 +70,7 @@ The input is a one dimensional array of length ``self.Nparam`` containing the va The implementation of this function in mandatory. -It returns a list of strings of length ``self.Nparam`` corresponding to the parameter names. :: +It returns a list of strings of length ``self.num_params`` corresponding to the parameter names. :: def _get_param_names(self): return ['variance','lengthscale','power'] @@ -79,7 +79,7 @@ It returns a list of strings of length ``self.Nparam`` corresponding to the para The implementation of this function in mandatory. -This function is used to compute the covariance matrix associated with the inputs X, X2 (np.arrays with arbitrary number of line (say :math:`n_1`, :math:`n_2`) and ``self.D`` columns). This function does not returns anything but it adds the :math:`n_1 \times n_2` covariance matrix to the kernpart to the object ``target`` (a :math:`n_1 \times n_2` np.array). This trick allows to compute the covariance matrix of a kernel containing many kernparts with a limited memory use. :: +This function is used to compute the covariance matrix associated with the inputs X, X2 (np.arrays with arbitrary number of lines (say :math:`n_1`, :math:`n_2`) and ``self.input_dim`` columns). This function does not return anything but it adds the :math:`n_1 \times n_2` covariance matrix to the kernpart to the object ``target`` (a :math:`n_1 \times n_2` np.array). This trick allows to compute the covariance matrix of a kernel containing many kernparts with a limited memory use. :: def K(self,X,X2,target): if X2 is None: X2 = X @@ -100,7 +100,7 @@ This function is similar to ``K`` but it computes only the values of the kernel This function is required for the optimization of the parameters. -Computes the derivative of the likelihood. As previously, the values are added to the object target which is a 1-dimensional np.array of length ``self.Nparam``.
For example, if the kernel is parameterized by :math:`\sigma^2,\ \theta`, then :math:`\frac{dL}{d\sigma^2} = \frac{dL}{d K} \frac{dK}{d\sigma^2}` is added to the first element of target and :math:`\frac{dL}{d\theta} = \frac{dL}{d K} \frac{dK}{d\theta}` to the second. :: +Computes the derivative of the likelihood. As previously, the values are added to the object target which is a 1-dimensional np.array of length ``self.num_params``. For example, if the kernel is parameterized by :math:`\sigma^2,\ \theta`, then :math:`\frac{dL}{d\sigma^2} = \frac{dL}{d K} \frac{dK}{d\sigma^2}` is added to the first element of target and :math:`\frac{dL}{d\theta} = \frac{dL}{d K} \frac{dK}{d\theta}` to the second. :: def dK_dtheta(self,dL_dK,X,X2,target): if X2 is None: X2 = X @@ -119,7 +119,7 @@ Computes the derivative of the likelihood. As previously, the values are added t This function is required for BGPLVM, sparse models and uncertain inputs. -As previously, target is an ``self.Nparam`` array and :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dparam}` is added to each element. :: +As previously, target is an ``self.num_params`` array and :math:`\frac{dL}{d Kdiag} \frac{dKdiag}{dparam}` is added to each element. :: def dKdiag_dtheta(self,dL_dKdiag,X,target): target[0] += np.sum(dL_dKdiag) @@ -129,7 +129,7 @@ As previously, target is an ``self.Nparam`` array and :math:`\frac{dL}{d Kdiag} This function is required for GPLVM, BGPLVM, sparse models and uncertain inputs. -Computes the derivative of the likelihood with respect to the inputs ``X`` (a :math:`n \times D` np.array). The result is added to target which is a :math:`n \times D` np.array. :: +Computes the derivative of the likelihood with respect to the inputs ``X`` (a :math:`n \times d` np.array). The result is added to target which is a :math:`n \times d` np.array.
:: def dK_dX(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" @@ -169,9 +169,9 @@ The following line should be added in the preamble of the file:: as well as the following block :: - def rational_quadratic(D,variance=1., lengthscale=1., power=1.): - part = rational_quadraticpart(D,variance, lengthscale, power) - return kern(D, [part]) + def rational_quadratic(input_dim,variance=1., lengthscale=1., power=1.): + part = rational_quadraticpart(input_dim,variance, lengthscale, power) + return kern(input_dim, [part]) Update initialization diff --git a/doc/tuto_interacting_with_models.rst b/doc/tuto_interacting_with_models.rst index 3031a5e1..3cea7fb7 100644 --- a/doc/tuto_interacting_with_models.rst +++ b/doc/tuto_interacting_with_models.rst @@ -18,6 +18,7 @@ All of the examples included in GPy return an instance of a model class, and therefore they can be called in the following way: :: + import numpy as np import pylab as pb pb.ion() import GPy @@ -91,19 +92,17 @@ we can define a new array of values and change the parameters as follows: :: If we call the function ``_get_params()`` again, we will obtain the new parameters we have just set. -Parameters can be also set by name using the function ``_set()``. For example, -lets change the lengthscale to .5: :: +Parameters can be also set by name using dictionary notations. For example, +let's change the lengthscale to .5: :: - m.set('rbf_lengthscale',.5) + m['rbf_lengthscale'] = .5 -``_set()`` function accepts regular expression as it first -input, and therefore all parameters matching that regular -expression are set to the given value. In this case rather +Here, the matching accepts a regular expression and therefore all parameters matching that regular expression are set to the given value. In this case rather than passing as second output a single value, we can also use a list of arrays. 
For example, lets change the inducing inputs: :: - m.set('iip',np.arange(-4,0)) + m['iip'] = np.arange(-5,0) Getting the model's likelihood and gradients =========================================== @@ -129,10 +128,9 @@ we have been changing the parameters, the gradients are far from zero now. Next we are going to show how to optimize the model setting different restrictions on the parameters. -Once a constrain has been set on a parameter, it is not possible to -define a new constraint for it unless we explicitly remove the previous -one. The command to remove the constraints is ``unconstrain()``, and -just as the ``set()`` command, it also accepts regular expression. +Once a constraint has been set on a parameter, it is possible to remove it +with the command ``unconstrain()``, and +just as the previous matching commands, it also accepts regular expressions. In this case we will remove all the constraints: :: m.unconstrain('') @@ -144,7 +142,7 @@ is to be positive. This is constraint is easily set with the function ``constrain_positive()``. Regular expressions are also accepted. :: - m.constrain_positive('var') + m.constrain_positive('.*var') For convenience, GPy also provides a catch all function which ensures that anything which appears to require @@ -179,7 +177,7 @@ however for the sake of the example we will tie the white noise and the variance together. See `A kernel overview `_. for a proper use of the tying capabilities.:: - m.tie_params('e_var') + m.tie_params('.*e_var') Optimizing the model ==================== diff --git a/doc/tuto_kernel_overview.rst b/doc/tuto_kernel_overview.rst index 391881d8..6cc7b30d 100644 --- a/doc/tuto_kernel_overview.rst +++ b/doc/tuto_kernel_overview.rst @@ -13,8 +13,8 @@ First we import the libraries we will need :: For most kernels, the dimension is the only mandatory parameter to define a kernel object. However, it is also possible to specify the values of the parameters.
For example, the three following commands are valid for defining a squared exponential kernel (ie rbf or Gaussian) :: - ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.) - ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=2.) + ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.) + ker2 = GPy.kern.rbf(input_dim=1, variance = .75, lengthscale=2.) ker3 = GPy.kern.rbf(1, .5, .5) A ``print`` and a ``plot`` functions are implemented to represent kernel objects. The commands :: @@ -144,9 +144,9 @@ When calling one of these functions, the parameters to constrain can either by s k = k1 + k2 + k3 print k - k.constrain_positive('var') + k.constrain_positive('.*var') k.constrain_fixed(np.array([1]),1.75) - k.tie_params('len') + k.tie_params('.*len') k.unconstrain('white') k.constrain_bounded('white',lower=1e-5,upper=.5) print k @@ -212,7 +212,6 @@ Note the ties between the parameters of ``Kanova`` that reflect the links betwee # Create GP regression model m = GPy.models.GP_regression(X,Y,Kanova) - pb.figure(figsize=(5,5)) m.plot() .. figure:: Figures/tuto_kern_overview_mANOVA.png