diff --git a/GPy/core/model.py b/GPy/core/model.py
index 57d41602..aacf20be 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -258,6 +258,7 @@ class Model(Parameterized):
         these terms are present in the name the parameter is constrained positive.
         """
+        raise DeprecationWarning, 'parameters now have default constraints'
         positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
         # param_names = self._get_param_names()
diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py
index d52211c5..8abb31e9 100644
--- a/GPy/core/parameterization/index_operations.py
+++ b/GPy/core/parameterization/index_operations.py
@@ -77,6 +77,12 @@ class ParameterIndexOperations(object):
     def iter_properties(self):
         return self._properties.iterkeys()
 
+    def shift(self, start, size):
+        for ind in self.iterindices():
+            toshift = ind>=start
+            if len(toshift) > 0:
+                ind[toshift] += size
+
     def clear(self):
         self._properties.clear()
diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py
index 678b119b..85a1e179 100644
--- a/GPy/core/parameterization/parameterized.py
+++ b/GPy/core/parameterization/parameterized.py
@@ -154,6 +154,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
         elif param._has_fixes():
             self._fixes_ = np.r_[np.ones(self.size, dtype=bool), fixes_param]
         else:
+            start = sum(p.size for p in self._parameters_[:index])
+            self.constraints.shift(start, param.size)
             self._parameters_.insert(index, param)
 
             # make sure fixes and constraints are indexed right
@@ -165,10 +167,13 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
                 self._fixes_ = np.ones(self.size+param.size, dtype=bool)
                 self._fixes_[ins:ins+param.size] = fixes_param
             self.size += param.size
+        else:
+            raise RuntimeError, """Parameter exists already added and no copy made"""
         self._connect_parameters()
 
         # make sure the constraints are pulled over:
         if hasattr(param, "_constraints_") and param._constraints_ is not None:
             for t, ind in param._constraints_.iteritems():
+                self.constraints.add(t, ind+self._offset_for(param))
             param._constraints_.clear()
         if param._default_constraint_ is not None:
diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py
index b73e25da..2e342f54 100644
--- a/GPy/core/parameterization/variational.py
+++ b/GPy/core/parameterization/variational.py
@@ -5,6 +5,7 @@ Created on 6 Nov 2013
 '''
 from parameterized import Parameterized
 from param import Param
+from transformations import Logexp
 
 class Normal(Parameterized):
     '''
@@ -15,7 +16,7 @@
     def __init__(self, means, variances, name='latent space'):
         Parameterized.__init__(self, name=name)
         self.means = Param("mean", means)
-        self.variances = Param('variance', variances)
+        self.variances = Param('variance', variances, Logexp())
         self.add_parameters(self.means, self.variances)
 
     def plot(self, *args):
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index fda201ff..130e56e2 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -61,8 +61,8 @@ class SparseGP(GP):
         if self.X_variance is None:
             self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
         else:
-            self.Z.gradient += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
-            self.Z.gradient += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
+            self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
+            self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
 
     def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
         """
diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index 4dea1342..f8d3d5a9 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -473,9 +473,9 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
     Z = np.random.uniform(-3., 3., (7, 1))
 
     k = GPy.kern.rbf(1)
 
     # create simple GP Model - no input uncertainty on this one
-    m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
+    m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z)
 
     if optimize:
         m.optimize('scg', messages=1, max_iters=max_iters)
@@ -486,7 +486,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
     print m
 
     # the same Model with uncertainty
-    m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S)
+    m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z, X_variance=S)
     if optimize:
         m.optimize('scg', messages=1, max_iters=max_iters)
     if plot:
diff --git a/GPy/inference/latent_function_inference/varDTC.py b/GPy/inference/latent_function_inference/varDTC.py
index 07ae17c5..e156a273 100644
--- a/GPy/inference/latent_function_inference/varDTC.py
+++ b/GPy/inference/latent_function_inference/varDTC.py
@@ -4,7 +4,8 @@ from posterior import Posterior
 from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dpotri, symmetrify
 import numpy as np
 
-from GPy.util.linalg import dtrtri
+from ...util.linalg import dtrtri
+from ...util.caching import Cacher
 
 log_2_pi = np.log(2*np.pi)
 
 class VarDTC(object):
@@ -20,8 +21,13 @@ class VarDTC(object):
     def __init__(self):
         #self._YYTfactor_cache = caching.cache()
         self.const_jitter = 1e-6
+        self.get_trYYT = Cacher(self._get_trYYT, 1)
+        self.get_YYTfactor = Cacher(self._get_YYTfactor, 1)
+
+    def _get_trYYT(self, Y):
+        return np.sum(np.square(Y))
 
-    def get_YYTfactor(self, Y):
+    def _get_YYTfactor(self, Y):
         """
         find a matrix L which satisfies LLT = YYT.
 
@@ -31,9 +37,8 @@ class VarDTC(object):
         if (N>D):
             return Y
         else:
-            #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
-            raise NotImplementedError, 'TODO' #TODO
-
+            return jitchol(tdot(Y))
+
     def get_VVTfactor(self, Y, prec):
         return Y * prec # TODO chache this, and make it effective
 
@@ -94,8 +99,9 @@ class VarDTC(object):
         LB = jitchol(B)
 
         # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
-        VVT_factor = self.get_VVTfactor(Y, beta)
-        trYYT = np.sum(np.square(Y))
+        self.YYTfactor = self.get_YYTfactor(Y)
+        VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
+        trYYT = self.get_trYYT(Y)
         psi1Vf = np.dot(psi1.T, VVT_factor)
 
         # back substutue C into psi1Vf
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
index 98945c2d..53728d0d 100644
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@@ -153,10 +153,9 @@ class kern(Parameterized):
         # newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
         # newkern.fixed_values = self.fixed_values + other.fixed_values
         # newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
-        [newkern._add_constrain(param, transform, warning=False)
-         for param, transform in itertools.izip(
-             *itertools.chain(self.constraints.iteritems(),
-                              other.constraints.iteritems()))]
+
+        [newkern.constraints.add(transform, ind) for transform, ind in self.constraints.iteritems()]
+        [newkern.constraints.add(transform, ind+self.size) for transform, ind in other.constraints.iteritems()]
         newkern._fixes_ = ((self._fixes_ or 0) + (other._fixes_ or 0)) or None
         return newkern
diff --git a/GPy/kern/parts/white.py b/GPy/kern/parts/white.py
index c9677f28..b0e961c7 100644
--- a/GPy/kern/parts/white.py
+++ b/GPy/kern/parts/white.py
@@ -4,6 +4,7 @@ from kernpart import Kernpart
 import numpy as np
 from ...core.parameterization import Param
+from ...core.parameterization.transformations import Logexp
 
 class White(Kernpart):
     """
@@ -17,7 +18,7 @@
     def __init__(self,input_dim,variance=1.):
         super(White, self).__init__(input_dim, 'white')
         self.input_dim = input_dim
-        self.variance = Param('variance', variance)
+        self.variance = Param('variance', variance, Logexp())
         self.add_parameters(self.variance)
         self._psi1 = 0 # TODO: more elegance here
diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index e6be2261..1f5b9db4 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -1,6 +1,5 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-
 #TODO
 """
 A lot of this code assumes that the link function is the identity.
@@ -18,6 +17,7 @@ from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
 import link_functions
 from likelihood import Likelihood
 from ..core.parameterization import Param
+from ..core.parameterization.transformations import Logexp
 
 class Gaussian(Likelihood):
     """
@@ -43,7 +43,7 @@
         super(Gaussian, self).__init__(gp_link, name=name)
 
-        self.variance = Param('variance', variance)
+        self.variance = Param('variance', variance, Logexp())
         self.add_parameter(self.variance)
 
         if isinstance(gp_link, link_functions.Identity):
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index 78851147..36f0c4b1 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -41,7 +41,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
         self.q = Normal(X, X_variance)
         SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs)
         self.add_parameter(self.q, index=0)
-        self.ensure_default_constraints()
+        #self.ensure_default_constraints()
 
     def _getstate(self):
         """
@@ -55,38 +55,6 @@ class BayesianGPLVM(SparseGP, GPLVM):
         self.init = state.pop()
         SparseGP._setstate(self, state)
 
-# def _get_param_names(self):
-#     X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
-#     S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
-#     return (X_names + S_names + SparseGP._get_param_names(self))
-
-    #def _get_print_names(self):
-    #    return SparseGP._get_print_names(self)
-
-# def _get_params(self):
-#     """
-#     Horizontally stacks the parameters in order to present them to the optimizer.
-#     The resulting 1-input_dim array has this structure:
-#
-#     ===============================================================
-#     | mu | S | Z | theta | beta |
-#     ===============================================================
-#
-#     """
-#     x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self)))
-#     return x
-
-# def _set_params(self, x, save_old=True, save_count=0):
-#     N, input_dim = self.num_data, self.input_dim
-#     self.X = x[:self.X.size].reshape(N, input_dim).copy()
-#     self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy()
-#     SparseGP._set_params(self, x[(2 * N * input_dim):])
-
-    def dKL_dmuS(self):
-        dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
-        dKL_dmu = self.X
-        return dKL_dmu, dKL_dS
-
     def dL_dmuS(self):
         dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance)
         dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
@@ -102,45 +70,24 @@ class BayesianGPLVM(SparseGP, GPLVM):
         return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
 
     def parameters_changed(self):
-        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
-        self._log_marginal_likelihood -= self.KL_divergence()
+        super(BayesianGPLVM, self).parameters_changed()
+        #self._log_marginal_likelihood -= self.KL_divergence()
 
-        #The derivative of the bound wrt the inducing inputs Z
-        self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
-        self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
-        self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
         dL_dmu, dL_dS = self.dL_dmuS()
-        dKL_dmu, dKL_dS = self.dKL_dmuS()
-        self.q.means.gradient = dL_dmu - dKL_dmu
-        self.q.variances.gradient = dL_dS - dKL_dS
+
+        # dL:
+        self.q.means.gradient = dL_dmu
+        self.q.variances.gradient = dL_dS
+
+        # dKL:
+        #self.q.means.gradient -= self.X
+        #self.q.variances.gradient -= (1. - (1. / (self.X_variance))) * 0.5
 
-
-# def log_likelihood(self):
-#     ll = SparseGP.log_likelihood(self)
-#     kl = self.KL_divergence()
-#     return ll - kl
-
-    def _dbound_dmuS(self):
-        dKL_dmu, dKL_dS = self.dKL_dmuS()
-        dL_dmu, dL_dS = self.dL_dmuS()
-        d_dmu = (dL_dmu - dKL_dmu).flatten()
-        d_dS = (dL_dS - dKL_dS).flatten()
-        return np.hstack((d_dmu, d_dS))
-
-# def _log_likelihood_gradients(self):
-#     dKL_dmu, dKL_dS = self.dKL_dmuS()
-#     dL_dmu, dL_dS = self.dL_dmuS()
-#     d_dmu = (dL_dmu - dKL_dmu).flatten()
-#     d_dS = (dL_dS - dKL_dS).flatten()
-#     self.dbound_dmuS = np.hstack((d_dmu, d_dS))
-#     self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self)
-#     return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
-
     def plot_latent(self, plot_inducing=True, *args, **kwargs):
         """
         See GPy.plotting.matplot_dep.dim_reduction_plots.plot_latent
         """
+        import sys
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import dim_reduction_plots
 
@@ -210,7 +157,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import dim_reduction_plots
 
-        return dim_reduction_plots.plot_steepest_gradient_map(model,*args,**kwargs)
+        return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
 
 def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
     """
diff --git a/GPy/models/sparse_gp_regression.py b/GPy/models/sparse_gp_regression.py
index 386380b7..c936164b 100644
--- a/GPy/models/sparse_gp_regression.py
+++ b/GPy/models/sparse_gp_regression.py
@@ -44,7 +44,7 @@ class SparseGPRegression(SparseGP):
             likelihood = likelihoods.Gaussian()
 
         SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance)
-        self.ensure_default_constraints()
+        #self.ensure_default_constraints()
 
     def _getstate(self):
         return SparseGP._getstate(self)
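Note on the recurring Param('variance', variance, Logexp()) changes above: the explicit, name-based ensure_default_constraints() calls are being retired in favour of attaching a positivity transform to each parameter when it is constructed. A minimal standalone sketch of the softplus parameterisation such a transform typically uses (an assumption for illustration, not a copy of GPy's transformations module):

import numpy as np

class Logexp(object):
    # Sketch of a softplus positivity transform: unconstrained optimiser value -> positive parameter.
    def f(self, x):
        return np.logaddexp(0., x)      # log(1 + exp(x)) > 0, numerically stable
    def finv(self, p):
        return np.log(np.expm1(p))      # inverse mapping: log(exp(p) - 1)
    def gradfactor(self, p):
        return -np.expm1(-p)            # df/dx written in terms of p = f(x)

t = Logexp()
x = np.array([-3., 0., 3.])
p = t.f(x)                              # always positive
assert np.allclose(t.finv(p), x)        # round trip recovers the raw values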
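The VarDTC change moves tr(YY^T) and the YY^T factor into private methods and wraps them in Cacher(..., 1) so they are only recomputed when Y actually changes between calls. A plain-Python memoisation sketch of that idea (a stand-in for illustration, not the real GPy.util.caching.Cacher):

import numpy as np

class OneArgCache(object):
    # Memoise a single-argument function on the identity of its last input.
    def __init__(self, operation):
        self.operation = operation
        self._key = None
        self._value = None
    def __call__(self, x):
        if self._key is not x:          # recompute only when a different object is passed
            self._key = x
            self._value = self.operation(x)
        return self._value

def _get_trYYT(Y):
    return np.sum(np.square(Y))         # tr(Y Y^T) is the sum of squared entries

get_trYYT = OneArgCache(_get_trYYT)
Y = np.random.randn(100, 3)
assert get_trYYT(Y) == get_trYYT(Y)     # second call hits the cache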
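Finally, the new ParameterIndexOperations.shift(start, size) exists so that stored constraint indices stay valid when a parameter is inserted in front of them; add_parameter computes start as the summed size of the preceding parameters and shifts everything at or beyond it. A toy illustration of that index bookkeeping (hypothetical data, not GPy code):

import numpy as np

# One constraint type mapped to the flat indices it applies to.
indices = {'positive': np.array([1, 3, 5])}

# A parameter of size 2 is inserted at flat index 3: every recorded index
# at or beyond 3 must move up by 2 to keep pointing at the same entry.
start, size = 3, 2
for ind in indices.values():
    toshift = ind >= start
    if toshift.any():
        ind[toshift] += size

assert (indices['positive'] == np.array([1, 5, 7])).all()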