diff --git a/GPy/inference/SGD.py b/GPy/inference/SGD.py
index e1c7704c..a08d0f28 100644
--- a/GPy/inference/SGD.py
+++ b/GPy/inference/SGD.py
@@ -58,7 +58,7 @@ class opt_SGD(Optimizer):
 
         for s in param_shapes:
             N, Q = s
-            X = x[i:N*Q].reshape(N, Q)
+            X = x[i:i+N*Q].reshape(N, Q)
             X = X[samples]
             subset = np.append(subset, X.flatten())
             i += N*Q
@@ -92,10 +92,19 @@ class opt_SGD(Optimizer):
                         self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask]
 
 
+        # Here we shift the positive constraints. We cycle through each positive
+        # constraint.
         positive = self.model.constrained_positive_indices.copy()
+        mask = (np.ones_like(positive) == 1)
         for p in range(len(positive)):
-            pos = np.where(j == self.model.constrained_positive_indices[p])[0][0]
-            self.model.constrained_positive_indices[p] = pos
+            # Look the constrained index up in j (the vector of "active"
+            # indices); pos holds its position(s) in j and is empty if absent.
+            pos = np.where(j == self.model.constrained_positive_indices[p])[0]
+            if len(pos) == 1:
+                self.model.constrained_positive_indices[p] = pos[0]
+            else:
+                mask[p] = False
+        self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask]
 
         return (bounded_i, bounded_l, bounded_u), positive
 
@@ -109,6 +118,8 @@ class opt_SGD(Optimizer):
         model_name = self.model.__class__.__name__
         if model_name == 'GPLVM':
             return [(N, Q)]
+        if model_name == 'Bayesian_GPLVM':
+            return [(N, Q), (N, Q)]
         else:
             raise NotImplementedError
 
@@ -119,14 +130,20 @@ class opt_SGD(Optimizer):
         self.model.N = samples.sum()
         self.model.X = X[samples]
         self.model.Y = self.model.Y[samples]
+        model_name = self.model.__class__.__name__
 
+        # Y was subsampled above, so refresh the sum-of-squares term derived from it.
+        if model_name == 'Bayesian_GPLVM':
+            self.model.trYYT = np.sum(np.square(self.model.Y))
+
         if self.model.N == 0:
             return 0, step, self.model.N
 
-        b,p = self.shift_constraints(j)
+        b, p = self.shift_constraints(j)
 
         momentum_term = self.momentum * step[j]
         f, fp = f_fp(self.x_opt[j])
+
         step[j] = self.learning_rate[j] * fp
         self.x_opt[j] -= step[j] + momentum_term
 
diff --git a/GPy/models/BGPLVM.py b/GPy/models/BGPLVM.py
index 3fc257e9..2b8874f6 100644
--- a/GPy/models/BGPLVM.py
+++ b/GPy/models/BGPLVM.py
@@ -20,9 +20,12 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
     :type init: 'PCA'|'random'
 
     """
-    def __init__(self, Y, Q, init='PCA', **kwargs):
-        X = self.initialise_latent(init, Q, Y)
-        S = np.ones_like(X) * 1e-2# 
+    def __init__(self, Y, Q, X=None, S=None, init='PCA', **kwargs):
+        if X is None:  # identity test; '==' on an ndarray compares elementwise
+            X = self.initialise_latent(init, Q, Y)
+        if S is None:
+            S = np.ones_like(X) * 1e-2
+
         sparse_GP_regression.__init__(self, X, Y, X_uncertainty = S, **kwargs)
 
     def get_param_names(self):
@@ -59,4 +62,3 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
 
     def log_likelihood_gradients(self):
         return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression.log_likelihood_gradients(self)))
-
diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_GP_regression.py
index e0a8a35d..649e7eae 100644
--- a/GPy/models/sparse_GP_regression.py
+++ b/GPy/models/sparse_GP_regression.py
@@ -37,7 +37,7 @@ class sparse_GP_regression(GP_regression):
     """
 
     def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False):
-        self.scale_factor = 1000.0
+        self.scale_factor = 100.0
         self.beta = beta
         if Z is None:
             self.Z = np.random.permutation(X.copy())[:M]