diff --git a/GPy/core/model.py b/GPy/core/model.py
index eb60d465..28bb4ff5 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -233,13 +233,19 @@ class model(parameterised):
         Ensure that any variables which should clearly be positive have been constrained somehow.
         """
         positive_strings = ['variance','lengthscale', 'precision']
+        param_names = self._get_param_names()
+        currently_constrained = self.all_constrained_indices()
+        to_make_positive = []
         for s in positive_strings:
             for i in self.grep_param_names(s):
-                if not (i in self.all_constrained_indices()):
-                    name = self._get_param_names()[i]
-                    self.constrain_positive(name)
+                if not (i in currently_constrained):
+                    name = param_names[i]
+                    to_make_positive.append(name)
                     if warn:
-                        print "Warning! constraining %s postive"%name
+                        print "Warning! constraining %s positive" % name
+        if len(to_make_positive):
+            self.constrain_positive('('+'|'.join(to_make_positive)+')')
+
 
     def objective_function(self, x):
diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index bfdc9bb6..c4e064a5 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -61,7 +61,7 @@ def GPLVM_oil_100(optimize=True, M=15):
         m.plot_latent(labels=m.data_labels)
     return m
 
-def BGPLVM_oil(optimize=True, N=100, Q=10, M=15):
+def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300):
     data = GPy.util.datasets.oil()
 
     # create simple GP model
@@ -73,10 +73,10 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15):
     if optimize:
         m.constrain_fixed('noise', 0.05)
         m.ensure_default_constraints()
-        m.optimize('scg', messages=1)
+        m.optimize('scg', messages=1, max_f_eval=max(80, max_f_eval))
         m.unconstrain('noise')
         m.constrain_positive('noise')
-        m.optimize('scg', messages=1)
+        m.optimize('scg', messages=1, max_f_eval=max(0, max_f_eval - 80))
     else:
         m.ensure_default_constraints()
 
@@ -171,31 +171,31 @@ def stick():
     return m
 
+# def BGPLVM_oil():
+#     data = GPy.util.datasets.oil()
+#     Y, X = data['Y'], data['X']
+#     X -= X.mean(axis=0)
+#     X /= X.std(axis=0)
+#
+#     Q = 10
+#     M = 30
+#
+#     kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
+#     m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
+#     # m.scale_factor = 100.0
+#     m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
+#     from sklearn import cluster
+#     km = cluster.KMeans(M, verbose=10)
+#     Z = km.fit(m.X).cluster_centers_
+#     # Z = GPy.util.misc.kmm_init(m.X, M)
+#     m.set('iip', Z)
+#     m.set('bias', 1e-4)
+#     # optimize
+#     # m.ensure_default_constraints()
+#
+#     import pdb; pdb.set_trace()
+#     m.optimize('tnc', messages=1)
+#     print m
+#     m.plot_latent(labels=data['Y'].argmax(axis=1))
+#     return m
 
-def BGPLVM_oil():
-    data = GPy.util.datasets.oil()
-    Y, X = data['Y'], data['X']
-    X -= X.mean(axis=0)
-    X /= X.std(axis=0)
-
-    Q = 10
-    M = 30
-
-    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
-    m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M)
-    # m.scale_factor = 100.0
-    m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
-    from sklearn import cluster
-    km = cluster.KMeans(M, verbose=10)
-    Z = km.fit(m.X).cluster_centers_
-    # Z = GPy.util.misc.kmm_init(m.X, M)
-    m.set('iip', Z)
-    m.set('bias', 1e-4)
-    # optimize
-    # m.ensure_default_constraints()
-
-    import pdb; pdb.set_trace()
-    m.optimize('tnc', messages=1)
-    print m
-    m.plot_latent(labels=data['Y'].argmax(axis=1))
-    return m
diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py
index 84f7d68d..84521cf9 100644
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/rbf.py
@@ -5,6 +5,7 @@
 from kernpart import kernpart
 import numpy as np
 import hashlib
+from scipy import weave
 
 class rbf(kernpart):
     """
@@ -172,7 +173,7 @@ class rbf(kernpart):
         """Think N,M,M,Q """
         self._psi_computations(Z,mu,S)
         tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
-        target_mu += (dL_dpsi2[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
+        target_mu += -2.*(dL_dpsi2[:,:,:,None]*tmp*self._psi2_mudist).sum(1).sum(1)
         target_S += (dL_dpsi2[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
 
@@ -206,7 +207,6 @@ class rbf(kernpart):
         if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
             #something's changed. recompute EVERYTHING
-            #TODO: make more efficient for large Q (using NDL's dot product trick)
 
             #psi1
             self._psi1_denom = S[:,None,:]/self.lengthscale2 + 1.
             self._psi1_dist = Z[None,:,:]-mu[:,None,:]
@@ -216,9 +216,78 @@ class rbf(kernpart):
 
             #psi2
             self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,M,M,Q
-            self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
-            self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
-            self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M
+            self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu,self._psi2_Zhat)
+            #self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
+            #self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
+            #self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M
             self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
 
+            #store matrices for caching
             self._Z, self._mu, self._S = Z, mu, S
+
+    def weave_psi2(self,mu,Zhat):
+        weave_options = {'headers'           : ['<omp.h>'],
+                         'extra_compile_args': ['-fopenmp -march=native'],
+                         'extra_link_args'   : ['-lgomp'],
+                         'compiler'          : 'gcc'}
+
+        N,Q = mu.shape
+        M = Zhat.shape[0]
+
+        mudist = np.empty((N,M,M,Q))
+        mudist_sq = np.empty((N,M,M,Q))
+        psi2_exponent = np.zeros((N,M,M))
+        psi2 = np.empty((N,M,M))
+
+        psi2_Zdist_sq = self._psi2_Zdist_sq
+        half_log_psi2_denom = 0.5*np.log(self._psi2_denom).squeeze()
+        variance_sq = float(np.square(self.variance))
+        if self.ARD:
+            lengthscale2 = self.lengthscale2
+        else:
+            lengthscale2 = np.ones(Q)*self.lengthscale2
+        _psi2_denom = self._psi2_denom.squeeze()
+        code = """
+        double tmp;
+
+        #pragma omp parallel for private(tmp)
+        for (int n=0; n<N; n++){
+            for (int m=0; m<M; m++){
+                for (int mm=0; mm<=m; mm++){
+                    for (int q=0; q<Q; q++){
+                        // differences between mu and the pseudo-input midpoints Zhat
+                        tmp = mu(n,q) - Zhat(m,mm,q);
+                        mudist(n,m,mm,q) = tmp;
+                        mudist(n,mm,m,q) = tmp;
+
+                        // scaled squared distances
+                        tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
+                        mudist_sq(n,m,mm,q) = tmp;
+                        mudist_sq(n,mm,m,q) = tmp;
+
+                        // accumulate the psi2 exponent over the dimensions q
+                        psi2_exponent(n,m,mm) += -psi2_Zdist_sq(m,mm,q)/4.0 - tmp - half_log_psi2_denom(n,q);
+                    }
+                    // the psi2 quantities are symmetric in (m,mm)
+                    psi2_exponent(n,mm,m) = psi2_exponent(n,m,mm);
+                    psi2(n,m,mm) = variance_sq*exp(psi2_exponent(n,m,mm));
+                    psi2(n,mm,m) = psi2(n,m,mm);
+                }
+            }
+        }
+        """
+        support_code = """
+        #include <omp.h>
+        #include <math.h>
+        """
+        weave.inline(code, support_code=support_code, libraries=['gomp'],
+                     arg_names=['N','M','Q','mu','Zhat','mudist_sq','mudist','lengthscale2','_psi2_denom','psi2_Zdist_sq','psi2_exponent','half_log_psi2_denom','psi2','variance_sq'],
+                     type_converters=weave.converters.blitz,**weave_options)
+
+        return mudist, mudist_sq, psi2_exponent, psi2
diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py
index ba9603bb..ee485e76 100644
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@@ -95,3 +95,4 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         input_1, input_2 = which_indices
         ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2],*args, **kwargs)
         ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
+        return ax
diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py
index 32594594..ca380a77 100644
--- a/GPy/models/GPLVM.py
+++ b/GPy/models/GPLVM.py
@@ -24,7 +24,7 @@ class GPLVM(GP):
     :type init: 'PCA'|'random'
     """
 
-    def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs):
+    def __init__(self, Y, Q, init='PCA', X=None, kernel=None, **kwargs):
         if X is None:
             X = self.initialise_latent(init, Q, Y)
         if kernel is None:
@@ -39,87 +39,89 @@ class GPLVM(GP):
         return np.random.randn(Y.shape[0], Q)
 
     def _get_param_names(self):
-        return sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[]) + GP._get_param_names(self)
+        return sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], []) + GP._get_param_names(self)
 
     def _get_params(self):
         return np.hstack((self.X.flatten(), GP._get_params(self)))
 
-    def _set_params(self,x):
-        self.X = x[:self.X.size].reshape(self.N,self.Q).copy()
+    def _set_params(self, x):
+        self.X = x[:self.X.size].reshape(self.N, self.Q).copy()
         GP._set_params(self, x[self.X.size:])
 
     def _log_likelihood_gradients(self):
-        dL_dX = 2.*self.kern.dK_dX(self.dL_dK,self.X)
+        dL_dX = 2.*self.kern.dK_dX(self.dL_dK, self.X)
 
-        return np.hstack((dL_dX.flatten(),GP._log_likelihood_gradients(self)))
+        return np.hstack((dL_dX.flatten(), GP._log_likelihood_gradients(self)))
 
     def plot(self):
-        assert self.likelihood.Y.shape[1]==2
-        pb.scatter(self.likelihood.Y[:,0],self.likelihood.Y[:,1],40,self.X[:,0].copy(),linewidth=0,cmap=pb.cm.jet)
-        Xnew = np.linspace(self.X.min(),self.X.max(),200)[:,None]
+        assert self.likelihood.Y.shape[1] == 2
+        pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet)
+        Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
         mu, var, upper, lower = self.predict(Xnew)
-        pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5)
+        pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
 
-    def plot_latent(self,labels=None, which_indices=None, resolution=50):
+    def plot_latent(self, labels=None, which_indices=None, resolution=50):
         """
         :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc)
         :param resolution: the resolution of the grid on which to evaluate the predictive variance
         """
         util.plot.Tango.reset()
-
+
         if labels is None:
             labels = np.ones(self.N)
         if which_indices is None:
-            if self.Q==1:
+            if self.Q == 1:
                 input_1 = 0
                 input_2 = None
-            if self.Q==2:
-                input_1, input_2 = 0,1
+            elif self.Q == 2:
+                input_1, input_2 = 0, 1
             else:
-                #try to find a linear of RBF kern in the kernel
-                k = [p for p in self.kern.parts if p.name in ['rbf','linear']]
-                if (not len(k)==1) or (not k[0].ARD):
-                    raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
+                # try to find a linear or RBF kern in the kernel
+                k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
+                if (not len(k) == 1) or (not k[0].ARD):
+                    raise ValueError, "cannot automatically determine which dimensions to plot, please pass 'which_indices'"
                 k = k[0]
-                if k.name=='rbf':
+                if k.name == 'rbf':
                     input_1, input_2 = np.argsort(k.lengthscale)[:2]
-                elif k.name=='linear':
+                elif k.name == 'linear':
                     input_1, input_2 = np.argsort(k.variances)[::-1][:2]
 
-        #first, plot the output variance as a function of the latent space
-        Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution)
-        Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1]))
-        Xtest_full[:, :2] = Xtest
-        mu, var, low, up = self.predict(Xtest_full)
-        var = var[:, :2]
-        pb.imshow(var.reshape(resolution,resolution).T[::-1,:],extent=[xmin[0],xmax[0],xmin[1],xmax[1]],cmap=pb.cm.binary,interpolation='bilinear')
+        # first, plot the output variance as a function of the latent space
+        Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(self.X[:, [input_1, input_2]], resolution=resolution)
+        Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1]))
+        Xtest_full[:, :2] = Xtest
+        mu, var, low, up = self.predict(Xtest_full)
+        var = var[:, :1]
+        pb.imshow(var.reshape(resolution, resolution).T[::-1, :],
+                  extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary, interpolation='bilinear')
 
-
-        for i,ul in enumerate(np.unique(labels)):
+        for i, ul in enumerate(np.unique(labels)):
             if type(ul) is np.string_:
                 this_label = ul
             elif type(ul) is np.int64:
-                this_label = 'class %i'%ul
+                this_label = 'class %i' % ul
             else:
-                this_label = 'class %i'%i
+                this_label = 'class %i' % i
 
-            index = np.nonzero(labels==ul)[0]
-            if self.Q==1:
-                x = self.X[index,input_1]
+            index = np.nonzero(labels == ul)[0]
+            if self.Q == 1:
+                x = self.X[index, input_1]
                 y = np.zeros(index.size)
             else:
-                x = self.X[index,input_1]
-                y = self.X[index,input_2]
-            pb.plot(x,y,marker='o',color=util.plot.Tango.nextMedium(),mew=0,label=this_label,linewidth=0)
+                x = self.X[index, input_1]
+                y = self.X[index, input_2]
+            pb.plot(x, y, marker='o', color=util.plot.Tango.nextMedium(), mew=0, label=this_label, linewidth=0)
 
-        pb.xlabel('latent dimension %i'%input_1)
-        pb.ylabel('latent dimension %i'%input_2)
+        pb.xlabel('latent dimension %i' % input_1)
+        pb.ylabel('latent dimension %i' % input_2)
 
-        if not np.all(labels==1.):
-            pb.legend(loc=0,numpoints=1)
+        if not np.all(labels == 1.):
+            pb.legend(loc=0, numpoints=1)
 
-        pb.xlim(xmin[0],xmax[0])
-        pb.ylim(xmin[1],xmax[1])
-
-        return input_1, input_2
+        pb.xlim(xmin[0], xmax[0])
+        pb.ylim(xmin[1], xmax[1])
+        pb.grid(b=False) # remove the grid if present, it doesn't look good
+        ax = pb.gca()
+        ax.set_aspect('auto') # set a nice aspect ratio
+        return ax
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 59f598f9..d82bb50f 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -51,6 +51,26 @@ def _mdot_r(a,b):
         return np.dot(a,b)
 
 def jitchol(A,maxtries=5):
+    A = np.asfortranarray(A)
+    L, info = linalg.lapack.flapack.dpotrf(A, lower=1)
+    if info == 0:
+        return L
+    else:
+        diagA = np.diag(A)
+        if np.any(diagA < 0.):
+            raise linalg.LinAlgError, "not pd: negative diagonal elements"
+        jitter = diagA.mean()*1e-6
+        for i in range(1, maxtries+1):
+            print 'Warning: adding jitter of '+str(jitter)
+            try:
+                return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True)
+            except:
+                jitter *= 10
+        raise linalg.LinAlgError, "not positive definite, even with jitter."
+
+
+
+def jitchol_old(A,maxtries=5):
     """
     :param A : An almost pd square matrix
 
@@ -71,7 +91,7 @@ def jitchol(A,maxtries=5):
     for i in range(1,maxtries+1):
         print 'Warning: adding jitter of '+str(jitter)
         try:
-            return linalg.cholesky(A+np.eye(A.shape[0])*jitter, lower = True)
+            return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True)
         except:
             jitter *= 10
 
@@ -93,7 +113,8 @@ def pdinv(A):
     L = jitchol(A)
     logdet = 2.*np.sum(np.log(np.diag(L)))
     Li = chol_inv(L)
-    Ai = np.dot(Li.T,Li) #TODO: get the flapack routine form multiplying triangular matrices
+    Ai = linalg.lapack.flapack.dpotri(L, lower=1)[0]
+    Ai = np.tril(Ai) + np.tril(Ai,-1).T
 
     return Ai, L, Li, logdet
diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py
index dde9cd32..f9538942 100644
--- a/GPy/util/visualize.py
+++ b/GPy/util/visualize.py
@@ -4,7 +4,7 @@ import GPy
 import numpy as np
 
 class lvm:
-    def __init__(self, model, data_visualize, latent_axis):
+    def __init__(self, model, data_visualize, latent_axis, latent_index=[0,1], latent_dim=2):
         self.cid = latent_axis.figure.canvas.mpl_connect('button_press_event', self.on_click)
         self.cid = latent_axis.figure.canvas.mpl_connect('motion_notify_event', self.on_move)
         self.data_visualize = data_visualize
@@ -12,6 +12,8 @@ class lvm:
         self.latent_axis = latent_axis
         self.called = False
         self.move_on = False
+        self.latent_index = latent_index
+        self.latent_dim = latent_dim
 
     def on_click(self, event):
         #print 'click', event.xdata, event.ydata
@@ -32,7 +34,8 @@ class lvm:
         if self.called and self.move_on:
             # Call modify code on move
             #print 'move', event.xdata, event.ydata
-            latent_values = np.array((event.xdata, event.ydata))
+            latent_values = np.zeros((1,self.latent_dim))
+            latent_values[0,self.latent_index] = np.array([event.xdata, event.ydata])
             y = self.model.predict(latent_values)[0]
             self.data_visualize.modify(y)
             #print 'y', y
@@ -45,7 +48,7 @@ class data_show:
         # If no axes are defined, create some.
         if axis==None:
             fig = plt.figure()
-            self.axis = fig.add_subplot(111)
+            self.axis = fig.add_subplot(111)
         else:
             self.axis = axis
 
@@ -57,7 +60,7 @@ class vector_show(data_show):
     def __init__(self, vals, axis=None):
         data_show.__init__(self, vals, axis)
         self.vals = vals.T
-        self.handle = plt.plot(np.arange(0, len(vals))[:, None], self.vals)[0]
+        self.handle = self.axis.plot(np.arange(0, len(vals))[:, None], self.vals)[0]
 
     def modify(self, vals):
         xdata, ydata = self.handle.get_data()
@@ -84,7 +87,7 @@ class image_show(data_show):
         self.handle.set_array(self.vals)
         #self.axis.figure.canvas.draw()
         plt.show()
-
+
     def set_image(self, vals):
         self.vals = np.reshape(vals, self.dimensions, order='F')
         if self.transpose:
@@ -94,7 +97,7 @@ class image_show(data_show):
         #if self.invert:
         #    self.vals = -self.vals
 
-class stick_show(data_show):
+class stick_show(data_show):
     """Show a three dimensional point cloud as a figure.
     Connect elements of the figure together using the matrix connect."""
     def __init__(self, vals, axis=None, connect=None):
@@ -159,6 +162,6 @@ class stick_show(data_show):
             self.line_handle = self.axis.plot(np.array(x), np.array(y), np.array(z), 'b-')
 
         self.axis.figure.canvas.draw()
-
+
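
A minimal usage sketch for the LAPACK-backed jitchol/pdinv introduced above (illustration only, not part of the patch; assumes a GPy checkout of this era, i.e. Python 2 and a scipy that still ships linalg.lapack.flapack):

    import numpy as np
    from GPy.util.linalg import jitchol, pdinv

    # build a well-conditioned positive definite matrix
    X = np.random.randn(50, 10)
    A = np.dot(X, X.T) + np.eye(50)

    # depending on the scipy version, dpotrf may leave entries of A in the
    # upper triangle, so take np.tril before checking the factorisation
    L = np.tril(jitchol(A))
    assert np.allclose(np.dot(L, L.T), A)

    # inverse, Cholesky factor, inverse factor and log-determinant
    Ai, L, Li, logdet = pdinv(A)
    assert np.allclose(np.dot(Ai, A), np.eye(50), atol=1e-8)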