diff --git a/GPy/inference/optimization/optimization.py b/GPy/inference/optimization/optimization.py
index aa9be793..5aa2ed03 100644
--- a/GPy/inference/optimization/optimization.py
+++ b/GPy/inference/optimization/optimization.py
@@ -140,6 +140,10 @@ class opt_lbfgsb(Optimizer):
         self.funct_eval = opt_result[2]['funcalls']
         self.status = rcstrings[opt_result[2]['warnflag']]
 
+        #a more helpful error message is available in opt_result in the Error case
+        if opt_result[2]['warnflag']==2:
+            self.status = 'Error' + opt_result[2]['task']
+
 class opt_simplex(Optimizer):
     def __init__(self, *args, **kwargs):
         Optimizer.__init__(self, *args, **kwargs)
diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py
index f5690aa4..2febda96 100644
--- a/GPy/likelihoods/bernoulli.py
+++ b/GPy/likelihoods/bernoulli.py
@@ -2,10 +2,10 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import numpy as np
-from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
+from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf
+
 import link_functions
 from likelihood import Likelihood
-from scipy import stats
 
 class Bernoulli(Likelihood):
     """
@@ -81,19 +81,18 @@ class Bernoulli(Likelihood):
         if isinstance(self.gp_link, link_functions.Probit):
 
             if gh_points is None:
-                gh_x, gh_w = np.polynomial.hermite.hermgauss(20)
+                gh_x, gh_w = self._gh_points()
             else:
                 gh_x, gh_w = gh_points
 
-            from scipy import stats
 
             shape = m.shape
             m,v,Y = m.flatten(), v.flatten(), Y.flatten()
             Ysign = np.where(Y==1,1,-1)
             X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + (m*Ysign)[:,None]
-            p = stats.norm.cdf(X)
+            p = std_norm_cdf(X)
             p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability
-            N = stats.norm.pdf(X)
+            N = std_norm_pdf(X)
             F = np.log(p).dot(gh_w)
             NoverP = N/p
             dF_dm = (NoverP*Ysign[:,None]).dot(gh_w)
@@ -106,10 +105,10 @@ class Bernoulli(Likelihood):
     def predictive_mean(self, mu, variance, Y_metadata=None):
 
         if isinstance(self.gp_link, link_functions.Probit):
-            return stats.norm.cdf(mu/np.sqrt(1+variance))
+            return std_norm_cdf(mu/np.sqrt(1+variance))
 
         elif isinstance(self.gp_link, link_functions.Heaviside):
-            return stats.norm.cdf(mu/np.sqrt(variance))
+            return std_norm_cdf(mu/np.sqrt(variance))
 
         else:
             raise NotImplementedError
diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
index 4f3f2e37..9f2f3e7a 100644
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
+# Copyright (c) 2012-2015 The GPy authors (see AUTHORS.txt)
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import numpy as np
@@ -165,6 +165,13 @@ class Likelihood(Parameterized):
 
         return z, mean, variance
 
+    #only compute gh points if required
+    __gh_points = None
+    def _gh_points(self):
+        if self.__gh_points is None:
+            self.__gh_points = np.polynomial.hermite.hermgauss(20)
+        return self.__gh_points
+
     def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
         """
         Use Gauss-Hermite Quadrature to compute
@@ -177,10 +184,9 @@ class Likelihood(Parameterized):
 
         if no gh_points are passed, we construct them using defualt options
         """
-        #May be broken
 
         if gh_points is None:
-            gh_x, gh_w = np.polynomial.hermite.hermgauss(20)
+            gh_x, gh_w = self._gh_points()
         else:
             gh_x, gh_w = gh_points
 
diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py
index a4ddc760..6b297f92 100644
--- a/GPy/likelihoods/link_functions.py
+++ b/GPy/likelihoods/link_functions.py
@@ -1,10 +1,9 @@
-# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
+# Copyright (c) 2012-2015 The GPy authors (see AUTHORS.txt)
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import numpy as np
-from scipy import stats
+from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf
 import scipy as sp
-from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
 
 _exp_lim_val = np.finfo(np.float64).max
 _lim_val = np.log(_exp_lim_val)
@@ -64,13 +63,12 @@ class Identity(GPTransformation):
     def d3transf_df3(self,f):
         return np.zeros_like(f)
 
-
 class Probit(GPTransformation):
     """
     .. math::
 
         g(f) = \\Phi^{-1} (mu)
-    
+
     """
     def transf(self,f):
         return std_norm_cdf(f)
@@ -79,13 +77,10 @@ class Probit(GPTransformation):
         return std_norm_pdf(f)
 
     def d2transf_df2(self,f):
-        #FIXME
         return -f * std_norm_pdf(f)
 
     def d3transf_df3(self,f):
-        #FIXME
-        f2 = f**2
-        return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
+        return (np.square(f)-1.)*std_norm_pdf(f)
 
 
 class Cloglog(GPTransformation):
@@ -98,7 +93,7 @@ class Cloglog(GPTransformation):
         or
 
         f = \log (-\log(1-p))
-    
+
     """
     def transf(self,f):
         return 1-np.exp(-np.exp(f))
@@ -123,16 +118,16 @@ class Log(GPTransformation):
 
     """
     def transf(self,f):
-        return np.exp(np.clip(f, -_lim_val, _lim_val))
+        return np.exp(np.clip(f, -np.inf, _lim_val))
 
     def dtransf_df(self,f):
-        return np.exp(np.clip(f, -_lim_val, _lim_val))
+        return np.exp(np.clip(f, -np.inf, _lim_val))
 
     def d2transf_df2(self,f):
-        return np.exp(np.clip(f, -_lim_val, _lim_val))
+        return np.exp(np.clip(f, -np.inf, _lim_val))
 
     def d3transf_df3(self,f):
-        return np.exp(np.clip(f, -_lim_val, _lim_val))
+        return np.exp(np.clip(f, -np.inf, _lim_val))
 
 class Log_ex_1(GPTransformation):
     """
@@ -174,7 +169,7 @@ class Heaviside(GPTransformation):
 
     .. math::
 
-        g(f) = I_{x \\in A}
+        g(f) = I_{x \\geq 0}
 
     """
     def transf(self,f):
diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py
index 559014f7..5950de08 100644
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@@ -476,7 +476,7 @@ class GradientTests(np.testing.TestCase):
         likelihood = GPy.likelihoods.MixedNoise(likelihoods_list=likelihoods_list)
         m = GPy.core.SparseGP(X, Y, X[np.random.choice(num_obs, 10)],
                               kern, likelihood,
-                              GPy.inference.latent_function_inference.VarDTC(),
+                              inference_method=GPy.inference.latent_function_inference.VarDTC(),
                               Y_metadata=Y_metadata)
         self.assertTrue(m.checkgrad())
 
diff --git a/GPy/util/misc.py b/GPy/util/misc.py
index 99bd62b3..84bf4dc1 100644
--- a/GPy/util/misc.py
+++ b/GPy/util/misc.py
@@ -23,7 +23,7 @@ def chain_1(df_dg, dg_dx):
     """
     if np.all(dg_dx==1.):
         return df_dg
-    if len(df_dg) > 1 and df_dg.shape[-1] > 1:
+    if len(df_dg) > 1 and len(df_dg.shape)>1 and df_dg.shape[-1] > 1:
         import ipdb; ipdb.set_trace()  # XXX BREAKPOINT
         raise NotImplementedError('Not implemented for matricies yet')
     return df_dg * dg_dx
@@ -37,7 +37,7 @@ def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
     """
     if np.all(dg_dx==1.) and np.all(d2g_dx2 == 0):
         return d2f_dg2
-    if  len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1:
+    if  len(d2f_dg2) > 1 and len(d2f_dg2.shape)>1 and d2f_dg2.shape[-1] > 1:
         raise NotImplementedError('Not implemented for matricies yet')
     #dg_dx_2 = np.clip(dg_dx, 1e-12, _lim_val_square)**2
     dg_dx_2 = dg_dx**2
diff --git a/GPy/util/univariate_Gaussian.py b/GPy/util/univariate_Gaussian.py
index 09b2e99c..79864f86 100644
--- a/GPy/util/univariate_Gaussian.py
+++ b/GPy/util/univariate_Gaussian.py
@@ -1,77 +1,15 @@
 # Copyright (c) 2012, 2013 Ricardo Andrade
+# Copyright (c) 2015 James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import numpy as np
 from scipy import weave
+from scipy.special import ndtr as std_norm_cdf
 
+#define a standard normal pdf
+_sqrt_2pi = np.sqrt(2*np.pi)
 def std_norm_pdf(x):
-    """Standard Gaussian density function"""
-    return 1./np.sqrt(2.*np.pi)*np.exp(-.5*x**2)
-
-def std_norm_cdf(x):
-    """
-    Cumulative standard Gaussian distribution
-    Based on Abramowitz, M. and Stegun, I. (1970)
-    """
-    x_shape = np.asarray(x).shape
-
-    if len(x_shape) == 0 or x_shape[0] == 1:
-        sign = np.sign(x)
-        x *= sign
-        x /= np.sqrt(2.)
-        t = 1.0/(1.0 +  0.3275911*x)
-        erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))))
-        cdf_x = 0.5*(1.0 + sign*erf)
-        return cdf_x
-    else:
-        x = np.atleast_1d(x).copy()
-        cdf_x = np.zeros_like(x)
-        sign = np.ones_like(x)
-        neg_x_ind = x<0
-        sign[neg_x_ind] = -1.0
-        x[neg_x_ind] = -x[neg_x_ind]
-        x /= np.sqrt(2.)
-        t = 1.0/(1.0 +  0.3275911*x)
-        erf = 1. - np.exp(-x**2)*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))))
-        cdf_x = 0.5*(1.0 + sign*erf)
-        cdf_x = cdf_x.reshape(x_shape)
-    return cdf_x
-
-def std_norm_cdf_weave(x):
-    """
-    Cumulative standard Gaussian distribution
-    Based on Abramowitz, M. and Stegun, I. (1970)
-
-    A weave implementation of std_norm_cdf, which is faster. this is unused,
-    because of the difficulties of a weave dependency. (see github issue #94)
-
-    """
-    #Generalize for many x
-    x = np.asarray(x).copy()
-    cdf_x = np.zeros_like(x)
-    N = x.size
-    support_code = "#include <math.h>"
-    code = """
-
-    double sign, t, erf;
-    for (int i=0; i<N; i++){
-        sign = 1.0;
-        if (x[i] < 0.0){
-            sign = -1.0;
-            x[i] = -x[i];
-        }
-        x[i] = x[i]/sqrt(2.0);
-
-        t = 1.0/(1.0 +  0.3275911*x[i]);
-
-        erf = 1. - exp(-x[i]*x[i])*t*(0.254829592 + t*(-0.284496736 + t*(1.421413741 + t*(-1.453152027 + t*(1.061405429)))));
-
-        //return_val = 0.5*(1.0 + sign*erf);
-        cdf_x[i] = 0.5*(1.0 + sign*erf);
-    }
-    """
-    weave.inline(code, arg_names=['x', 'cdf_x', 'N'], support_code=support_code)
-    return cdf_x
+    return np.exp(-np.square(x)/2)/_sqrt_2pi
 
 def inv_std_norm_cdf(x):
     """