Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-06 10:32:39 +02:00

Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in: commit 171ec1a563

22 changed files with 363 additions and 389 deletions
@@ -21,6 +21,8 @@ from . import plotting
 from .core import Model
 from .core.parameterization import Param, Parameterized, ObsAr
+from .__version__ import __version__
+
 #@nottest
 try:
     #Get rid of nose dependency by only ignoring if you have nose installed
GPy/__version__.py (new file, 1 line)

@@ -0,0 +1 @@
+__version__ = "0.8.3"
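Since the package `__init__.py` now re-exports this string (see the hunk above), downstream code can read it directly. A minimal sketch, assuming GPy is importable and the version stays in simple "major.minor.patch" form:

```python
# Minimal sketch: consuming the version string added in this commit.
import GPy

print(GPy.__version__)  # e.g. "0.8.3"

# Illustrative guard for example scripts (the tuple comparison is an
# assumption, not something this commit provides).
if tuple(int(p) for p in GPy.__version__.split(".")) < (0, 8, 3):
    raise RuntimeError("these examples assume GPy >= 0.8.3")
```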
@@ -535,7 +535,7 @@ class GP(Model):
              which_data_ycols='all', fixed_inputs=[],
              levels=20, samples=0, fignum=None, ax=None, resolution=None,
              plot_raw=False, linecol=None,fillcol=None, Y_metadata=None,
-             data_symbol='kx', predict_kw=None, plot_training_data=True):
+             data_symbol='kx', predict_kw=None, plot_training_data=True, samples_y=0, apply_link=False):
         """
         Plot the posterior of the GP.
         - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@@ -558,7 +558,7 @@ class GP(Model):
-        :param levels: number of levels to plot in a contour plot.
+        :param levels: for 2D plotting, the number of contour levels to use
         :type levels: int
-        :param samples: the number of a posteriori samples to plot
+        :param samples: the number of a posteriori samples to plot, p(f*|y)
         :type samples: int
         :param fignum: figure to plot on.
         :type fignum: figure number
@@ -574,6 +574,10 @@ class GP(Model):
         :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
         :param plot_training_data: whether or not to plot the training points
         :type plot_training_data: boolean
+        :param samples_y: the number of a posteriori samples to plot, p(y*|y)
+        :type samples_y: int
+        :param apply_link: if there is a link function of the likelihood, plot the link(f*) rather than f*, when plotting posterior samples f
+        :type apply_link: boolean
         """
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import models_plots
@@ -587,7 +591,7 @@ class GP(Model):
             levels, samples, fignum, ax, resolution,
             plot_raw=plot_raw, Y_metadata=Y_metadata,
             data_symbol=data_symbol, predict_kw=predict_kw,
-            plot_training_data=plot_training_data, **kw)
+            plot_training_data=plot_training_data, samples_y=samples_y, apply_link=apply_link, **kw)

     def plot_data(self, which_data_rows='all',
@@ -613,7 +617,7 @@ class GP(Model):
-        :param levels: number of levels to plot in a contour plot.
+        :param levels: for 2D plotting, the number of contour levels to use
         :type levels: int
-        :param samples: the number of a posteriori samples to plot
+        :param samples: the number of a posteriori samples to plot, p(f*|y)
         :type samples: int
         :param fignum: figure to plot on.
         :type fignum: figure number
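In use, the split these hunks introduce between latent-function samples and observation samples on `GP.plot` looks as follows; a minimal sketch (the toy model setup is illustrative, not part of this commit):

```python
import numpy as np
import GPy

# Toy 1-D regression model (illustrative setup).
X = np.random.uniform(-3., 3., (40, 1))
Y = np.sin(X) + 0.05 * np.random.randn(40, 1)
m = GPy.models.GPRegression(X, Y)
m.optimize()

# samples draws from p(f*|y); samples_y (new here) draws from p(y*|y).
# apply_link (new here) pushes the f samples through the likelihood's link.
m.plot(samples=3, samples_y=10, apply_link=False)
```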
@@ -366,6 +366,7 @@ class InverseGamma(Gamma):
     def rvs(self, n):
         return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)


 class DGPLVM_KFDA(Prior):
     """
     Implementation of the Discriminative Gaussian Process Latent Variable function using
@@ -512,6 +513,7 @@ class DGPLVM_KFDA(Prior):
         self.A = self.compute_A(lst_ni)
         self.x_shape = x_shape


 class DGPLVM(Prior):
     """
     Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
@@ -669,7 +671,7 @@ class DGPLVM(Prior):
         M_i = self.compute_Mi(cls)
         Sb = self.compute_Sb(cls, M_i, M_0)
         Sw = self.compute_Sw(cls, M_i)
-        # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
+        # sb_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
         #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
         #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
         Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
@@ -1198,6 +1200,7 @@ class DGPLVM_T(Prior):


 class HalfT(Prior):
     """
     Implementation of the half student t probability function, coupled with random variables.
@@ -1208,15 +1211,17 @@ class HalfT(Prior):
     """
     domain = _POSITIVE
     _instances = []
-    def __new__(cls, A, nu): # Singleton:
+
+    def __new__(cls, A, nu): # Singleton:
         if cls._instances:
             cls._instances[:] = [instance for instance in cls._instances if instance()]
             for instance in cls._instances:
                 if instance().A == A and instance().nu == nu:
-                   return instance()
+                    return instance()
         o = super(Prior, cls).__new__(cls, A, nu)
         cls._instances.append(weakref.ref(o))
         return cls._instances[-1]()

     def __init__(self, A, nu):
         self.A = float(A)
         self.nu = float(nu)
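The `__new__` override above interns priors by hyperparameter, so equal configurations share one live object. A self-contained sketch of the same weakref-singleton pattern, outside GPy's `Prior` hierarchy:

```python
import weakref

class InternedPrior(object):
    # Standalone illustration of the pattern used by GPy's priors:
    # identical (A, nu) pairs return the same live instance.
    _instances = []

    def __new__(cls, A, nu):
        # Prune weak references whose targets have been garbage collected.
        cls._instances[:] = [r for r in cls._instances if r() is not None]
        for ref in cls._instances:
            obj = ref()
            if obj is not None and obj.A == A and obj.nu == nu:
                return obj
        obj = super(InternedPrior, cls).__new__(cls)
        obj.A, obj.nu = float(A), float(nu)
        cls._instances.append(weakref.ref(obj))
        return obj

assert InternedPrior(2., 4.) is InternedPrior(2., 4.)
```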
@@ -1225,37 +1230,81 @@ class HalfT(Prior):
     def __str__(self):
         return "hT({:.2g}, {:.2g})".format(self.A, self.nu)

-    def lnpdf(self,theta):
-        return (theta>0) * ( self.constant -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) )
+    def lnpdf(self, theta):
+        return (theta > 0) * (self.constant - .5*(self.nu + 1) * np.log(1. + (1./self.nu) * (theta/self.A)**2))

-        #theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
-        #lnpdfs = np.zeros_like(theta)
-        #theta = np.array([theta])
-        #above_zero = theta.flatten()>1e-6
-        #v = self.nu
-        #sigma2=self.A
-        #stop
-        #lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5)
-        #                      - gammaln(v * 0.5)
-        #                      - 0.5*np.log(sigma2 * v * np.pi)
-        #                      - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2))
-        #                      )
-        #return lnpdfs
+        # theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
+        # lnpdfs = np.zeros_like(theta)
+        # theta = np.array([theta])
+        # above_zero = theta.flatten()>1e-6
+        # v = self.nu
+        # sigma2=self.A
+        # stop
+        # lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5)
+        #                       - gammaln(v * 0.5)
+        #                       - 0.5*np.log(sigma2 * v * np.pi)
+        #                       - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2))
+        #                       )
+        # return lnpdfs

-    def lnpdf_grad(self,theta):
-        theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
+    def lnpdf_grad(self, theta):
+        theta = theta if isinstance(theta, np.ndarray) else np.array([theta])
         grad = np.zeros_like(theta)
-        above_zero = theta>1e-6
+        above_zero = theta > 1e-6
         v = self.nu
-        sigma2=self.A
+        sigma2 = self.A
         grad[above_zero] = -0.5*(v+1)*(2*theta[above_zero])/(v*sigma2 + theta[above_zero][0]**2)
         return grad
     def rvs(self, n):
-        #return np.random.randn(n) * self.sigma + self.mu
-        from scipy.stats import t
-        #[np.abs(x) for x in t.rvs(df=4,loc=0,scale=50, size=10000)])
-        ret = t.rvs(self.nu,loc=0,scale=self.A, size=n)
-        ret[ret<0] = 0
-        return ret
+        # return np.random.randn(n) * self.sigma + self.mu
+        from scipy.stats import t
+        # [np.abs(x) for x in t.rvs(df=4,loc=0,scale=50, size=10000)])
+        ret = t.rvs(self.nu, loc=0, scale=self.A, size=n)
+        ret[ret < 0] = 0
+        return ret
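One thing the commented-out line hints at: a half-t is usually sampled as the absolute value of a Student-t draw, whereas clamping negatives to zero (as `rvs` does above) puts an atom of probability mass at exactly 0. A sketch of the folded alternative, for comparison:

```python
import numpy as np
from scipy.stats import t

nu, A, n = 4.0, 2.0, 100000

# Clamping, as in the rvs above: negative draws become exact zeros.
clamped = t.rvs(nu, loc=0, scale=A, size=n)
clamped[clamped < 0] = 0
print((clamped == 0).mean())  # ~0.5: half the draws collapse to 0

# Folding instead keeps all mass on the half-t density over (0, inf).
folded = np.abs(t.rvs(nu, loc=0, scale=A, size=n))
print((folded == 0).mean())   # ~0.0
```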
+
+class Exponential(Prior):
+    """
+    Implementation of the Exponential probability function,
+    coupled with random variables.
+
+    :param l: shape parameter
+
+    """
+    domain = _POSITIVE
+    _instances = []
+
+    def __new__(cls, l): # Singleton:
+        if cls._instances:
+            cls._instances[:] = [instance for instance in cls._instances if instance()]
+            for instance in cls._instances:
+                if instance().l == l:
+                    return instance()
+        o = super(Exponential, cls).__new__(cls, l)
+        cls._instances.append(weakref.ref(o))
+        return cls._instances[-1]()
+
+    def __init__(self, l):
+        self.l = l
+
+    def __str__(self):
+        return "Exp({:.2g})".format(self.l)
+
+    def summary(self):
+        ret = {"E[x]": 1. / self.l,
+               "E[ln x]": np.nan,
+               "var[x]": 1. / self.l**2,
+               "Entropy": 1. - np.log(self.l),
+               "Mode": 0.}
+        return ret
+
+    def lnpdf(self, x):
+        return np.log(self.l) - self.l * x
+
+    def lnpdf_grad(self, x):
+        return - self.l
+
+    def rvs(self, n):
+        return np.random.exponential(scale=self.l, size=n)
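A consistency note worth illustrating: `lnpdf` above uses the rate parameterization (log p(x) = log l − l·x, so E[x] = 1/l, matching `summary`), while NumPy's `exponential` takes the scale, i.e. the mean. Draws matching that density would therefore use `scale=1./l`; a quick numeric check, as a sketch:

```python
import numpy as np

l = 2.0  # rate, as in lnpdf: log p(x) = log(l) - l*x

# np.random.exponential is parameterized by the *scale* (mean) = 1/rate.
draws = np.random.exponential(scale=1. / l, size=200000)

print(draws.mean())  # ~0.5, i.e. E[x] = 1/l as reported by summary()
print(1. / l)        # 0.5
```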
@@ -62,7 +62,7 @@ class Transformation(object):
         import matplotlib.pyplot as plt
         from ...plotting.matplot_dep import base_plots
         x = np.linspace(-8,8)
-        base_plots.meanplot(x, self.f(x),axes=axes*args,**kw)
+        base_plots.meanplot(x, self.f(x), *args, ax=axes, **kw)
         axes = plt.gca()
         axes.set_xlabel(xlabel)
         axes.set_ylabel(ylabel)
@@ -488,7 +488,7 @@ class Logistic(Transformation):
                     return instance()
         newfunc = super(Transformation, cls).__new__
         if newfunc is object.__new__:
             o = newfunc(cls)
         else:
             o = newfunc(cls, lower, upper, *args, **kwargs)
         cls._instances.append(weakref.ref(o))
@@ -1 +1,2 @@
 from .hmc import HMC
+from .samplers import *
@@ -18,11 +18,11 @@ class Metropolis_Hastings:
     def __init__(self,model,cov=None):
         """Metropolis Hastings, with tunings according to Gelman et al. """
         self.model = model
-        current = self.model._get_params_transformed()
+        current = self.model.optimizer_array
         self.D = current.size
         self.chains = []
         if cov is None:
-            self.cov = model.Laplace_covariance()
+            self.cov = np.eye(self.D)
         else:
             self.cov = cov
         self.scale = 2.4/np.sqrt(self.D)
@@ -33,20 +33,20 @@ class Metropolis_Hastings:
         if start is None:
             self.model.randomize()
         else:
-            self.model._set_params_transformed(start)
+            self.model.optimizer_array = start

-    def sample(self, Ntotal, Nburn, Nthin, tune=True, tune_throughout=False, tune_interval=400):
-        current = self.model._get_params_transformed()
-        fcurrent = self.model.log_likelihood() + self.model.log_prior()
+    def sample(self, Ntotal=10000, Nburn=1000, Nthin=10, tune=True, tune_throughout=False, tune_interval=400):
+        current = self.model.optimizer_array
+        fcurrent = self.model.log_likelihood() + self.model.log_prior() + \
+            self.model._log_det_jacobian()
         accepted = np.zeros(Ntotal,dtype=np.bool)
         for it in range(Ntotal):
-            print("sample %d of %d\r"%(it,Ntotal), end=' ')
+            print("sample %d of %d\r"%(it,Ntotal),end="\t")
             sys.stdout.flush()
             prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale)
-            self.model._set_params_transformed(prop)
-            fprop = self.model.log_likelihood() + self.model.log_prior()
+            self.model.optimizer_array = prop
+            fprop = self.model.log_likelihood() + self.model.log_prior() + \
+                self.model._log_det_jacobian()

             if fprop>fcurrent:#sample accepted, going 'uphill'
                 accepted[it] = True
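A minimal sketch of driving this sampler under the new `optimizer_array` API. The exact import path is an assumption based on the `from .samplers import *` re-export added above, and the toy model is illustrative:

```python
import numpy as np
import GPy

X = np.random.uniform(-3., 3., (30, 1))
Y = np.sin(X) + 0.1 * np.random.randn(30, 1)
m = GPy.models.GPRegression(X, Y)

# Class location assumed from the mcmc __init__ change above.
mh = GPy.inference.mcmc.samplers.Metropolis_Hastings(m)  # cov now defaults to np.eye(D)
mh.sample(Ntotal=2000, Nburn=500, Nthin=5)  # defaults added here: 10000/1000/10
```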
@@ -74,10 +74,11 @@ class Metropolis_Hastings:

     def predict(self,function,args):
         """Make a prediction for the function, to which we will pass the additional arguments"""
-        param = self.model._get_params()
+        param = self.model.param_array
         fs = []
         for p in self.chain:
-            self.model._set_params(p)
+            self.model.param_array = p
             fs.append(function(*args))
-        self.model._set_params(param)# reset model to starting state
+        # reset model to starting state
+        self.model.param_array = param
         return fs
@@ -256,8 +256,6 @@ class Kern(Parameterized):

         :param other: the other kernel to be added
         :type other: GPy.kern
-        :param tensor: whether or not to use the tensor space (default is false).
-        :type tensor: bool

         """
         assert isinstance(other, Kern), "only kernels can be multiplied to kernels..."
@@ -27,8 +27,6 @@ class Prod(CombinationKernel):

     :param k1, k2: the kernels to multiply
     :type k1, k2: Kern
-    :param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
-    :type tensor: Boolean
     :rtype: kernel object

     """
@@ -124,7 +124,7 @@ class Exponential(Likelihood):
         #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
         return d3lik_dlink3

-    def samples(self, gp):
+    def samples(self, gp, Y_metadata=None):
         """
         Returns a set of samples of observations based on a given value of the latent variable.
@@ -622,7 +622,7 @@ class Likelihood(Parameterized):
         Nf_samp = 300
         Ny_samp = 1
         s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
-        ss_y = self.samples(s, Y_metadata, samples=Ny_samp)
+        ss_y = self.samples(s, Y_metadata)#, samples=Ny_samp)
         #ss_y = ss_y.reshape(mu.shape[0], mu.shape[1], Nf_samp*Ny_samp)

         pred_quantiles = [np.percentile(ss_y, q, axis=1)[:,None] for q in quantiles]
@@ -137,7 +137,7 @@ class Poisson(Likelihood):
         """
         return self.gp_link.transf(gp)

-    def samples(self, gp, Y_metadata=None, samples=1):
+    def samples(self, gp, Y_metadata=None):
         """
         Returns a set of samples of observations based on a given value of the latent variable.
@@ -145,5 +145,7 @@ class Poisson(Likelihood):
         """
         orig_shape = gp.shape
         gp = gp.flatten()
-        Ysim = np.random.poisson(self.gp_link.transf(gp), [samples, gp.size]).T
-        return Ysim.reshape(orig_shape+(samples,))
+        # Ysim = np.random.poisson(self.gp_link.transf(gp), [samples, gp.size]).T
+        # return Ysim.reshape(orig_shape+(samples,))
+        Ysim = np.random.poisson(self.gp_link.transf(gp))
+        return Ysim.reshape(orig_shape)
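The simplified `Poisson.samples` draws one observation per latent value through the link function. The same idea stands alone as a short sketch (the log link here is an assumption standing in for `gp_link.transf`):

```python
import numpy as np

# Latent GP values at 10 inputs.
f = np.random.randn(10, 1)

# Push through a link to get positive Poisson rates (exp used for illustration).
rate = np.exp(f)

# One Poisson draw per latent point, preserving the input shape,
# mirroring the new samples() body above.
y = np.random.poisson(rate.flatten()).reshape(f.shape)
print(y.shape)  # (10, 1)
```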
@@ -75,7 +75,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
             levels=20, samples=0, fignum=None, ax=None, resolution=None,
             plot_raw=False,
             linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx',
-            apply_link=False, samples_f=0, plot_uncertain_inputs=True, predict_kw=None, plot_training_data=True):
+            apply_link=False, samples_y=0, plot_uncertain_inputs=True, predict_kw=None, plot_training_data=True):
     """
     Plot the posterior of the GP.
     - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@@ -93,24 +93,30 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
     :type which_data_rows: 'all' or a list of integers
     :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
     :type fixed_inputs: a list of tuples
+    :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
+    :type resolution: int
-    :param levels: number of levels to plot in a contour plot.
+    :param levels: for 2D plotting, the number of contour levels to use
     :type levels: int
-    :param samples: the number of a posteriori samples to plot p(y*|y)
+    :param samples: the number of a posteriori samples to plot p(f*|y)
     :type samples: int
     :param fignum: figure to plot on.
     :type fignum: figure number
     :param ax: axes to plot on.
     :type ax: axes handle
-    :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
-    :type resolution: int
     :param plot_raw: Whether to plot the raw function p(f|y)
     :type plot_raw: boolean
     :param linecol: color of line to plot.
-    :type linecol:
+    :type linecol: hex or color
     :param fillcol: color of fill
-    :param levels: for 2D plotting, the number of contour levels to use
-    :param apply_link: apply the link function if plotting f (default false)
+    :type fillcol: hex or color
+    :param apply_link: apply the link function if plotting f (default false), as well as posterior samples if requested
     :type apply_link: boolean
-    :param samples_f: the number of posteriori f samples to plot p(f*|y)
-    :type samples_f: int
+    :param samples_y: the number of a posteriori samples to plot p(y*|y)
+    :type samples_y: int
+    :param plot_uncertain_inputs: plot the uncertainty of the inputs as error bars if they have uncertainty (BGPLVM etc.)
+    :type plot_uncertain_inputs: boolean
+    :param predict_kw: keyword args for _raw_predict and predict functions if required
+    :type predict_kw: dict
+    :param plot_training_data: whether or not to plot the training points
+    :type plot_training_data: boolean
     """
@@ -185,17 +191,17 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',

     #optionally plot some samples
     if samples: #NOTE not tested with fixed_inputs
-        Ysim = model.posterior_samples(Xgrid, samples, Y_metadata=Y_metadata)
-        print(Ysim.shape)
-        print(Xnew.shape)
-        for yi in Ysim.T:
-            plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], '#3300FF', linewidth=0.25)
-            #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
-
-    if samples_f: #NOTE not tested with fixed_inputs
-        Fsim = model.posterior_samples_f(Xgrid, samples_f)
+        Fsim = model.posterior_samples_f(Xgrid, samples)
         if apply_link:
             Fsim = model.likelihood.gp_link.transf(Fsim)
         for fi in Fsim.T:
-            plots['posterior_samples_f'] = ax.plot(Xnew, fi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
+            plots['posterior_samples'] = ax.plot(Xnew, fi[:,None], '#3300FF', linewidth=0.25)
             #ax.plot(Xnew, fi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.

+    if samples_y: #NOTE not tested with fixed_inputs
+        Ysim = model.posterior_samples(Xgrid, samples_y, Y_metadata=Y_metadata)
+        for yi in Ysim.T:
+            plots['posterior_samples_y'] = ax.scatter(Xnew, yi[:,None], s=5, c=Tango.colorsHex['darkBlue'], marker='o', alpha=0.5)
+            #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
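Both sampling paths used above are public model methods, so the distinction can also be seen directly. A minimal sketch (the returned shapes are my assumption for this version):

```python
import numpy as np
import GPy

X = np.linspace(0., 1., 20)[:, None]
Y = np.sin(6. * X) + 0.1 * np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y)

Xgrid = np.linspace(0., 1., 100)[:, None]
F = m.posterior_samples_f(Xgrid, 5)  # latent draws from p(f*|y)
Ys = m.posterior_samples(Xgrid, 5)   # observation draws from p(y*|y)
print(F.shape, Ys.shape)             # (100, 5) each, assumed
```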
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
import numpy as np
|
||||
import scipy as sp
|
||||
from GPy.util.linalg import jitchol
|
||||
import GPy
|
||||
from ..util.linalg import jitchol,trace_dot, ijk_jlk_to_il, ijk_ljk_to_ilk
|
||||
|
||||
class LinalgTests(np.testing.TestCase):
|
||||
def setUp(self):
|
||||
|
|
@@ -37,18 +36,19 @@ class LinalgTests(np.testing.TestCase):
         except sp.linalg.LinAlgError:
             return True

-    def test_einsum_ijk_jlk_to_il(self):
-        A = np.random.randn(50, 150, 5)
-        B = np.random.randn(150, 100, 5)
-        pure = np.einsum('ijk,jlk->il', A, B)
-        quick = GPy.util.linalg.ijk_jlk_to_il(A, B)
-        np.testing.assert_allclose(pure, quick)
+    def test_trace_dot(self):
+        N = 5
+        A = np.random.rand(N,N)
+        B = np.random.rand(N,N)
+        trace = np.trace(A.dot(B))
+        test_trace = trace_dot(A,B)
+        np.testing.assert_allclose(trace,test_trace,atol=1e-13)

     def test_einsum_ij_jlk_to_ilk(self):
         A = np.random.randn(15, 150, 5)
         B = np.random.randn(150, 50, 5)
         pure = np.einsum('ijk,jlk->il', A, B)
-        quick = GPy.util.linalg.ijk_jlk_to_il(A,B)
+        quick = ijk_jlk_to_il(A,B)
         np.testing.assert_allclose(pure, quick)

     def test_einsum_ijk_ljk_to_ilk(self):
@@ -56,5 +56,5 @@ class LinalgTests(np.testing.TestCase):
         B = np.random.randn(150, 20, 5)
         #B = A.copy()
         pure = np.einsum('ijk,ljk->ilk', A, B)
-        quick = GPy.util.linalg.ijk_ljk_to_ilk(A,B)
+        quick = ijk_ljk_to_ilk(A,B)
         np.testing.assert_allclose(pure, quick)
GPy/testing/rv_transformation_tests.py (new file, 101 lines)

@@ -0,0 +1,101 @@
# Written by Ilias Bilionis
"""
Test if hyperparameters in models are properly transformed.
"""


import unittest
import numpy as np
import scipy.stats as st
import GPy


class TestModel(GPy.core.Model):
    """
    A simple GPy model with one parameter.
    """
    def __init__(self):
        GPy.core.Model.__init__(self, 'test_model')
        theta = GPy.core.Param('theta', 1.)
        self.link_parameter(theta)

    def log_likelihood(self):
        return 0.


class RVTransformationTestCase(unittest.TestCase):

    def _test_trans(self, trans):
        m = TestModel()
        prior = GPy.priors.LogGaussian(.5, 0.1)
        m.theta.set_prior(prior)
        m.theta.unconstrain()
        m.theta.constrain(trans)
        # The PDF of the transformed variables
        p_phi = lambda phi : np.exp(-m._objective_grads(phi)[0])
        # To the empirical PDF of:
        theta_s = prior.rvs(100000)
        phi_s = trans.finv(theta_s)
        # which is essentially a kernel density estimation
        kde = st.gaussian_kde(phi_s)
        # We will compare the PDF here:
        phi = np.linspace(phi_s.min(), phi_s.max(), 100)
        # The transformed PDF of phi should be this:
        pdf_phi = np.array([p_phi(p) for p in phi])
        # UNCOMMENT TO SEE GRAPHICAL COMPARISON
        #import matplotlib.pyplot as plt
        #fig, ax = plt.subplots()
        #ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Histogram')
        #ax.plot(phi, kde(phi), '--', linewidth=2, label='Kernel Density Estimation')
        #ax.plot(phi, pdf_phi, ':', linewidth=2, label='Transformed PDF')
        #ax.set_xlabel(r'transformed $\theta$', fontsize=16)
        #ax.set_ylabel('PDF', fontsize=16)
        #plt.legend(loc='best')
        #plt.show(block=True)
        # END OF PLOT
        # The following test cannot be very accurate
        self.assertTrue(np.linalg.norm(pdf_phi - kde(phi)) / np.linalg.norm(kde(phi)) <= 1e-1)
        # Check the gradients at a few random points
        for i in range(10):
            m.theta = theta_s[i]
            self.assertTrue(m.checkgrad(verbose=True))

    def test_Logexp(self):
        self._test_trans(GPy.constraints.Logexp())
        self._test_trans(GPy.constraints.Exponent())


if __name__ == '__main__':
    unittest.main()
    quit()
    m = TestModel()
    prior = GPy.priors.LogGaussian(0., .9)
    m.theta.set_prior(prior)

    # The following should return the PDF in terms of the transformed quantities
    p_phi = lambda phi : np.exp(-m._objective_grads(phi)[0])

    # Let's look at the transformation phi = log(exp(theta - 1))
    trans = GPy.constraints.Exponent()
    m.theta.constrain(trans)
    # Plot the transformed probability density
    phi = np.linspace(-8, 8, 100)
    fig, ax = plt.subplots()
    # Let's draw some samples of theta and transform them so that we see
    # which one is right
    theta_s = prior.rvs(10000)
    # Transform it to the new variables
    phi_s = trans.finv(theta_s)
    # And draw their histogram
    ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Empirical')
    # This is to be compared to the PDF of the model expressed in terms of these new
    # variables
    ax.plot(phi, [p_phi(p) for p in phi], label='Transformed PDF', linewidth=2)
    ax.set_xlim(-3, 10)
    ax.set_xlabel(r'transformed $\theta$', fontsize=16)
    ax.set_ylabel('PDF', fontsize=16)
    plt.legend(loc='best')
    # Now let's test the gradients
    m.checkgrad(verbose=True)
    # And show the plot
    plt.show(block=True)
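What this test checks is the change-of-variables identity p_φ(φ) = p_θ(θ(φ)) · |dθ/dφ|. A standalone numeric sketch for the `Exponent` transform θ = exp(φ); the SciPy log-normal parameters below are illustrative, standing in for GPy's `LogGaussian(.5, 0.1)`:

```python
import numpy as np
import scipy.stats as st

# theta ~ LogNormal; exact parameters are illustrative, not GPy's.
prior = st.lognorm(s=0.3, scale=np.exp(0.5))

# Change of variables for theta = exp(phi): p_phi(phi) = p_theta(e^phi) * e^phi.
phi = np.linspace(-0.5, 1.5, 200)
pdf_phi = prior.pdf(np.exp(phi)) * np.exp(phi)

# Compare against a KDE of transformed prior samples, as the test does.
kde = st.gaussian_kde(np.log(prior.rvs(100000)))
print(np.linalg.norm(pdf_phi - kde(phi)) / np.linalg.norm(kde(phi)))  # should be small
```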
@@ -157,7 +157,7 @@ def trace_dot(a, b):
     """
     Efficiently compute the trace of the matrix product of a and b
     """
-    return np.sum(a * b)
+    return np.einsum('ij,ji->', a, b)

 def mdot(*args):
     """
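This small change fixes a transpose: `np.sum(a * b)` computes tr(a·bᵀ), which only equals tr(a·b) when b is symmetric, while the einsum contracts the correct index pair without forming the full matrix product. A quick check:

```python
import numpy as np

a = np.random.rand(4, 4)
b = np.random.rand(4, 4)

print(np.trace(a @ b))             # reference value, forms the full product
print(np.einsum('ij,ji->', a, b))  # new trace_dot: same value, no intermediate matrix
print(np.sum(a * b))               # old version: tr(a @ b.T), generally different
```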
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue