Merge branch 'devel' of github.com:SheffieldML/GPy into devel

2026-05-13 14:03:20 +02:00 · 2013-05-10 17:55:00 +01:00 · 2013-05-10 17:55:00 +01:00 · a500c526dd
commit a500c526dd
parent 6d65e2adf8 ab30ed7483
14 changed files with 174 additions and 198 deletions
--- a/GPy/core/transformations.py
+++ b/GPy/core/transformations.py
@ -50,7 +50,9 @@ class logexp_clipped(transformation):
        ef = np.exp(f)
        gf = (ef - 1.) / ef
        return np.where(f < 1e-6, 0, gf)
-    def initialize(self, f):
+    def initialize(self,f):
        if np.any(f<0.):
            print "Warning: changing parameters to satisfy constraints"
        return np.abs(f)
    def __str__(self):
        return '(+ve)'
@ -65,6 +67,8 @@ class exponent(transformation):
    def gradfactor(self, f):
        return f
    def initialize(self, f):
        if np.any(f<0.):
            print "Warning: changing parameters to satisfy constraints"
        return np.abs(f)
    def __str__(self):
        return '(+ve)'
@ -79,6 +83,8 @@ class negative_exponent(transformation):
    def gradfactor(self, f):
        return f
    def initialize(self, f):
        if np.any(f>0.):
            print "Warning: changing parameters to satisfy constraints"
        return -np.abs(f)
    def __str__(self):
        return '(-ve)'
@ -108,9 +114,11 @@ class logistic(transformation):
    def finv(self, f):
        return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
    def gradfactor(self, f):
-        return (f - self.lower) * (self.upper - f) / self.difference
+        return (f-self.lower)*(self.upper-f)/self.difference
-    def initialize(self, f):
+    def initialize(self,f):
-        return self.f(f * 0.)
+        if np.any(np.logical_or(f<self.lower,f>self.upper)):
            print "Warning: changing parameters to satisfy constraints"
        return np.where(np.logical_or(f<self.lower,f>self.upper),self.f(f*0.),f)
    def __str__(self):
        return '({},{})'.format(self.lower, self.upper)
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@ -63,7 +63,7 @@ def GPLVM_oil_100(optimize=True):
    m.plot_latent(labels=m.data_labels)
    return m
-def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300, plot=False):
+def BGPLVM_oil(optimize=True, N=100, Q=10, M=20, max_f_eval=300, plot=False):
    data = GPy.util.datasets.oil()
    # create simple GP model
@ -72,19 +72,19 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300, plot=False):
    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=kernel, M=M)
    m.data_labels = data['Y'][:N].argmax(axis=1)
    m.constrain('variance', logexp_clipped())
    m.constrain('length', logexp_clipped())
    m['lengt'] = 100.
    m.ensure_default_constraints()
    # optimize
    if optimize:
-        m.constrain_fixed('noise', 1. / Y.var())
+        m.unconstrain('noise'); m.constrain_fixed('noise', Y.var() / 100.)
-        m.constrain('variance', logexp_clipped())
+        m.optimize('scg', messages=1, max_f_eval=150)
        m['lengt'] = 1000
        m.ensure_default_constraints()
        m.optimize('scg', messages=1, max_f_eval=max(80, max_f_eval))
        m.unconstrain('noise')
-        m.constrain_positive('noise')
+        m.constrain('noise', logexp_clipped())
-        m.optimize('scg', messages=1, max_f_eval=max(0, max_f_eval - 80))
+        m.optimize('scg', messages=1, max_f_eval=max_f_eval)
    else:
        m.ensure_default_constraints()
    if plot:
        y = m.likelihood.Y[0, :]
@ -92,7 +92,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300, plot=False):
        plt.sca(latent_axes)
        m.plot_latent()
        data_show = GPy.util.visualize.vector_show(y)
-        lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes)  # , sense_axes=sense_axes)
+        lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes) # , sense_axes=sense_axes)
        raw_input('Press enter to finish')
        plt.close('all')
    # # plot
@ -182,7 +182,7 @@ def bgplvm_simulation_matlab_compare():
    Y = sim_data['Y']
    S = sim_data['S']
    mu = sim_data['mu']
-    M, [_, Q] = 20, mu.shape
+    M, [_, Q] = 3, mu.shape
    from GPy.models import mrd
    from GPy import kern
@ -192,7 +192,7 @@ def bgplvm_simulation_matlab_compare():
    m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
 #                        X=mu,
 #                        X_variance=S,
-                       _debug=True)
+                       _debug=False)
    m.ensure_default_constraints()
    m.auto_scale_factor = True
    m['noise'] = Y.var() / 100.
@ -223,7 +223,7 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
    Y = Ylist[0]
-    k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))  # + kern.bias(Q)
+    k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
 #     k = kern.white(Q, .00001) + kern.bias(Q)
    m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
    # m.set('noise',)
@ -358,7 +358,7 @@ def mrd_simulation(plot_sim=False):
 #         import ipdb; ipdb.set_trace()
 #     np.seterrcall(ipdbonerr)
-    return m  # , mtest
+    return m # , mtest
 def mrd_silhouette():
@ -371,7 +371,7 @@ def brendan_faces():
    # optimize
    m.ensure_default_constraints()
-    # m.optimize(messages=1, max_f_eval=10000)
+    m.optimize(messages=1, max_f_eval=10000)
    ax = m.plot_latent()
    y = m.likelihood.Y[0, :]
--- a/GPy/inference/SCG.py
+++ b/GPy/inference/SCG.py
@ -49,6 +49,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
    function_eval = 1
    fnow = fold
    gradnew = gradf(x, *optargs)  # Initial gradient.
    current_grad = np.dot(gradnew, gradnew)
    gradold = gradnew.copy()
    d = -gradnew  # Initial search direction.
    success = True  # Force calculation of directional derivs.
@ -110,7 +111,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
        iteration += 1
        if display:
            print '\r',
-            print 'i: {0:>5g}  f:{1:> 12e}  b:{2:> 12e} |g|:{3:> 12e}'.format(iteration, fnow, beta, np.dot(gradnew, gradnew)),
+            print 'i: {0:>5g}  f:{1:> 12e}  b:{2:> 12e} |g|:{3:> 12e}'.format(iteration, fnow, beta, current_grad),
            # print 'Iteration:', iteration, ' Objective:', fnow, '  Scale:', beta, '\r',
            sys.stdout.flush()
@ -125,8 +126,9 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
                fold = fnew
                gradold = gradnew
                gradnew = gradf(x, *optargs)
                current_grad = np.dot(gradnew, gradnew)
                # If the gradient is zero then we are done.
-                if (np.dot(gradnew, gradnew)) <= gtol:
+                if current_grad <= gtol:
                    status = 'converged'
                    return x, flog, function_eval, status
--- a/GPy/kern/init.py
+++ b/GPy/kern/init.py
@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
 try:
    from constructors import rbf_sympy, sympykern # these depend on sympy
 except:
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@ -20,7 +20,6 @@ from periodic_exponential import periodic_exponential as periodic_exponentialpar
 from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
 from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
 from prod import prod as prodpart
 from prod_orthogonal import prod_orthogonal as prod_orthogonalpart
 from symmetric import symmetric as symmetric_part
 from coregionalise import coregionalise as coregionalise_part
 from rational_quadratic import rational_quadratic as rational_quadraticpart
@ -255,7 +254,7 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,
    part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper)
    return kern(D, [part])
-def prod(k1,k2):
+def prod(k1,k2,tensor=False):
    """
     Construct a product kernel over D from two kernels over D
@ -263,19 +262,8 @@ def prod(k1,k2):
    :type k1, k2: kernpart
    :rtype: kernel object
    """
-    part = prodpart(k1,k2)
+    part = prodpart(k1,k2,tensor)
-    return kern(k1.D, [part])
+    return kern(part.D, [part])
 def prod_orthogonal(k1,k2):
    """
     Construct a product kernel over D1 x D2 from a kernel over D1 and another over D2.
    :param k1, k2: the kernels to multiply
    :type k1, k2: kernpart
    :rtype: kernel object
    """
    part = prod_orthogonalpart(k1,k2)
    return kern(k1.D+k2.D, [part])
 def symmetric(k):
    """
--- a/GPy/kern/kern.py
+++ b/GPy/kern/kern.py
@ -7,7 +7,6 @@ import pylab as pb
 from ..core.parameterised import parameterised
 from kernpart import kernpart
 import itertools
 from prod_orthogonal import prod_orthogonal
 from prod import prod
 from ..util.linalg import symmetrify
@ -84,96 +83,72 @@ class kern(parameterised):
            count += p.Nparam
    def __add__(self, other):
-        assert self.D == other.D
+        """
-        newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
+        Shortcut for `add`.
-        # transfer constraints:
+        """
-        newkern.constrained_indices = self.constrained_indices + [i+self.Nparam  for i in other.constrained_indices]
+        return self.add(other)
        newkern.constraints = self.constraints + other.constraints
        newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
        newkern.fixed_values = self.fixed_values + other.fixed_values
        newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
        return newkern
-    def add(self, other):
+    def add(self, other,tensor=False):
        """
        Add another kernel to this one. Both kernels are defined on the same _space_
        :param other: the other kernel to be added
        :type other: GPy.kern
        """
-        return self + other
+        if tensor:
            D = self.D + other.D
            self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
            other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
            other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
-    def add_orthogonal(self, other):
+            newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
        """
        Add another kernel to this one. Both kernels are defined on separate spaces
        :param other: the other kernel to be added
        :type other: GPy.kern
        """
        # deal with input slices
        D = self.D + other.D
        self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
        other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
        other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
-        newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
+            # transfer constraints:
-
+            newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices]
-        # transfer constraints:
+            newkern.constraints = self.constraints + other.constraints
-        newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices]
+            newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
-        newkern.constraints = self.constraints + other.constraints
+            newkern.fixed_values = self.fixed_values + other.fixed_values
-        newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
+            newkern.constraints = self.constraints + other.constraints
-        newkern.fixed_values = self.fixed_values + other.fixed_values
+            newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
-        newkern.constraints = self.constraints + other.constraints
+        else:
-        newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers
+            assert self.D == other.D
-        newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
+            newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
            # transfer constraints:
            newkern.constrained_indices = self.constrained_indices + [i+self.Nparam  for i in other.constrained_indices]
            newkern.constraints = self.constraints + other.constraints
            newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
            newkern.fixed_values = self.fixed_values + other.fixed_values
            newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
        return newkern
    def __mul__(self, other):
        """
-        Shortcut for `prod_orthogonal`. Note that `+` assumes that we sum 2 kernels defines on the same space whereas `*` assumes that the kernels are defined on different subspaces.
+        Shortcut for `prod`.
        """
        return self.prod(other)
-    def prod(self, other):
+    def prod(self, other,tensor=False):
        """
-        multiply two kernels defined on the same spaces.
+        multiply two kernels (either on the same space, or on the tensor product of the input space)
        :param other: the other kernel to be added
        :type other: GPy.kern
        """
        K1 = self.copy()
        K2 = other.copy()
        newkernparts = [prod(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
        slices = []
-        for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
+        for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
-            s1, s2 = [False] * K1.D, [False] * K2.D
+            s1, s2 = [False]*K1.D, [False]*K2.D
            s1[sl1], s2[sl2] = [True], [True]
-            slices += [s1 + s2]
+            slices += [s1+s2]
        newkernparts = [prod(k1, k2,tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)]
        if tensor:
            newkern = kern(K1.D + K2.D, newkernparts, slices)
        else:
            newkern = kern(K1.D, newkernparts, slices)
        newkern = kern(K1.D, newkernparts, slices)
        newkern._follow_constrains(K1, K2)
        return newkern
    def prod_orthogonal(self, other):
        """
        multiply two kernels. Both kernels are defined on separate spaces.
        :param other: the other kernel to be added
        :type other: GPy.kern
        """
        K1 = self.copy()
        K2 = other.copy()
        newkernparts = [prod_orthogonal(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
        slices = []
        for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
            s1, s2 = [False] * K1.D, [False] * K2.D
            s1[sl1], s2[sl2] = [True], [True]
            slices += [s1 + s2]
        newkern = kern(K1.D + K2.D, newkernparts, slices)
        newkern._follow_constrains(K1, K2)
        return newkern
    def _follow_constrains(self, K1, K2):
@ -469,9 +444,9 @@ class kern(parameterised):
        return target_mu, target_S
-    def plot(self, x=None, plot_limits=None, which_functions='all', resolution=None, *args, **kwargs):
+    def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs):
-        if which_functions == 'all':
+        if which_parts == 'all':
-            which_functions = [True] * self.Nparts
+            which_parts = [True] * self.Nparts
        if self.D == 1:
            if x is None:
                x = np.zeros((1, 1))
@ -488,7 +463,7 @@ class kern(parameterised):
                raise ValueError, "Bad limits for plotting"
            Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None]
-            Kx = self.K(Xnew, x, slices2=which_functions)
+            Kx = self.K(Xnew, x, which_parts)
            pb.plot(Xnew, Kx, *args, **kwargs)
            pb.xlim(xmin, xmax)
            pb.xlabel("x")
@ -514,7 +489,7 @@ class kern(parameterised):
            xg = np.linspace(xmin[0], xmax[0], resolution)
            yg = np.linspace(xmin[1], xmax[1], resolution)
            Xnew = np.vstack((xx.flatten(), yy.flatten())).T
-            Kx = self.K(Xnew, x, slices2=which_functions)
+            Kx = self.K(Xnew, x, which_parts)
            Kx = Kx.reshape(resolution, resolution).T
            pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs)
            pb.xlim(xmin[0], xmax[0])
--- a/GPy/kern/prod.py
+++ b/GPy/kern/prod.py
@ -4,108 +4,108 @@
 from kernpart import kernpart
 import numpy as np
 import hashlib
 #from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)
 class prod(kernpart):
    """
-    Computes the product of 2 kernels that are defined on the same space
+    Computes the product of 2 kernels
    :param k1, k2: the kernels to multiply
    :type k1, k2: kernpart
    :param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
    :type tensor: Boolean
    :rtype: kernel object
    """
-    def __init__(self,k1,k2):
+    def __init__(self,k1,k2,tensor=False):
        assert k1.D == k2.D, "Error: The input spaces of the kernels to multiply must have the same dimension"
        self.D = k1.D
        self.Nparam = k1.Nparam + k2.Nparam
        self.name = k1.name + '<times>' + k2.name
        self.k1 = k1
        self.k2 = k2
        if tensor:
            self.D = k1.D + k2.D
            self.slice1 = slice(0,self.k1.D)
            self.slice2 = slice(self.k1.D,self.k1.D+self.k2.D)
        else:
            assert k1.D == k2.D, "Error: The input spaces of the kernels to sum don't have the same dimension."
            self.D = k1.D
            self.slice1 = slice(0,self.D)
            self.slice2 = slice(0,self.D)
        self._X, self._X2, self._params = np.empty(shape=(3,1))
        self._set_params(np.hstack((k1._get_params(),k2._get_params())))
    def _get_params(self):
        """return the value of the parameters."""
-        return self.params
+        return np.hstack((self.k1._get_params(), self.k2._get_params()))
    def _set_params(self,x):
        """set the value of the parameters."""
        self.k1._set_params(x[:self.k1.Nparam])
        self.k2._set_params(x[self.k1.Nparam:])
        self.params = x
    def _get_param_names(self):
        """return parameter names."""
        return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
    def K(self,X,X2,target):
-        """Compute the covariance matrix between X and X2."""
+        self._K_computations(X,X2)
-        if X2 is None:
+        target += self._K1 * self._K2
            target1 = np.zeros((X.shape[0],X2.shape[0]))
            target2 = np.zeros((X.shape[0],X2.shape[0]))
        else:
            target1 = np.zeros((X.shape[0],X.shape[0]))
            target2 = np.zeros((X.shape[0],X.shape[0]))
        self.k1.K(X,X2,target1)
        self.k2.K(X,X2,target2)
        target += target1 * target2
    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X."""
        target1 = np.zeros((X.shape[0],))
        target2 = np.zeros((X.shape[0],))
        self.k1.Kdiag(X,target1)
        self.k2.Kdiag(X,target2)
        target += target1 * target2
    def dK_dtheta(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to the parameters."""
-        if X2 is None: X2 = X
+        self._K_computations(X,X2)
-        K1 = np.zeros((X.shape[0],X2.shape[0]))
+        if X2 is None:
-        K2 = np.zeros((X.shape[0],X2.shape[0]))
+            self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.Nparam])
-        self.k1.K(X,X2,K1)
+            self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.Nparam:])
-        self.k2.K(X,X2,K2)
+        else:
            self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.Nparam])
            self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.Nparam:])
-        k1_target = np.zeros(self.k1.Nparam)
+    def Kdiag(self,X,target):
-        k2_target = np.zeros(self.k2.Nparam)
+        """Compute the diagonal of the covariance matrix associated to X."""
-        self.k1.dK_dtheta(dL_dK*K2, X, X2, k1_target)
+        target1 = np.zeros(X.shape[0])
-        self.k2.dK_dtheta(dL_dK*K1, X, X2, k2_target)
+        target2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,self.slice1],target1)
        self.k2.Kdiag(X[:,self.slice2],target2)
        target += target1 * target2
-        target[:self.k1.Nparam] += k1_target
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        target[self.k1.Nparam:] += k2_target
+        K1 = np.zeros(X.shape[0])
        K2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,self.slice1],K1)
        self.k2.Kdiag(X[:,self.slice2],K2)
        self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.Nparam])
        self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.Nparam:])
    def dK_dX(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to X."""
-        if X2 is None: X2 = X
+        self._K_computations(X,X2)
-        K1 = np.zeros((X.shape[0],X2.shape[0]))
+        self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target)
-        K2 = np.zeros((X.shape[0],X2.shape[0]))
+        self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target)
        self.k1.K(X,X2,K1)
        self.k2.K(X,X2,K2)
-        self.k1.dK_dX(dL_dK*K2, X, X2, target)
+    def dKdiag_dX(self, dL_dKdiag, X, target):
-        self.k2.dK_dX(dL_dK*K1, X, X2, target)
+        K1 = np.zeros(X.shape[0])
        K2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,self.slice1],K1)
        self.k2.Kdiag(X[:,self.slice2],K2)
-    def dKdiag_dX(self,dL_dKdiag,X,target):
+        self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target)
-        target1 = np.zeros((X.shape[0],))
+        self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target)
        target2 = np.zeros((X.shape[0],))
        self.k1.Kdiag(X,target1)
        self.k2.Kdiag(X,target2)
-        self.k1.dKdiag_dX(dL_dKdiag*target2, X, target)
+    def _K_computations(self,X,X2):
-        self.k2.dKdiag_dX(dL_dKdiag*target1, X, target)
+        if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
-
+            self._X = X.copy()
-    def dKdiag_dtheta(self,dL_dKdiag,X,target):
+            self._params == self._get_params().copy()
-        """Compute the diagonal of the covariance matrix associated to X."""
+            if X2 is None:
-        target1 = np.zeros((X.shape[0],))
+                self._X2 = None
-        target2 = np.zeros((X.shape[0],))
+                self._K1 = np.zeros((X.shape[0],X.shape[0]))
-        self.k1.Kdiag(X,target1)
+                self._K2 = np.zeros((X.shape[0],X.shape[0]))
-        self.k2.Kdiag(X,target2)
+                self.k1.K(X[:,self.slice1],None,self._K1)
-
+                self.k2.K(X[:,self.slice2],None,self._K2)
-        k1_target = np.zeros(self.k1.Nparam)
+            else:
-        k2_target = np.zeros(self.k2.Nparam)
+                self._X2 = X2.copy()
-        self.k1.dKdiag_dtheta(dL_dKdiag*target2, X, k1_target)
+                self._K1 = np.zeros((X.shape[0],X2.shape[0]))
-        self.k2.dKdiag_dtheta(dL_dKdiag*target1, X, k2_target)
+                self._K2 = np.zeros((X.shape[0],X2.shape[0]))
-
+                self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
-        target[:self.k1.Nparam] += k1_target
+                self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)
        target[self.k1.Nparam:] += k2_target
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@ -136,14 +136,16 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
                self._savedparams.append([self.f_call, self._get_params()])
                self._savedgradients.append([self.f_call, self._log_likelihood_gradients()])
                self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
-                sf2 = self.scale_factor ** 2
+#                 sf2 = self.scale_factor ** 2
                if self.likelihood.is_heteroscedastic:
                    A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
-                    B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
+#                     B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
                    B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
                else:
                    A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
-                    B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
+#                     B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
-                C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5 * self.M * np.log(sf2))
+                    B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
                C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2))
                D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
                self._savedABCD.append([self.f_call, A, B, C, D])
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@ -273,7 +273,7 @@ class MRD(model):
    def _handle_plotting(self, fig_num, axes, plotf):
        if axes is None:
-            fig = pylab.figure(num=fig_num, figsize=(4 * len(self.bgplvms), 3 * len(self.bgplvms)))
+            fig = pylab.figure(num=fig_num, figsize=(4 * len(self.bgplvms), 3))
        for i, g in enumerate(self.bgplvms):
            if axes is None:
                ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
--- a/GPy/models/sparse_GP.py
+++ b/GPy/models/sparse_GP.py
@ -104,7 +104,7 @@ class sparse_GP(GP):
                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
                self.A = tdot(tmp)
            else:
-#                 tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf)
+                # tmp = self.psi1 * (np.sqrt(self.likelihood.precision) / sf)
                tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
                tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
                self.A = tdot(tmp)
@ -163,7 +163,7 @@ class sparse_GP(GP):
        else:
            # likelihood is not heterscedatic
            self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
-#             self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2)
+            #  self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision * sf2)
            self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
            self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
@ -177,7 +177,7 @@ class sparse_GP(GP):
 #             B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
            B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
        else:
-            A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
+            A = -0.5 * self.N * self.D * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
 #             B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
            B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
 #         C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5 * self.M * np.log(sf2))
--- a/doc/Figures/tuto_kern_overview_mANOVA.png
+++ b/doc/Figures/tuto_kern_overview_mANOVA.png
--- a/doc/Figures/tuto_kern_overview_mANOVAdec.png
+++ b/doc/Figures/tuto_kern_overview_mANOVAdec.png
--- a/doc/Figures/tuto_kern_overview_multadd.png
+++ b/doc/Figures/tuto_kern_overview_multadd.png
--- a/doc/tuto_kernel_overview.rst
+++ b/doc/tuto_kernel_overview.rst
@ -55,18 +55,18 @@ In ``GPy``, kernel objects can be added or multiplied. In both cases, two kinds
    * a kernel over :math:`\mathbb{R} \times \mathbb{R}`:  :math:`k(x,y) = k_1(x,y) \times k_2(x,y)`
    * a kernel over :math:`\mathbb{R}^2 \times \mathbb{R}^2`:  :math:`k(\mathbf{x},\mathbf{y}) = k_1(x_1,y_1) \times k_2(x_2,y_2)`
-These two options are available in GPy under the name ``prod`` and ``prod_orthogonal`` (resp. ``add`` and ``add_orthogonal`` for the addition). Here is a quick example ::
+These two options are available in GPy using the flag ``tensor`` in the ``add`` and ``prod`` functions. Here is a quick example ::
    k1 = GPy.kern.rbf(1,1.,2.)
    k2 = GPy.kern.Matern32(1, 0.5, 0.2)
    # Product of kernels
-    k_prod = k1.prod(k2)
+    k_prod = k1.prod(k2)                        # By default, tensor=False
-    k_prodorth = k1.prod_orthogonal(k2)
+    k_prodtens = k1.prod(k2,tensor=True)
    # Sum of kernels
-    k_add = k1.add(k2)
+    k_add = k1.add(k2)                          # By default, tensor=False
-    k_addorth = k1.add_orthogonal(k2)    
+    k_addtens = k1.add(k2,tensor=True)    
 ..  # plots
    pb.figure(figsize=(8,8))
@ -74,21 +74,21 @@ These two options are available in GPy under the name ``prod`` and ``prod_orthog
    k_prod.plot()
    pb.title('prod')
    pb.subplot(2,2,2)
-    k_prodorth.plot()
+    k_prodtens.plot()
-    pb.title('prod_orthogonal')
+    pb.title('tensor prod')
    pb.subplot(2,2,3)
    k_add.plot()
-    pb.title('add')
+    pb.title('sum')
    pb.subplot(2,2,4)
-    k_addorth.plot()
+    k_addtens.plot()
-    pb.title('add_orthogonal')
+    pb.title('tensor sum')
    pb.subplots_adjust(wspace=0.3, hspace=0.3)
 .. figure::  Figures/tuto_kern_overview_multadd.png
    :align:  center
    :height: 500px
-A shortcut for ``add`` and ``prod`` is provided by the usual ``+`` and ``*`` operators. Here is another example where we create a periodic kernel with some decay ::
+A shortcut for ``add`` and ``prod`` (with default flag ``tensor=False``) is provided by the usual ``+`` and ``*`` operators. Here is another example where we create a periodic kernel with some decay ::
    k1 = GPy.kern.rbf(1,1.,2)
    k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
@ -113,7 +113,7 @@ A shortcut for ``add`` and ``prod`` is provided by the usual ``+`` and ``*`` ope
    :align:  center
    :height: 300px
-In general, ``kern`` objects can be seen as a sum of ``kernparts`` objects, where the later are covariance functions denied on the same space. For example, the following code ::
+In general, ``kern`` objects can be seen as a sum of ``kernparts`` objects, where the later are covariance functions defined on the same space. For example, the following code ::
    k = (k1+k2)*(k1+k2)
    print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name
@ -184,7 +184,7 @@ Let us assume that we want to define an ANOVA kernel with a Matern 3/2 kernel fo
    k_cst = GPy.kern.bias(1,variance=1.)
    k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
-    Kanova = (k_cst + k_mat).prod_orthogonal(k_cst + k_mat)
+    Kanova = (k_cst + k_mat).prod(k_cst + k_mat,tensor=True)
    print Kanova
 Printing the resulting kernel outputs the following ::
@ -236,14 +236,14 @@ The submodels can be represented with the option ``which_function`` of ``plot``:
    pb.subplot(1,5,2)
    pb.ylabel("=   ",rotation='horizontal',fontsize='30')
    pb.subplot(1,5,3)
-    m.plot(which_functions=[False,True,False,False])
+    m.plot(which_parts=[False,True,False,False])
    pb.ylabel("cst          +",rotation='horizontal',fontsize='30')
    pb.subplot(1,5,4)
-    m.plot(which_functions=[False,False,True,False])
+    m.plot(which_parts=[False,False,True,False])
    pb.ylabel("+   ",rotation='horizontal',fontsize='30')
    pb.subplot(1,5,5)
    pb.ylabel("+   ",rotation='horizontal',fontsize='30')
-    m.plot(which_functions=[False,False,False,True])
+    m.plot(which_parts=[False,False,False,True])
 ..  pb.savefig('tuto_kern_overview_mANOVAdec.png',bbox_inches='tight')
@ -252,7 +252,8 @@ The submodels can be represented with the option ``which_function`` of ``plot``:
    :height: 250px
-..  import pylab as pb
+..  # code
    import pylab as pb
    import numpy as np
    import GPy
    pb.ion()