Merged __init__

Michael T Smith 2016-06-02 15:59:10 +01:00
commit 163f5bcb04
138 changed files with 1952 additions and 2393 deletions

.coveragerc
View file

@@ -9,17 +9,16 @@ omit = ./GPy/testing/*.py, travis_tests.py, setup.py, ./GPy/__version__.py
 exclude_lines =
     # Have to re-enable the standard pragma
     pragma: no cover
+    verbose

     # Don't complain about missing debug-only code:
     if self\.debug

     # Don't complain if tests don't hit defensive assertion code:
-    raise AssertionError
-    raise NotImplementedError
-    raise NotImplemented
+    raise
     except
     pass
+    Not implemented

     # Don't complain if non-runnable code isn't run:
     if 0:
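
For illustration, a hypothetical snippet (not from the commit) showing the kind of lines these patterns exclude from coverage reports:

class Solver(object):
    debug = False
    def solve(self):
        raise NotImplementedError   # excluded: matches the 'raise' pattern
    def dump(self, state):
        if self.debug:              # excluded: matches 'if self\.debug'
            print(state)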

.travis.yml
View file

@@ -32,6 +32,7 @@ install:
 - pip install pypandoc
 - pip install git+git://github.com/BRML/climin.git
 - pip install autograd
+- pip install nose-show-skipped
 - python setup.py develop

 script:
@@ -47,9 +48,11 @@ before_deploy:
 - make html
 - cd ../
 - if [[ "$TRAVIS_OS_NAME" == "linux" ]];
-  then export DIST='sdist';
+  then
+  export DIST='sdist';
   elif [[ "$TRAVIS_OS_NAME" == "osx" ]];
-  then export DIST='bdist_wheel';
+  then
+  export DIST='bdist_wheel';
   fi;

 deploy:
@@ -60,5 +63,6 @@ deploy:
   on:
     tags: true
     branch: deploy
+    #condition: "$TRAVIS_OS_NAME" == "osx" || ( "$TRAVIS_OS_NAME" == "linux" && "$PYTHON_VERSION" == "2.7" )
   distributions: $DIST
   skip_cleanup: true

AUTHORS.txt
View file

@@ -1 +1 @@
-[GPy Authors](https://github.com/SheffieldML/GPy/graphs/contributors)
+GPy Authors: https://github.com/SheffieldML/GPy/graphs/contributors

GPy/__version__.py
View file

@@ -1 +1 @@
-__version__ = "1.0.2"
+__version__ = "1.0.9"

GPy/core/gp.py
View file

@@ -148,14 +148,16 @@ class GP(Model):
             # LVM models
             if isinstance(self.X, VariationalPosterior):
                 assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
+                index = self.X._parent_index_
                 self.unlink_parameter(self.X)
                 self.X = X
-                self.link_parameter(self.X)
+                self.link_parameter(self.X, index=index)
             else:
+                index = self.X._parent_index_
                 self.unlink_parameter(self.X)
                 from ..core import Param
-                self.X = Param('latent mean',X)
-                self.link_parameter(self.X)
+                self.X = Param('latent mean', X)
+                self.link_parameter(self.X, index=index)
         else:
             self.X = ObsAr(X)
         self.update_model(True)
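
A minimal usage sketch (not from the commit), assuming this hunk sits in GP.set_X: the new index= argument relinks the swapped-in X at its previous position in the parameter list instead of appending it at the end.

import numpy as np
import GPy

Y = np.random.randn(30, 5)
m = GPy.models.GPLVM(Y, input_dim=2)
m.set_X(np.random.randn(30, 2))   # 'latent mean' keeps its old parameter index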
@@ -437,15 +439,22 @@ class GP(Model):
         warnings.warn("Wrong naming, use predict_wishart_embedding instead. Will be removed in future versions!", DeprecationWarning)
         return self.predict_wishart_embedding(Xnew, kern, mean, covariance)

-    def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True):
+    def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True, dimensions=None):
         """
         Predict the magnification factor as

         sqrt(det(G))

-        for each point N in Xnew
+        for each point N in Xnew.
+
+        :param bool mean: whether to include the mean of the wishart embedding.
+        :param bool covariance: whether to include the covariance of the wishart embedding.
+        :param array-like dimensions: which dimensions of the input space to use [defaults to self.get_most_significant_input_dimensions()[:2]]
         """
         G = self.predict_wishard_embedding(Xnew, kern, mean, covariance)
+        if dimensions is None:
+            dimensions = self.get_most_significant_input_dimensions()[:2]
+        G = G[:, dimensions][:,:,dimensions]
         from ..util.linalg import jitchol
         mag = np.empty(Xnew.shape[0])
         for n in range(Xnew.shape[0]):
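
A usage sketch (hypothetical model m, e.g. a fitted GPLVM): dimensions restricts the Wishart metric G to two chosen input dimensions before the determinant is taken.

mag = m.predict_magnification(m.X.values, dimensions=[0, 1])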
@@ -525,21 +534,23 @@ class GP(Model):
     def get_most_significant_input_dimensions(self, which_indices=None):
         return self.kern.get_most_significant_input_dimensions(which_indices)

-    def optimize(self, optimizer=None, start=None, **kwargs):
+    def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs):
         """
         Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.

         kwargs are passed to the optimizer. They can be:

-        :param max_f_eval: maximum number of function evaluations
-        :type max_f_eval: int
+        :param max_iters: maximum number of function evaluations
+        :type max_iters: int
         :messages: whether to display during optimisation
         :type messages: bool
         :param optimizer: which optimizer to use (defaults to self.preferred optimizer), a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs', 'tnc'.
         :type optimizer: string
+        :param bool ipython_notebook: whether to use ipython notebook widgets or not.
+        :param bool clear_after_finish: if in ipython notebook, we can clear the widgets after optimization.
         """
         self.inference_method.on_optimization_start()
         try:
-            super(GP, self).optimize(optimizer, start, **kwargs)
+            super(GP, self).optimize(optimizer, start, messages, max_iters, ipython_notebook, clear_after_finish, **kwargs)
         except KeyboardInterrupt:
             print("KeyboardInterrupt caught, calling on_optimization_end() to round things up")
             self.inference_method.on_optimization_end()
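
The keywords that previously had to travel through **kwargs are now explicit; a sketch, assuming a built model m:

m.optimize(optimizer='lbfgs', messages=True, max_iters=500, ipython_notebook=False)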

GPy/examples/dimensionality_reduction.py
View file

@@ -340,7 +340,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
                    gtol=.05)
     if plot:
         m.X.plot("BGPLVM Latent Space 1D")
-        m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
+        m.kern.plot_ARD()
     return m

 def gplvm_simulation(optimize=True, verbose=1,
@@ -364,7 +364,7 @@ def gplvm_simulation(optimize=True, verbose=1,
                    gtol=.05)
     if plot:
         m.X.plot("BGPLVM Latent Space 1D")
-        m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
+        m.kern.plot_ARD()
     return m

 def ssgplvm_simulation(optimize=True, verbose=1,
                        plot=True, plot_sim=False,
@@ -388,7 +388,7 @@ def ssgplvm_simulation(optimize=True, verbose=1,
                    gtol=.05)
     if plot:
         m.X.plot("SSGPLVM Latent Space 1D")
-        m.kern.plot_ARD('SSGPLVM Simulation ARD Parameters')
+        m.kern.plot_ARD()
     return m

 def bgplvm_simulation_missing_data(optimize=True, verbose=1,
@@ -418,7 +418,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
                    gtol=.05)
     if plot:
         m.X.plot("BGPLVM Latent Space 1D")
-        m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
+        m.kern.plot_ARD()
     return m

 def bgplvm_simulation_missing_data_stochastics(optimize=True, verbose=1,
@@ -448,7 +448,7 @@ def bgplvm_simulation_missing_data_stochastics(optimize=True, verbose=1,
                    gtol=.05)
     if plot:
         m.X.plot("BGPLVM Latent Space 1D")
-        m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
+        m.kern.plot_ARD()
     return m
@@ -469,7 +469,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
     m.optimize(messages=verbose, max_iters=8e3)
     if plot:
         m.X.plot("MRD Latent Space 1D")
-        m.plot_scales("MRD Scales")
+        m.plot_scales()
     return m

 def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
@@ -496,7 +496,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
     m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1)
     if plot:
         m.X.plot("MRD Latent Space 1D")
-        m.plot_scales("MRD Scales")
+        m.plot_scales()
     return m

 def brendan_faces(optimize=True, verbose=True, plot=True):
GPy/inference/latent_function_inference/expectation_propagation.py
View file

@@ -40,6 +40,14 @@ class EPBase(object):
         # TODO: update approximation in the end as well? Maybe even with a switch?
         pass

+    def __setstate__(self, state):
+        super(EPBase, self).__setstate__(state[0])
+        self.epsilon, self.eta, self.delta = state[1]
+        self.reset()
+
+    def __getstate__(self):
+        return [super(EPBase, self).__getstate__() , [self.epsilon, self.eta, self.delta]]
+
 class EP(EPBase, ExactGaussianInference):
     def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, precision=None, K=None):
         if self.always_reset:
@@ -51,7 +59,7 @@ class EP(EPBase, ExactGaussianInference):
         if K is None:
             K = kern.K(X)

-        if self._ep_approximation is None:
+        if getattr(self, '_ep_approximation', None) is None:
             #if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
             mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
         else:
@@ -159,7 +167,7 @@ class EPDTC(EPBase, VarDTC):
         else:
             Kmn = psi1.T

-        if self._ep_approximation is None:
+        if getattr(self, '_ep_approximation', None) is None:
             mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
         else:
             mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation
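
With __getstate__/__setstate__ defined on EPBase above, models using EP survive a pickle round trip; a minimal sketch (hypothetical data):

import pickle
import numpy as np
import GPy

X = np.random.rand(40, 1)
Y = (X > 0.5).astype(float)
m = GPy.models.GPClassification(X, Y)   # EP is the default inference method here
m2 = pickle.loads(pickle.dumps(m))      # EP state restored, then reset()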

GPy/kern/__init__.py
View file

@@ -10,7 +10,7 @@ from .src.add import Add
 from .src.prod import Prod
 from .src.rbf import RBF
 from .src.linear import Linear, LinearFull
-from .src.static import Bias, White, Fixed, WhiteHeteroscedastic
+from .src.static import Bias, White, Fixed, WhiteHeteroscedastic, Precomputed
 from .src.brownian import Brownian
 from .src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
 from .src.mlp import MLP
@@ -27,6 +27,7 @@ from .src.eq_ode2 import EQ_ODE2
 from .src.integral import Integral
 from .src.integral_limits import Integral_Limits
 from .src.multidimensional_integral_limits import Multidimensional_Integral_Limits
+from .src.eq_ode1 import EQ_ODE1
 from .src.trunclinear import TruncLinear,TruncLinear_inf
 from .src.splitKern import SplitKern,DEtime
 from .src.splitKern import DEtime as DiffGenomeKern
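
After these two changes, both additions are importable from the package root:

from GPy.kern import Precomputed, EQ_ODE1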

GPy/kern/src/eq_ode1.py (new file, 649 lines)
View file

@@ -0,0 +1,649 @@
# Copyright (c) 2014, Cristian Guarnizo.
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy.special import erf, erfcx
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
from paramz.caching import Cache_this
class EQ_ODE1(Kern):
"""
Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
The outputs of this kernel have the form
.. math::
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} u_i(t-\delta_j) - d_jy_j(t)
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`u_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.
:param output_dim: number of outputs driven by latent function.
:type output_dim: int
:param W: sensitivities of each output to the latent driving function.
:type W: ndarray (output_dim x rank).
:param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
:type rank: int
:param decay: decay rates for the first order system.
:type decay: array of length output_dim.
:param delay: delay between latent force and output response.
:type delay: array of length output_dim.
:param kappa: diagonal term that allows each latent output to have an independent component to the response.
:type kappa: array of length output_dim.
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
"""
def __init__(self, input_dim=2, output_dim=1, rank=1, W = None, lengthscale=None, decay=None, active_dims=None, name='eq_ode1'):
assert input_dim == 2, "only defined for 2 input dims (time and output index)"
super(EQ_ODE1, self).__init__(input_dim=input_dim, active_dims=active_dims, name=name)
self.rank = rank
self.output_dim = output_dim
if lengthscale is None:
lengthscale = .5 + np.random.rand(self.rank)
else:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size in [1, self.rank], "Bad number of lengthscales"
if lengthscale.size != self.rank:
lengthscale = np.ones(self.rank)*lengthscale
if W is None:
W = .5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
else:
assert W.shape == (self.output_dim, self.rank)
if decay is None:
decay = np.ones(self.output_dim)
else:
decay = np.asarray(decay)
assert decay.size in [1, self.output_dim], "Bad number of decay"
if decay.size != self.output_dim:
decay = np.ones(self.output_dim)*decay
# if kappa is None:
# self.kappa = np.ones(self.output_dim)
# else:
# kappa = np.asarray(kappa)
# assert kappa.size in [1, self.output_dim], "Bad number of kappa"
# if decay.size != self.output_dim:
# decay = np.ones(self.output_dim)*kappa
#self.kappa = Param('kappa', kappa, Logexp())
#self.delay = Param('delay', delay, Logexp())
#self.is_normalized = True
#self.is_stationary = False
#self.gaussian_initial = False
self.lengthscale = Param('lengthscale', lengthscale, Logexp())
self.decay = Param('decay', decay, Logexp())
self.W = Param('W', W)
self.link_parameters(self.lengthscale, self.decay, self.W)
@Cache_this(limit=3)
def K(self, X, X2=None):
#This way is not working, indexes are lost after using k._slice_X
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
if X2 is None:
if X_flag:
#Calculate covariance function for the latent functions
index -= self.output_dim
return self._Kuu(X, index)
else:
raise NotImplementedError
else:
#This way is not working, indexes are lost after using k._slice_X
#index2 = np.asarray(X2, dtype=np.int)
#index2 = index2.reshape(index2.size,)
if hasattr(X2, 'values'):
X2 = X2.values
index2 = np.int_(np.round(X2[:, 1]))
index2 = index2.reshape(index2.size,)
X2_flag = index2[0] >= self.output_dim
#Calculate cross-covariance function
if not X_flag and X2_flag:
index2 -= self.output_dim
return self._Kfu(X, index, X2, index2) #Kfu
elif X_flag and not X2_flag:
index -= self.output_dim
return self._Kfu(X2, index2, X, index).T #Kuf
elif X_flag and X2_flag:
index -= self.output_dim
index2 -= self.output_dim
return self._Kusu(X, index, X2, index2) #Ku_s u
else:
raise NotImplementedError #Kf_s f
#Calculate the covariance function for diag(Kff(X,X))
def Kdiag(self, X):
if hasattr(X, 'values'):
index = np.int_(np.round(X[:, 1].values))
else:
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
if X_flag: #Kuudiag
return np.ones(X[:,0].shape)
else: #Kffdiag
kdiag = self._Kdiag(X)
return np.sum(kdiag, axis=1)
def _Kdiag(self, X):
#This way is not working, indexes are lost after using k._slice_X
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
index = np.int_(X[:, 1])
index = index.reshape(index.size,)
#terms that move along t
t = X[:, 0].reshape(X.shape[0], 1)
d = np.unique(index) #Output Indexes
B = self.decay.values[d]
S = self.W.values[d, :]
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
B = B.reshape(B.size, 1)
#Terms that move along q
lq = self.lengthscale.values.reshape(1, self.rank)
S2 = S*S
kdiag = np.empty((t.size, ))
#Dx1 terms
c0 = (S2/B)*((.5*np.sqrt(np.pi))*lq)
#DxQ terms
nu = lq*(B*.5)
nu2 = nu*nu
#Nx1 terms
gamt = -2.*B
gamt = gamt[index]*t
#NxQ terms
t_lq = t/lq
# Upsilon Calculations
# Using wofz
#erfnu = erf(nu)
upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :] ,t_lq+nu[index,:] ))
upm[t[:, 0] == 0, :] = 0.
upv = np.exp(nu2[index, :] + gamt + lnDifErf( -t_lq+nu[index,:], nu[index, :] ) )
upv[t[:, 0] == 0, :] = 0.
#Covariance calculation
#kdiag = np.sum(c0[index, :]*(upm-upv), axis=1)
kdiag = c0[index, :]*(upm-upv)
return kdiag
def update_gradients_full(self, dL_dK, X, X2 = None):
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
self.decay.gradient = np.zeros(self.decay.shape)
self.W.gradient = np.zeros(self.W.shape)
self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
if X2 is None:
if X_flag: #Kuu or Kmm
index -= self.output_dim
tmp = dL_dK*self._gkuu_lq(X, index)
for q in np.unique(index):
ind = np.where(index == q)
self.lengthscale.gradient[q] = tmp[np.ix_(ind[0], ind[0])].sum()
else:
raise NotImplementedError
else: #Kfu or Knm
#index2 = np.asarray(X2, dtype=np.int)
#index2 = index2.reshape(index2.size,)
if hasattr(X2, 'values'):
X2 = X2.values
index2 = np.int_(np.round(X2[:, 1]))
index2 = index2.reshape(index2.size,)
X2_flag = index2[0] >= self.output_dim
if not X_flag and X2_flag: #Kfu
index2 -= self.output_dim
else: #Kuf
dL_dK = dL_dK.T #so we obtain dL_dKfu
indtemp = index - self.output_dim
Xtemp = X
X = X2
X2 = Xtemp
index = index2
index2 = indtemp
glq, gSdq, gB = self._gkfu(X, index, X2, index2)
tmp = dL_dK*glq
for q in np.unique(index2):
ind = np.where(index2 == q)
self.lengthscale.gradient[q] = tmp[:, ind].sum()
tmpB = dL_dK*gB
tmp = dL_dK*gSdq
for d in np.unique(index):
ind = np.where(index == d)
self.decay.gradient[d] = tmpB[ind, :].sum()
for q in np.unique(index2):
ind2 = np.where(index2 == q)
self.W.gradient[d, q] = tmp[np.ix_(ind[0], ind2[0])].sum()
def update_gradients_diag(self, dL_dKdiag, X):
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
self.decay.gradient = np.zeros(self.decay.shape)
self.W.gradient = np.zeros(self.W.shape)
self.lengthscale.gradient = np.zeros(self.lengthscale.shape)
index = np.int_(X[:, 1])
index = index.reshape(index.size,)
glq, gS, gB = self._gkdiag(X, index)
if dL_dKdiag.size == X.shape[0]:
dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1))
tmp = dL_dKdiag*glq
self.lengthscale.gradient = tmp.sum(0)
tmpB = dL_dKdiag*gB
tmp = dL_dKdiag*gS
for d in np.unique(index):
ind = np.where(index == d)
self.decay.gradient[d] = tmpB[ind, :].sum()
self.W.gradient[d, :] = tmp[ind].sum(0)
def gradients_X(self, dL_dK, X, X2=None):
#index = np.asarray(X, dtype=np.int)
#index = index.reshape(index.size,)
if hasattr(X, 'values'):
X = X.values
index = np.int_(np.round(X[:, 1]))
index = index.reshape(index.size,)
X_flag = index[0] >= self.output_dim
#If input_dim == 1, use this
#gX = np.zeros((X.shape[0], 1))
#Cheat to allow gradient for input_dim==2
gX = np.zeros(X.shape)
if X2 is None: #Kuu or Kmm
if X_flag:
index -= self.output_dim
gX[:, 0] = 2.*(dL_dK*self._gkuu_X(X, index)).sum(0)
return gX
else:
raise NotImplementedError
else: #Kuf or Kmn
#index2 = np.asarray(X2, dtype=np.int)
#index2 = index2.reshape(index2.size,)
if hasattr(X2, 'values'):
X2 = X2.values
index2 = np.int_(np.round(X2[:, 1]))
index2 = index2.reshape(index2.size,)
X2_flag = index2[0] >= self.output_dim
if X_flag and not X2_flag: #gradient of Kuf(Z, X) wrt Z
index -= self.output_dim
gX[:, 0] = (dL_dK*self._gkfu_z(X2, index2, X, index).T).sum(1)
return gX
else:
raise NotImplementedError
#---------------------------------------#
# Helper functions #
#---------------------------------------#
#Evaluation of squared exponential for LFM
def _Kuu(self, X, index):
index = index.reshape(index.size,)
t = X[:, 0].reshape(X.shape[0],)
lq = self.lengthscale.values.reshape(self.rank,)
lq2 = lq*lq
#Covariance matrix initialization
kuu = np.zeros((t.size, t.size))
#Assign 1. to diagonal terms
kuu[np.diag_indices(t.size)] = 1.
#Upper triangular indices
indtri1, indtri2 = np.triu_indices(t.size, 1)
#Block Diagonal indices among Upper Triangular indices
ind = np.where(index[indtri1] == index[indtri2])
indr = indtri1[ind]
indc = indtri2[ind]
r = t[indr] - t[indc]
r2 = r*r
#Calculation of covariance function
kuu[indr, indc] = np.exp(-r2/lq2[index[indr]])
#Completion of lower triangular part
kuu[indc, indr] = kuu[indr, indc]
return kuu
def _Kusu(self, X, index, X2, index2):
index = index.reshape(index.size,)
index2 = index2.reshape(index2.size,)
t = X[:, 0].reshape(X.shape[0],1)
t2 = X2[:, 0].reshape(1,X2.shape[0])
lq = self.lengthscale.values.reshape(self.rank,)
#Covariance matrix initialization
kuu = np.zeros((t.size, t2.size))
for q in range(self.rank):
ind1 = index == q
ind2 = index2 == q
r = t[ind1]/lq[q] - t2[0,ind2]/lq[q]
r2 = r*r
#Calculation of covariance function
kuu[np.ix_(ind1, ind2)] = np.exp(-r2)
return kuu
#Evaluation of cross-covariance function
def _Kfu(self, X, index, X2, index2):
#terms that move along t
t = X[:, 0].reshape(X.shape[0], 1)
d = np.unique(index) #Output Indexes
B = self.decay.values[d]
S = self.W.values[d, :]
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
#Output related variables must be column-wise
B = B.reshape(B.size, 1)
#Input related variables must be row-wise
z = X2[:, 0].reshape(1, X2.shape[0])
lq = self.lengthscale.values.reshape((1, self.rank))
kfu = np.empty((t.size, z.size))
#DxQ terms
c0 = S*((.5*np.sqrt(np.pi))*lq)
nu = B*(.5*lq)
nu2 = nu**2
#1xM terms
z_lq = z/lq[0, index2]
#NxM terms
tz = t-z
tz_lq = tz/lq[0, index2]
# Upsilon Calculations
fullind = np.ix_(index, index2)
upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind]))
upsi[t[:, 0] == 0, :] = 0.
#Covariance calculation
kfu = c0[fullind]*upsi
return kfu
#Gradient of Kuu wrt lengthscale
def _gkuu_lq(self, X, index):
t = X[:, 0].reshape(X.shape[0],)
index = index.reshape(X.shape[0],)
lq = self.lengthscale.values.reshape(self.rank,)
lq2 = lq*lq
#Covariance matrix initialization
glq = np.zeros((t.size, t.size))
#Upper triangular indices
indtri1, indtri2 = np.triu_indices(t.size, 1)
#Block Diagonal indices among Upper Triangular indices
ind = np.where(index[indtri1] == index[indtri2])
indr = indtri1[ind]
indc = indtri2[ind]
r = t[indr] - t[indc]
r2 = r*r
r2_lq2 = r2/lq2[index[indr]]
#Calculation of covariance function
er2_lq2 = np.exp(-r2_lq2)
#Gradient wrt lq
c = 2.*r2_lq2/lq[index[indr]]
glq[indr, indc] = er2_lq2*c
#Complete the lower triangular
glq[indc, indr] = glq[indr, indc]
return glq
#Careful: this derivative needs to be transposed
def _gkuu_X(self, X, index): #Diagonal terms are always zero
t = X[:, 0].reshape(X.shape[0],)
index = index.reshape(index.size,)
lq = self.lengthscale.values.reshape(self.rank,)
lq2 = lq*lq
#Covariance matrix initialization
gt = np.zeros((t.size, t.size))
#Upper triangular indices
indtri1, indtri2 = np.triu_indices(t.size, 1) #Offset of 1 from the diagonal
#Block Diagonal indices among Upper Triangular indices
ind = np.where(index[indtri1] == index[indtri2])
indr = indtri1[ind]
indc = indtri2[ind]
r = t[indr] - t[indc]
r2 = r*r
r2_lq2 = r2/(-lq2[index[indr]])
#Calculation of covariance function
er2_lq2 = np.exp(r2_lq2)
#Gradient wrt t
c = 2.*r/lq2[index[indr]]
gt[indr, indc] = er2_lq2*c
#Complete the lower triangular
gt[indc, indr] = -gt[indr, indc]
return gt
#Gradients for Diagonal Kff
def _gkdiag(self, X, index):
index = index.reshape(index.size,)
#terms that move along t
d = np.unique(index)
B = self.decay[d].values
S = self.W[d, :].values
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
#Output related variables must be column-wise
t = X[:, 0].reshape(X.shape[0], 1)
B = B.reshape(B.size, 1)
S2 = S*S
#Input related variables must be row-wise
lq = self.lengthscale.values.reshape(1, self.rank)
gB = np.empty((t.size,))
glq = np.empty((t.size, lq.size))
gS = np.empty((t.size, lq.size))
#Dx1 terms
c0 = S2*lq*np.sqrt(np.pi)
#DxQ terms
nu = (.5*lq)*B
nu2 = nu*nu
#Nx1 terms
gamt = -B[index]*t
egamt = np.exp(gamt)
e2gamt = egamt*egamt
#NxQ terms
t_lq = t/lq
t2_lq2 = -t_lq*t_lq
etlq2gamt = np.exp(t2_lq2 + gamt) #NXQ
##Upsilon calculations
#erfnu = erf(nu) #TODO: This can be improved
upm = np.exp(nu2[index, :] + lnDifErf( nu[index, :], t_lq + nu[index, :]) )
upm[t[:, 0] == 0, :] = 0.
upv = np.exp(nu2[index, :] + 2.*gamt + lnDifErf(-t_lq + nu[index, :], nu[index, :]) ) #egamt*upv
upv[t[:, 0] == 0, :] = 0.
#Gradient wrt S
c0_S = (S/B)*(lq*np.sqrt(np.pi))
gS = c0_S[index]*(upm - upv)
#For B
CB1 = (.5*lq)**2 - .5/B**2 #DXQ
lq2_2B = (.5*lq**2)*(S2/B) #DXQ
CB2 = 2.*etlq2gamt - e2gamt - 1. #NxQ
# gradient wrt B NxZ
gB = c0[index, :]*(CB1[index, :]*upm - (CB1[index, :] - t/B[index])*upv) + \
lq2_2B[index, :]*CB2
#Gradient wrt lengthscale
#DxQ terms
c0 = (.5*np.sqrt(np.pi))*(S2/B)*(1.+.5*(lq*B)**2)
Clq1 = S2*(lq*.5)
glq = c0[index]*(upm - upv) + Clq1[index]*CB2
return glq, gS, gB
def _gkfu(self, X, index, Z, index2):
index = index.reshape(index.size,)
#TODO: reduce memory usage
#terms that move along t
d = np.unique(index)
B = self.decay[d].values
S = self.W[d, :].values
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
#t column
t = X[:, 0].reshape(X.shape[0], 1)
B = B.reshape(B.size, 1)
#z row
z = Z[:, 0].reshape(1, Z.shape[0])
index2 = index2.reshape(index2.size,)
lq = self.lengthscale.values.reshape((1, self.rank))
#kfu = np.empty((t.size, z.size))
glq = np.empty((t.size, z.size))
gSdq = np.empty((t.size, z.size))
gB = np.empty((t.size, z.size))
#Dx1 terms
B_2 = B*.5
S_pi = S*(.5*np.sqrt(np.pi))
#DxQ terms
c0 = S_pi*lq #lq*Sdq*sqrt(pi)
nu = B*lq*.5
nu2 = nu*nu
#1xM terms
z_lq = z/lq[0, index2]
#NxM terms
tz = t-z
tz_lq = tz/lq[0, index2]
etz_lq2 = -np.exp(-tz_lq*tz_lq)
ez_lq_Bt = np.exp(-z_lq*z_lq -B[index]*t)
# Upsilon calculations
fullind = np.ix_(index, index2)
upsi = np.exp(nu2[fullind] - B[index]*tz + lnDifErf( -tz_lq + nu[fullind], z_lq+nu[fullind] ) )
upsi[t[:, 0] == 0., :] = 0.
#Gradient wrt S
#DxQ term
Sa1 = lq*(.5*np.sqrt(np.pi))
gSdq = Sa1[0,index2]*upsi
#Gradient wrt lq
la1 = S_pi*(1. + 2.*nu2)
Slq = S*lq
uplq = etz_lq2*(tz_lq/lq[0, index2] + B_2[index])
uplq += ez_lq_Bt*(-z_lq/lq[0, index2] + B_2[index])
glq = la1[fullind]*upsi
glq += Slq[fullind]*uplq
#Gradient wrt B
Slq = Slq*lq
nulq = nu*lq
upBd = etz_lq2 + ez_lq_Bt
gB = c0[fullind]*(nulq[fullind] - tz)*upsi + .5*Slq[fullind]*upBd
return glq, gSdq, gB
#TODO: reduce memory usage
def _gkfu_z(self, X, index, Z, index2): #Kfu(t,z)
index = index.reshape(index.size,)
#terms that move along t
d = np.unique(index)
B = self.decay[d].values
S = self.W[d, :].values
#Index transformation
indd = np.arange(self.output_dim)
indd[d] = np.arange(d.size)
index = indd[index]
#t column
t = X[:, 0].reshape(X.shape[0], 1)
B = B.reshape(B.size, 1)
#z row
z = Z[:, 0].reshape(1, Z.shape[0])
index2 = index2.reshape(index2.size,)
lq = self.lengthscale.values.reshape((1, self.rank))
#kfu = np.empty((t.size, z.size))
gz = np.empty((t.size, z.size))
#Dx1 terms
S_pi =S*(.5*np.sqrt(np.pi))
#DxQ terms
#Slq = S*lq
c0 = S_pi*lq #lq*Sdq*sqrt(pi)
nu = (.5*lq)*B
nu2 = nu*nu
#1xM terms
z_lq = z/lq[0, index2]
z_lq2 = -z_lq*z_lq
#NxQ terms
t_lq = t/lq
#NxM terms
zt_lq = z_lq - t_lq[:, index2]
zt_lq2 = -zt_lq*zt_lq
# Upsilon calculations
fullind = np.ix_(index, index2)
z2 = z_lq + nu[fullind]
z1 = z2 - t_lq[:, index2]
upsi = np.exp(nu2[fullind] - B[index]*(t-z) + lnDifErf(z1,z2) )
upsi[t[:, 0] == 0., :] = 0.
#Gradient wrt z
za1 = c0*B
#za2 = S_w
gz = za1[fullind]*upsi + S[fullind]*( np.exp(z_lq2 - B[index]*t) -np.exp(zt_lq2) )
return gz
def lnDifErf(z1,z2):
#Z2 is always positive
logdiferf = np.zeros(z1.shape)
ind = np.where(z1>0.)
ind2 = np.where(z1<=0.)
if ind[0].size > 0:
z1i = z1[ind]
z12 = z1i*z1i
z2i = z2[ind]
logdiferf[ind] = -z12 + np.log(erfcx(z1i) - erfcx(z2i)*np.exp(z12-z2i**2))
if ind2[0].size > 0:
z1i = z1[ind2]
z2i = z2[ind2]
logdiferf[ind2] = np.log(erf(z2i) - erf(z1i))
return logdiferf
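
A minimal usage sketch (not part of the file above) of the input convention the kernel assumes: column 0 carries time, column 1 an integer index, and indices >= output_dim select latent forces (index = output_dim + q).

import numpy as np
import GPy

t = np.linspace(0, 5, 10)[:, None]
X_out = np.hstack([t, np.zeros_like(t)])    # rows of output 0
X_lat = np.hstack([t, 2*np.ones_like(t)])   # rows of latent force 0
k = GPy.kern.EQ_ODE1(input_dim=2, output_dim=2, rank=1)
Kuu = k.K(X_lat)            # latent-latent covariance
Kfu = k.K(X_out, X_lat)     # output-latent cross-covariance
kff = k.Kdiag(X_out)        # diagonal of the output covariance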

GPy/kern/src/eq_ode2.py
View file

@@ -44,7 +44,7 @@ class EQ_ODE2(Kern):
             lengthscale = np.asarray(lengthscale)
             assert lengthscale.size in [1, self.rank], "Bad number of lengthscales"
             if lengthscale.size != self.rank:
-                lengthscale = np.ones(self.input_dim)*lengthscale
+                lengthscale = np.ones(self.rank)*lengthscale
         if W is None:
             #W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
@@ -71,7 +71,7 @@ class EQ_ODE2(Kern):
         #index = index.reshape(index.size,)
         if hasattr(X, 'values'):
             X = X.values
-        index = np.int_(X[:, 1])
+        index = np.int_(np.round(X[:, 1]))
         index = index.reshape(index.size,)
         X_flag = index[0] >= self.output_dim
         if X2 is None:
@@ -79,7 +79,7 @@ class EQ_ODE2(Kern):
                 #Calculate covariance function for the latent functions
                 index -= self.output_dim
                 return self._Kuu(X, index)
-            else:
+            else: #Kff full
                 raise NotImplementedError
         else:
             #This way is not working, indexes are lost after using k._slice_X
@@ -87,19 +87,40 @@ class EQ_ODE2(Kern):
             #index2 = index2.reshape(index2.size,)
             if hasattr(X2, 'values'):
                 X2 = X2.values
-            index2 = np.int_(X2[:, 1])
+            index2 = np.int_(np.round(X2[:, 1]))
             index2 = index2.reshape(index2.size,)
             X2_flag = index2[0] >= self.output_dim
             #Calculate cross-covariance function
             if not X_flag and X2_flag:
                 index2 -= self.output_dim
                 return self._Kfu(X, index, X2, index2) #Kfu
-            else:
+            elif X_flag and not X2_flag:
                 index -= self.output_dim
                 return self._Kfu(X2, index2, X, index).T #Kuf
+            elif X_flag and X2_flag:
+                index -= self.output_dim
+                index2 -= self.output_dim
+                return self._Kusu(X, index, X2, index2) #Ku_s u
+            else:
+                raise NotImplementedError #Kf_s f

     #Calculate the covariance function for diag(Kff(X,X))
     def Kdiag(self, X):
+        if hasattr(X, 'values'):
+            index = np.int_(np.round(X[:, 1].values))
+        else:
+            index = np.int_(np.round(X[:, 1]))
+        index = index.reshape(index.size,)
+        X_flag = index[0] >= self.output_dim
+        if X_flag: #Kuudiag
+            return np.ones(X[:,0].shape)
+        else: #Kffdiag
+            kdiag = self._Kdiag(X)
+            return np.sum(kdiag, axis=1)
+
+    #Calculate the covariance function for diag(Kff(X,X))
+    def _Kdiag(self, X):
         #This way is not working, indexes are lost after using k._slice_X
         #index = np.asarray(X, dtype=np.int)
         #index = index.reshape(index.size,)
@@ -132,7 +153,7 @@ class EQ_ODE2(Kern):
         #Terms that move along q
         lq = self.lengthscale.values.reshape(1, self.lengthscale.size)
         S2 = S*S
-        kdiag = np.empty((t.size, ))
+        kdiag = np.empty((t.size, lq.size))
         indD = np.arange(B.size)

         #(1) When wd is real
@@ -187,8 +208,8 @@ class EQ_ODE2(Kern):
             upv[t1[:, 0] == 0, :] = 0.

             #Covariance calculation
-            kdiag[ind3t] = np.sum(np.real(K01[ind]*upm), axis=1)
-            kdiag[ind3t] += np.sum(np.real((c0[ind]*ec)*upv), axis=1)
+            kdiag[ind3t] = np.real(K01[ind]*upm)
+            kdiag[ind3t] += np.real((c0[ind]*ec)*upv)

         #(2) When w_d is complex
         if np.any(wbool):
@@ -265,7 +286,7 @@ class EQ_ODE2(Kern):
             upvc[t1[:, 0] == 0, :] = 0.

             #Covariance calculation
-            kdiag[ind2t] = np.sum(K011[ind]*upm + K012[ind]*upmc + (c0[ind]*ec)*upv + (c0[ind]*ec2)*upvc, axis=1)
+            kdiag[ind2t] = K011[ind]*upm + K012[ind]*upmc + (c0[ind]*ec)*upv + (c0[ind]*ec2)*upvc
         return kdiag

     def update_gradients_full(self, dL_dK, X, X2 = None):
@@ -336,16 +357,17 @@ class EQ_ODE2(Kern):
         index = index.reshape(index.size,)
         glq, gS, gB, gC = self._gkdiag(X, index)

-        tmp = dL_dKdiag.reshape(index.size, 1)*glq
+        if dL_dKdiag.size == X.shape[0]:
+            dL_dKdiag = np.reshape(dL_dKdiag, (index.size, 1))
+        tmp = dL_dKdiag*glq
         self.lengthscale.gradient = tmp.sum(0)
-        #TODO: Avoid the reshape by a priori knowing the shape of dL_dKdiag
-        tmpB = dL_dKdiag*gB.reshape(dL_dKdiag.shape)
-        tmpC = dL_dKdiag*gC.reshape(dL_dKdiag.shape)
-        tmp = dL_dKdiag.reshape(index.size, 1)*gS
+        tmpB = dL_dKdiag*gB
+        tmpC = dL_dKdiag*gC
+        tmp = dL_dKdiag*gS
         for d in np.unique(index):
             ind = np.where(index == d)
-            self.B.gradient[d] = tmpB[ind].sum()
-            self.C.gradient[d] = tmpC[ind].sum()
+            self.B.gradient[d] = tmpB[ind, :].sum()
+            self.C.gradient[d] = tmpC[ind, :].sum()
             self.W.gradient[d, :] = tmp[ind].sum(0)

     def gradients_X(self, dL_dK, X, X2=None):
@@ -410,6 +432,23 @@ class EQ_ODE2(Kern):
         kuu[indc, indr] = kuu[indr, indc]
         return kuu

+    def _Kusu(self, X, index, X2, index2):
+        index = index.reshape(index.size,)
+        index2 = index2.reshape(index2.size,)
+        t = X[:, 0].reshape(X.shape[0],1)
+        t2 = X2[:, 0].reshape(1,X2.shape[0])
+        lq = self.lengthscale.values.reshape(self.rank,)
+        #Covariance matrix initialization
+        kuu = np.zeros((t.size, t2.size))
+        for q in range(self.rank):
+            ind1 = index == q
+            ind2 = index2 == q
+            r = t[ind1]/lq[q] - t2[0,ind2]/lq[q]
+            r2 = r*r
+            #Calculation of covariance function
+            kuu[np.ix_(ind1, ind2)] = np.exp(-r2)
+        return kuu
+
     #Evaluation of cross-covariance function
     def _Kfu(self, X, index, X2, index2):
         #terms that move along t
@@ -632,8 +671,8 @@ class EQ_ODE2(Kern):
         lq = self.lengthscale.values.reshape(1, self.rank)
         lq2 = lq*lq

-        gB = np.empty((t.size,))
-        gC = np.empty((t.size,))
+        gB = np.empty((t.size, lq.size))
+        gC = np.empty((t.size, lq.size))
         glq = np.empty((t.size, lq.size))
         gS = np.empty((t.size, lq.size))
@@ -723,8 +762,8 @@ class EQ_ODE2(Kern):
             Ba4_1 = (S2lq*lq)*dgam_dB/w2
             Ba4 = Ba4_1*c

-            gB[ind3t] = np.sum(np.real(Ba1[ind]*upm) - np.real(((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv)\
-                + np.real(Ba4[ind]*upmd) + np.real((Ba4_1[ind]*ec)*upvd), axis=1)
+            gB[ind3t] = np.real(Ba1[ind]*upm) - np.real(((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv)\
+                + np.real(Ba4[ind]*upmd) + np.real((Ba4_1[ind]*ec)*upvd)

             # gradient wrt C
             dw_dC = - alphad*dw_dB
@@ -738,8 +777,8 @@ class EQ_ODE2(Kern):
             Ca4_1 = (S2lq*lq)*dgam_dC/w2
             Ca4 = Ca4_1*c

-            gC[ind3t] = np.sum(np.real(Ca1[ind]*upm) - np.real(((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv)\
-                + np.real(Ca4[ind]*upmd) + np.real((Ca4_1[ind]*ec)*upvd), axis=1)
+            gC[ind3t] = np.real(Ca1[ind]*upm) - np.real(((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv)\
+                + np.real(Ca4[ind]*upmd) + np.real((Ca4_1[ind]*ec)*upvd)

             #Gradient wrt lengthscale
             #DxQ terms
@@ -868,10 +907,10 @@ class EQ_ODE2(Kern):
             Ba2_1c = c0*(dgamc_dB*(0.5/gamc2 - 0.25*lq2) + 0.5/(w2*gamc))
             Ba2_2c = c0*dgamc_dB/gamc

-            gB[ind2t] = np.sum(Ba1[ind]*upm - ((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv\
+            gB[ind2t] = Ba1[ind]*upm - ((Ba2_1[ind] + Ba2_2[ind]*t1)*egamt - Ba3[ind]*egamct)*upv\
                 + Ba4[ind]*upmd + (Ba4_1[ind]*ec)*upvd\
                 + Ba1c[ind]*upmc - ((Ba2_1c[ind] + Ba2_2c[ind]*t1)*egamct - Ba3c[ind]*egamt)*upvc\
-                + Ba4c[ind]*upmdc + (Ba4_1c[ind]*ec2)*upvdc, axis=1)
+                + Ba4c[ind]*upmdc + (Ba4_1c[ind]*ec2)*upvdc

             ##Gradient wrt C
             dw_dC = 0.5*alphad/w
@@ -895,10 +934,10 @@ class EQ_ODE2(Kern):
             Ca4_1c = S2lq2*(dgamc_dC/w2)
             Ca4c = Ca4_1c*c2

-            gC[ind2t] = np.sum(Ca1[ind]*upm - ((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv\
+            gC[ind2t] = Ca1[ind]*upm - ((Ca2_1[ind] + Ca2_2[ind]*t1)*egamt - (Ca3_1[ind] + Ca3_2[ind]*t1)*egamct)*upv\
                 + Ca4[ind]*upmd + (Ca4_1[ind]*ec)*upvd\
                 + Ca1c[ind]*upmc - ((Ca2_1c[ind] + Ca2_2c[ind]*t1)*egamct - (Ca3_1c[ind] + Ca3_2c[ind]*t1)*egamt)*upvc\
-                + Ca4c[ind]*upmdc + (Ca4_1c[ind]*ec2)*upvdc, axis=1)
+                + Ca4c[ind]*upmdc + (Ca4_1c[ind]*ec2)*upvdc

             #Gradient wrt lengthscale
             #DxQ terms

GPy/kern/src/static.py
View file

@@ -181,7 +181,7 @@ class Fixed(Static):
         self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)

     def update_gradients_diag(self, dL_dKdiag, X):
-        self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.fixed_K)
+        self.variance.gradient = np.einsum('i,i', dL_dKdiag, np.diagonal(self.fixed_K))

     def psi2(self, Z, variational_posterior):
         return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
@@ -192,3 +192,53 @@ class Fixed(Static):
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         self.variance.gradient = dL_dpsi0.sum()
+class Precomputed(Fixed):
+    def __init__(self, input_dim, covariance_matrix, variance=1., active_dims=None, name='precomputed'):
+        """
+        Class for precomputed kernels, indexed by columns in X
+
+        Usage example:
+
+        import numpy as np
+        from GPy.models import GPClassification
+        from GPy.kern import Precomputed
+        from sklearn.cross_validation import LeaveOneOut
+
+        n = 10
+        d = 100
+        X = np.arange(n).reshape((n,1))         # column vector of indices
+        y = 2*np.random.binomial(1,0.5,(n,1))-1
+        X0 = np.random.randn(n,d)
+        k = np.dot(X0,X0.T)
+        kern = Precomputed(1,k)                 # k is a n x n covariance matrix
+
+        cv = LeaveOneOut(n)
+        ypred = y.copy()
+        for train, test in cv:
+            m = GPClassification(X[train], y[train], kernel=kern)
+            m.optimize()
+            ypred[test] = 2*(m.predict(X[test])[0]>0.5)-1
+
+        :param input_dim: the number of input dimensions
+        :type input_dim: int
+        :param variance: the variance of the kernel
+        :type variance: float
+        """
+        super(Precomputed, self).__init__(input_dim, covariance_matrix, variance, active_dims, name)
+
+    def K(self, X, X2=None):
+        if X2 is None:
+            return self.variance * self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')]
+        else:
+            return self.variance * self.fixed_K[X[:,0].astype('int')][:,X2[:,0].astype('int')]
+
+    def Kdiag(self, X):
+        return self.variance * self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')].diagonal()
+
+    def update_gradients_full(self, dL_dK, X, X2=None):
+        if X2 is None:
+            self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')])
+        else:
+            self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K[X[:,0].astype('int')][:,X2[:,0].astype('int')])
+
+    def update_gradients_diag(self, dL_dKdiag, X):
+        self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self.fixed_K[X[:,0].astype('int')][:,X[:,0].astype('int')])
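
A self-contained variant of the docstring example above (hypothetical data, without the now-deprecated sklearn.cross_validation dependency): inputs are row indices into the precomputed matrix.

import numpy as np
import GPy

n = 20
X0 = np.random.randn(n, 5)
K = X0.dot(X0.T)                                 # n x n precomputed covariance
idx = np.arange(n, dtype=float).reshape(-1, 1)   # inputs index rows of K
y = np.random.randn(n, 1)
m = GPy.models.GPRegression(idx, y, kernel=GPy.kern.Precomputed(1, K))
m.optimize()
mu, var = m.predict(idx)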

GPy/kern/src/stationary.py
View file

@@ -315,10 +315,10 @@ class Exponential(Stationary):
         super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

     def K_of_r(self, r):
-        return self.variance * np.exp(-0.5 * r)
+        return self.variance * np.exp(-r)

     def dK_dr(self, r):
-        return -0.5*self.K_of_r(r)
+        return -self.K_of_r(r)

 #    def sde(self):
 #        """

GPy/models/__init__.py
View file

@@ -24,3 +24,5 @@ from .one_vs_all_sparse_classification import OneVsAllSparseClassification
 from .dpgplvm import DPBayesianGPLVM
 from .state_space_model import StateSpace
+
+from .ibp_lfm import IBPLFM

GPy/models/ibp_lfm.py (new file, 535 lines)
View file

@@ -0,0 +1,535 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.sparse_gp_mpi import SparseGP_MPI
from .. import kern
from ..util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, pdinv
from ..util import diag
from ..core.parameterization import Param
from ..likelihoods import Gaussian
from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
from ..inference.latent_function_inference.posterior import Posterior
from GPy.core.parameterization.variational import VariationalPrior
from ..core.parameterization.parameterized import Parameterized
from paramz.transformations import Logexp, Logistic, __fixed__
log_2_pi = np.log(2*np.pi)
class VarDTC_minibatch_IBPLFM(VarDTC_minibatch):
'''
Modifications of VarDTC_minibatch for IBP LFM
'''
def __init__(self, batchsize=None, limit=3, mpi_comm=None):
super(VarDTC_minibatch_IBPLFM, self).__init__(batchsize, limit, mpi_comm)
def gatherPsiStat(self, kern, X, Z, Y, beta, Zp):
het_noise = beta.size > 1
assert beta.size == 1
trYYT = self.get_trYYT(Y)
if self.Y_speedup and not het_noise:
Y = self.get_YYTfactor(Y)
num_inducing = Z.shape[0]
num_data, output_dim = Y.shape
batchsize = num_data if self.batchsize is None else self.batchsize
psi2_full = np.zeros((num_inducing, num_inducing)) # MxM
psi1Y_full = np.zeros((output_dim, num_inducing)) # DxM
psi0_full = 0.
YRY_full = 0.
for n_start in range(0, num_data, batchsize):
n_end = min(batchsize+n_start, num_data)
if batchsize == num_data:
Y_slice = Y
X_slice = X
else:
Y_slice = Y[n_start:n_end]
X_slice = X[n_start:n_end]
if het_noise:
b = beta[n_start]
YRY_full += np.inner(Y_slice, Y_slice)*b
else:
b = beta
psi0 = kern._Kdiag(X_slice) #Kff^q
psi1 = kern.K(X_slice, Z) #Kfu
indX = X_slice.values
indX = np.int_(np.round(indX[:, -1]))
Zp = Zp.gamma.values
# Extend Zp across columns
indZ = Z.values
indZ = np.int_(np.round(indZ[:, -1])) - Zp.shape[0]
Zpq = Zp[:, indZ]
for d in np.unique(indX):
indd = indX == d
psi1d = psi1[indd, :]
Zpd = Zp[d, :]
Zp2 = Zpd[:, None]*Zpd[None, :] - np.diag(np.power(Zpd, 2)) + np.diag(Zpd)
psi2_full += (np.dot(psi1d.T, psi1d)*Zp2[np.ix_(indZ, indZ)])*b #Zp2*Kufd*Kfud*beta
psi0_full += np.sum(psi0*Zp[indX, :])*b
psi1Y_full += np.dot(Y_slice.T, psi1*Zpq[indX, :])*b
if not het_noise:
YRY_full = trYYT*beta
if self.mpi_comm is not None:
from mpi4py import MPI
psi0_all = np.array(psi0_full)
psi1Y_all = psi1Y_full.copy()
psi2_all = psi2_full.copy()
YRY_all = np.array(YRY_full)
self.mpi_comm.Allreduce([psi0_full, MPI.DOUBLE], [psi0_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([psi1Y_full, MPI.DOUBLE], [psi1Y_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([psi2_full, MPI.DOUBLE], [psi2_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([YRY_full, MPI.DOUBLE], [YRY_all, MPI.DOUBLE])
return psi0_all, psi1Y_all, psi2_all, YRY_all
return psi0_full, psi1Y_full, psi2_full, YRY_full
def inference_likelihood(self, kern, X, Z, likelihood, Y, Zp):
"""
The first phase of inference:
Compute: log-likelihood, dL_dKmm
Cached intermediate results: Kmm, KmmInv,
"""
num_data, output_dim = Y.shape
input_dim = Z.shape[0]
if self.mpi_comm is not None:
from mpi4py import MPI
num_data_all = np.array(num_data,dtype=np.int32)
self.mpi_comm.Allreduce([np.int32(num_data), MPI.INT], [num_data_all, MPI.INT])
num_data = num_data_all
#see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1
if het_noise:
self.batchsize = 1
psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, Zp)
#======================================================================
# Compute Common Components
#======================================================================
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
if not np.isfinite(Kmm).all():
print(Kmm)
Lm = jitchol(Kmm)
LmInv = dtrtri(Lm)
LmInvPsi2LmInvT = np.dot(LmInv, np.dot(psi2_full, LmInv.T))
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
LL = jitchol(Lambda)
LLInv = dtrtri(LL)
logdet_L = 2.*np.sum(np.log(np.diag(LL)))
LmLLInv = np.dot(LLInv, LmInv)
b = np.dot(psi1Y_full, LmLLInv.T)
bbt = np.sum(np.square(b))
v = np.dot(b, LmLLInv).T
LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)
tmp = -np.dot(np.dot(LLInv.T, LLinvPsi1TYYTPsi1LLinvT + output_dim*np.eye(input_dim)), LLInv)
dL_dpsi2R = .5*np.dot(np.dot(LmInv.T, tmp + output_dim*np.eye(input_dim)), LmInv)
# Cache intermediate results
self.midRes['dL_dpsi2R'] = dL_dpsi2R
self.midRes['v'] = v
#======================================================================
# Compute log-likelihood
#======================================================================
if het_noise:
logL_R = -np.sum(np.log(beta))
else:
logL_R = -num_data*np.log(beta)
logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)*.5 - output_dim*logdet_L*.5
#======================================================================
# Compute dL_dKmm
#======================================================================
dL_dKmm = dL_dpsi2R - .5*output_dim*np.dot(np.dot(LmInv.T, LmInvPsi2LmInvT), LmInv)
#======================================================================
# Compute the Posterior distribution of inducing points p(u|Y)
#======================================================================
if not self.Y_speedup or het_noise:
wd_inv = backsub_both_sides(Lm, np.eye(input_dim)- backsub_both_sides(LL, np.identity(input_dim), transpose='left'), transpose='left')
post = Posterior(woodbury_inv=wd_inv, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
else:
post = None
#======================================================================
# Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
#======================================================================
if not het_noise:
dL_dthetaL = .5*(YRY_full*beta + beta*output_dim*psi0_full - num_data*output_dim*beta) - beta*(dL_dpsi2R*psi2_full).sum() - beta*(v.T*psi1Y_full).sum()
self.midRes['dL_dthetaL'] = dL_dthetaL
return logL, dL_dKmm, post
def inference_minibatch(self, kern, X, Z, likelihood, Y, Zp):
"""
The second phase of inference: Computing the derivatives over a minibatch of Y
Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL
return a flag showing whether it reached the end of Y (isEnd)
"""
num_data, output_dim = Y.shape
#see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = beta*self.get_YYTfactor(Y)
if self.Y_speedup and not het_noise:
YYT_factor = self.get_YYTfactor(Y)
else:
YYT_factor = Y
n_start = self.batch_pos
batchsize = num_data if self.batchsize is None else self.batchsize
n_end = min(batchsize+n_start, num_data)
if n_end == num_data:
isEnd = True
self.batch_pos = 0
else:
isEnd = False
self.batch_pos = n_end
if batchsize == num_data:
Y_slice = YYT_factor
X_slice = X
else:
Y_slice = YYT_factor[n_start:n_end]
X_slice = X[n_start:n_end]
psi0 = kern._Kdiag(X_slice) #Kffdiag
psi1 = kern.K(X_slice, Z) #Kfu
betapsi1 = np.einsum('n,nm->nm', beta, psi1)
X_slice = X_slice.values
Z = Z.values
Zp = Zp.gamma.values
indX = np.int_(X_slice[:, -1])
indZ = np.int_(Z[:, -1]) - Zp.shape[0]
betaY = beta*Y_slice
#======================================================================
# Load Intermediate Results
#======================================================================
dL_dpsi2R = self.midRes['dL_dpsi2R']
v = self.midRes['v']
#======================================================================
# Compute dL_dpsi
#======================================================================
dL_dpsi0 = -.5*output_dim*(beta * Zp[indX, :]) #XxQ #TODO: Check this gradient
dL_dpsi1 = np.dot(betaY, v.T)
dL_dEZp = psi1*dL_dpsi1
dL_dpsi1 = Zp[np.ix_(indX, indZ)]*dL_dpsi1
dL_dgamma = np.zeros(Zp.shape)
for d in np.unique(indX):
indd = indX == d
betapsi1d = betapsi1[indd, :]
psi1d = psi1[indd, :]
Zpd = Zp[d, :]
Zp2 = Zpd[:, None]*Zpd[None, :] - np.diag(np.power(Zpd, 2)) + np.diag(Zpd)
dL_dpsi1[indd, :] += np.dot(betapsi1d, Zp2[np.ix_(indZ, indZ)] * dL_dpsi2R)*2.
dL_EZp2 = dL_dpsi2R * (np.dot(psi1d.T, psi1d) * beta)*2. # Zpd*Kufd*Kfud*beta
#Gradient of Likelihood wrt gamma is calculated here
EZ = Zp[d, indZ]
for q in range(Zp.shape[1]):
EZt = EZ.copy()
indq = indZ == q
EZt[indq] = .5
dL_dgamma[d, q] = np.sum(dL_dEZp[np.ix_(indd, indq)]) + np.sum(dL_EZp2[:, indq]*EZt[:, None]) -\
.5*beta*(np.sum(psi0[indd, q]))
#======================================================================
# Compute dL_dthetaL
#======================================================================
if isEnd:
dL_dthetaL = self.midRes['dL_dthetaL']
else:
dL_dthetaL = 0.
grad_dict = {'dL_dKdiag': dL_dpsi0,
'dL_dKnm': dL_dpsi1,
'dL_dthetaL': dL_dthetaL,
'dL_dgamma': dL_dgamma}
return isEnd, (n_start, n_end), grad_dict
def update_gradients(model, mpi_comm=None):
if mpi_comm is None:
Y = model.Y
X = model.X
else:
Y = model.Y_local
X = model.X[model.N_range[0]:model.N_range[1]]
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y, model.Zp)
het_noise = model.likelihood.variance.size > 1
if het_noise:
dL_dthetaL = np.empty((model.Y.shape[0],))
else:
dL_dthetaL = np.float64(0.)
kern_grad = model.kern.gradient.copy()
kern_grad[:] = 0.
model.Z.gradient = 0.
gamma_gradient = model.Zp.gamma.copy()
gamma_gradient[:] = 0.
isEnd = False
while not isEnd:
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, X, model.Z, model.likelihood, Y, model.Zp)
if (n_range[1]-n_range[0]) == X.shape[0]:
X_slice = X
elif mpi_comm is None:
X_slice = model.X[n_range[0]:n_range[1]]
else:
X_slice = model.X[model.N_range[0]+n_range[0]:model.N_range[0]+n_range[1]]
#gradients w.r.t. kernel
model.kern.update_gradients_diag(grad_dict['dL_dKdiag'], X_slice)
kern_grad += model.kern.gradient
model.kern.update_gradients_full(grad_dict['dL_dKnm'], X_slice, model.Z)
kern_grad += model.kern.gradient
#gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_X(grad_dict['dL_dKnm'].T, model.Z, X_slice)
#gradients w.r.t. posterior parameters of Zp
gamma_gradient += grad_dict['dL_dgamma']
if het_noise:
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
else:
dL_dthetaL += grad_dict['dL_dthetaL']
# Gather the gradients from multiple MPI nodes
if mpi_comm is not None:
from mpi4py import MPI
if het_noise:
raise "het_noise not implemented!"
kern_grad_all = kern_grad.copy()
Z_grad_all = model.Z.gradient.copy()
gamma_grad_all = gamma_gradient.copy()
mpi_comm.Allreduce([kern_grad, MPI.DOUBLE], [kern_grad_all, MPI.DOUBLE])
mpi_comm.Allreduce([model.Z.gradient, MPI.DOUBLE], [Z_grad_all, MPI.DOUBLE])
mpi_comm.Allreduce([gamma_gradient, MPI.DOUBLE], [gamma_grad_all, MPI.DOUBLE])
kern_grad = kern_grad_all
model.Z.gradient = Z_grad_all
gamma_gradient = gamma_grad_all
#gradients w.r.t. kernel
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
model.kern.gradient += kern_grad
#gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z)
#gradient w.r.t. gamma
model.Zp.gamma.gradient = gamma_gradient
# Update Log-likelihood
KL_div = model.variational_prior.KL_divergence(model.Zp)
# update for the KL divergence
model.variational_prior.update_gradients_KL(model.Zp)
model._log_marginal_likelihood += KL_div
# dL_dthetaL
model.likelihood.update_gradients(dL_dthetaL)
class IBPPosterior(Parameterized):
'''
The IBP distribution for variational approximations.
'''
def __init__(self, binary_prob, tau=None, name='Sensitivity space', *a, **kw):
"""
binary_prob : the probability of including a latent function over an output.
"""
super(IBPPosterior, self).__init__(name=name, *a, **kw)
self.gamma = Param("binary_prob", binary_prob, Logistic(1e-10, 1. - 1e-10))
self.link_parameter(self.gamma)
if tau is not None:
assert tau.size == 2*self.gamma.shape[1]
self.tau = Param("tau", tau, Logexp())
else:
self.tau = Param("tau", np.ones((2, self.gamma.shape[1])), Logexp())
self.link_parameter(self.tau)
def set_gradients(self, grad):
self.gamma.gradient, self.tau.gradient = grad
def __getitem__(self, s):
pass
# if isinstance(s, (int, slice, tuple, list, np.ndarray)):
# import copy
# n = self.__new__(self.__class__, self.name)
# dc = self.__dict__.copy()
# dc['binary_prob'] = self.binary_prob[s]
# dc['tau'] = self.tau
# dc['parameters'] = copy.copy(self.parameters)
# n.__dict__.update(dc)
# n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
# n.parameters[dc['tau']._parent_index_] = dc['tau']
# n._gradient_array_ = None
# oversize = self.size - self.gamma.size - self.tau.size
# n.size = n.gamma.size + n.tau.size + oversize
# return n
# else:
# return super(IBPPosterior, self).__getitem__(s)
class IBPPrior(VariationalPrior):
def __init__(self, rank, alpha=2., name='IBPPrior', **kw):
super(IBPPrior, self).__init__(name=name, **kw)
from paramz.transformations import __fixed__
self.rank = rank
self.alpha = Param('alpha', alpha, __fixed__)
self.link_parameter(self.alpha)
def KL_divergence(self, variational_posterior):
from scipy.special import gamma, psi
eta, tau = variational_posterior.gamma.values, variational_posterior.tau.values
sum_eta = np.sum(eta, axis=0) #sum_d gamma(d,q)
D_seta = eta.shape[0] - sum_eta
ad = self.alpha/eta.shape[1]
psitau1 = psi(tau[0, :])
psitau2 = psi(tau[1, :])
sumtau = np.sum(tau, axis=0)
psitau = psi(sumtau)
# E[log p(z)]
part1 = np.sum(sum_eta*psitau1 + D_seta*psitau2 - eta.shape[0]*psitau)
# E[log p(pi)]
part1 += (ad - 1.)*np.sum(psitau1 - psitau) + eta.shape[1]*np.log(ad)
#H(z)
part2 = np.sum(-(1.-eta)*np.log(1.-eta) - eta*np.log(eta))
#H(pi)
part2 += np.sum(np.log(gamma(tau[0, :])*gamma(tau[1, :])/gamma(sumtau))-(tau[0, :]-1.)*psitau1-(tau[1, :]-1.)*psitau2\
+ (sumtau-2.)*psitau)
return part1+part2
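    # Note: despite the method name, the value returned above is
    # E_q[log p(Z)] + E_q[log p(pi)] + H[q(Z)] + H[q(pi)], i.e. the *negative*
    # KL divergence -KL(q || p). This is why callers add it to the log
    # marginal likelihood instead of subtracting it.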
def update_gradients_KL(self, variational_posterior):
eta, tau = variational_posterior.gamma.values, variational_posterior.tau.values
from scipy.special import psi, polygamma
dgamma = np.log(1. - eta) - np.log(eta) + psi(tau[0, :]) - psi(tau[1, :])
variational_posterior.gamma.gradient += dgamma
ad = self.alpha/self.rank
sumeta = np.sum(eta, axis=0)
sumtau = np.sum(tau, axis=0)
common = (-eta.shape[0] - (ad - 1.) + (sumtau - 2.))*polygamma(1, sumtau)
variational_posterior.tau.gradient[0, :] = (sumeta + ad - tau[0, :])*polygamma(1, tau[0, :]) + common
variational_posterior.tau.gradient[1, :] = ((eta.shape[0] - sumeta) - (tau[1, :] - 1.))*polygamma(1, tau[1, :])\
+ common
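
# A finite-difference sanity check sketch (illustrative; not part of the
# original file): perturb one entry of gamma and compare the resulting change
# in KL_divergence with the analytic gradient from update_gradients_KL.
def _check_gamma_gradient(prior, post, d=0, q=0, eps=1e-6):
    post.gamma.gradient[:] = 0.
    post.tau.gradient[:] = 0.
    prior.update_gradients_KL(post)
    analytic = post.gamma.gradient[d, q]
    base = prior.KL_divergence(post)
    post.gamma[d, q] += eps
    numeric = (prior.KL_divergence(post) - base) / eps
    post.gamma[d, q] -= eps
    return analytic, numeric  # should agree up to finite-difference error
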
class IBPLFM(SparseGP_MPI):
"""
Indian Buffet Process for Latent Force Models
:param Y: observed data (np.ndarray) or GPy.likelihood
:type Y: np.ndarray | GPy.likelihood instance
:param X: input data (np.ndarray) [X:values, X:index], where the index column gives the number of the output
:type X: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param rank: number of latent functions
:type rank: int
"""
def __init__(self, X, Y, input_dim=2, output_dim=1, rank=1, Gamma=None, num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='IBP for LFM', alpha=2., beta=2., connM=None, tau=None, mpi_comm=None, normalizer=False, variational_prior=None,**kwargs):
if kernel is None:
kernel = kern.EQ_ODE2(input_dim, output_dim, rank)
if Gamma is None:
gamma = np.empty((output_dim, rank)) # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.1 * np.random.randn(output_dim, rank)
gamma[gamma>1.-1e-9] = 1.-1e-9
gamma[gamma<1e-9] = 1e-9
else:
gamma = Gamma.copy()
#TODO: create a vector of inducing points
if Z is None:
Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1]
if likelihood is None:
likelihood = Gaussian()
if inference_method is None:
inference_method = VarDTC_minibatch_IBPLFM(mpi_comm=mpi_comm)
#Definition of variational terms
self.variational_prior = IBPPrior(rank=rank, alpha=alpha) if variational_prior is None else variational_prior
self.Zp = IBPPosterior(gamma, tau=tau)
super(IBPLFM, self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
self.link_parameter(self.Zp, index=0)
def set_Zp_gradients(self, Zp, Zp_grad):
"""Set the gradients of the posterior distribution of Zp in its specific form."""
Zp.gamma.gradient = Zp_grad
def get_Zp_gradients(self, Zp):
"""Get the gradients of the posterior distribution of Zp in its specific form."""
return Zp.gamma.gradient
def _propogate_Zp_val(self):
pass
def parameters_changed(self):
#super(IBPLFM,self).parameters_changed()
if isinstance(self.inference_method, VarDTC_minibatch_IBPLFM):
update_gradients(self, mpi_comm=self.mpi_comm)
return
# Add the KL divergence term
self._log_marginal_likelihood += self.variational_prior.KL_divergence(self.Zp)
#TODO Change the following according to this variational distribution
#self.Zp.gamma.gradient = self.
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.Zp)
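
# A minimal usage sketch (illustrative; assumes the class is exposed as
# GPy.models.IBPLFM). X stacks the input values with an output-index column,
# as described in the docstring above; the kernel and likelihood fall back
# to their defaults (EQ_ODE2 and Gaussian).
def _ibplfm_demo():
    import numpy as np
    import GPy
    D, N = 3, 40  # number of outputs, points per output
    t = np.linspace(0., 5., N)
    X = np.column_stack((np.tile(t, D), np.repeat(np.arange(D), N)))
    Y = np.sin(X[:, :1]) + 0.05 * np.random.randn(N * D, 1)
    m = GPy.models.IBPLFM(X, Y, input_dim=2, output_dim=D, rank=2, num_inducing=10)
    m.optimize(max_iters=10, messages=False)
    return m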

View file

@@ -3237,6 +3237,7 @@ class ContDescrStateSpace(DescreteStateSpace):
AB = np.dot(AB, np.vstack((np.zeros((n,n)),np.eye(n)))) AB = np.dot(AB, np.vstack((np.zeros((n,n)),np.eye(n))))
Q_noise_1 = linalg.solve(AB[n:,:].T,AB[:n,:].T) Q_noise_1 = linalg.solve(AB[n:,:].T,AB[:n,:].T)
Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T)
# The covariance matrix Q by matrix fraction decomposition <- # The covariance matrix Q by matrix fraction decomposition <-
if compute_derivatives: if compute_derivatives:
@@ -3276,7 +3277,8 @@ class ContDescrStateSpace(DescreteStateSpace):
else: else:
dA = None dA = None
dQ = None dQ = None
Q_noise = Q_noise_1 Q_noise = Q_noise_2
# Inaccuracies have been observed when Q_noise_1 was used.
#Q_noise = Q_noise_1 #Q_noise = Q_noise_1

View file

@@ -15,19 +15,10 @@
# #
import numpy as np import numpy as np
from scipy import linalg
from scipy import stats from scipy import stats
from ..core import Model
from .. import kern
#from GPy.plotting.matplot_dep.models_plots import gpplot
#from GPy.plotting.matplot_dep.base_plots import x_frame1D
#from GPy.plotting.matplot_dep import Tango
#import pylab as pb
from GPy.core.parameterization.param import Param
import GPy
from .. import likelihoods from .. import likelihoods
#from . import state_space_setup as ss_setup
from ..core import Model
from . import state_space_main as ssm from . import state_space_main as ssm
from . import state_space_setup as ss_setup from . import state_space_setup as ss_setup
@@ -37,12 +28,12 @@ class StateSpace(Model):
if len(X.shape) == 1: if len(X.shape) == 1:
X = np.atleast_2d(X).T X = np.atleast_2d(X).T
self.num_data, input_dim = X.shape self.num_data, self.input_dim = X.shape
if len(Y.shape) == 1: if len(Y.shape) == 1:
Y = np.atleast_2d(Y).T Y = np.atleast_2d(Y).T
assert input_dim==1, "State space methods are only for 1D data" assert self.input_dim==1, "State space methods are only for 1D data"
if len(Y.shape)==2: if len(Y.shape)==2:
num_data_Y, self.output_dim = Y.shape num_data_Y, self.output_dim = Y.shape
@@ -168,7 +159,7 @@ class StateSpace(Model):
def log_likelihood(self): def log_likelihood(self):
return self._log_marginal_likelihood return self._log_marginal_likelihood
def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False): def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, **kw):
""" """
Performs the actual prediction for new X points. Performs the actual prediction for new X points.
Inner function. It is called only from inside this class. Inner function. It is called only from inside this class.
@@ -270,22 +261,23 @@ class StateSpace(Model):
# Return the posterior of the state # Return the posterior of the state
return (m, V) return (m, V)
def predict(self, Xnew=None, filteronly=False): def predict(self, Xnew=None, filteronly=False, include_likelihood=True, **kw):
# Run the Kalman filter to get the state # Run the Kalman filter to get the state
(m, V) = self._raw_predict(Xnew,filteronly=filteronly) (m, V) = self._raw_predict(Xnew,filteronly=filteronly)
# Add the noise variance to the state variance # Add the noise variance to the state variance
V += float(self.Gaussian_noise.variance) if include_likelihood:
V += float(self.likelihood.variance)
# Lower and upper bounds # Lower and upper bounds
lower = m - 2*np.sqrt(V) #lower = m - 2*np.sqrt(V)
upper = m + 2*np.sqrt(V) #upper = m + 2*np.sqrt(V)
# Return mean and variance # Return mean and variance
return (m, V, lower, upper) return m, V
def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5)): def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), **kw):
mu, var = self._raw_predict(Xnew) mu, var = self._raw_predict(Xnew)
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles] return [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]
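
Since predict now returns only the mean and variance, callers that previously unpacked four values can recompute the 2-sigma bounds themselves or use predict_quantiles (a sketch against the new signature; model and Xnew are illustrative names):

    import numpy as np
    m, V = model.predict(Xnew)  # V includes the likelihood variance by default
    lower, upper = m - 2 * np.sqrt(V), m + 2 * np.sqrt(V)
    # or equivalently via the quantile helper:
    lower, upper = model.predict_quantiles(Xnew, quantiles=(2.5, 97.5))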

View file

@@ -52,6 +52,17 @@ def inject_plotting():
GP.plot_f = gpy_plot.gp_plots.plot_f GP.plot_f = gpy_plot.gp_plots.plot_f
GP.plot_magnification = gpy_plot.latent_plots.plot_magnification GP.plot_magnification = gpy_plot.latent_plots.plot_magnification
from ..models import StateSpace
StateSpace.plot_data = gpy_plot.data_plots.plot_data
StateSpace.plot_data_error = gpy_plot.data_plots.plot_data_error
StateSpace.plot_errorbars_trainset = gpy_plot.data_plots.plot_errorbars_trainset
StateSpace.plot_mean = gpy_plot.gp_plots.plot_mean
StateSpace.plot_confidence = gpy_plot.gp_plots.plot_confidence
StateSpace.plot_density = gpy_plot.gp_plots.plot_density
StateSpace.plot_samples = gpy_plot.gp_plots.plot_samples
StateSpace.plot = gpy_plot.gp_plots.plot
StateSpace.plot_f = gpy_plot.gp_plots.plot_f
from ..core import SparseGP from ..core import SparseGP
SparseGP.plot_inducing = gpy_plot.data_plots.plot_inducing SparseGP.plot_inducing = gpy_plot.data_plots.plot_inducing
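
With the lines added above, StateSpace models pick up the same plotting entry points as GP models; for example (a sketch, assuming 1D inputs, a state-space-capable kernel, and a configured plotting backend):

    import numpy as np
    import GPy
    X = np.linspace(0., 10., 100)[:, None]
    Y = np.sin(X) + 0.1 * np.random.randn(100, 1)
    m = GPy.models.StateSpace(X, Y, kernel=GPy.kern.Matern32(1))
    m.plot()  # now injected onto StateSpace, as on GP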

View file

@@ -158,7 +158,7 @@ def _plot_data_error(self, canvas, which_data_rows='all',
return plots return plots
def plot_inducing(self, visible_dims=None, projection='2d', label='inducing', **plot_kwargs): def plot_inducing(self, visible_dims=None, projection='2d', label='inducing', legend=True, **plot_kwargs):
""" """
Plot the inducing inputs of a sparse gp model Plot the inducing inputs of a sparse gp model
@@ -167,7 +167,7 @@ def plot_inducing(self, visible_dims=None, projection='2d', label='inducing', **
""" """
canvas, kwargs = pl().new_canvas(projection=projection, **plot_kwargs) canvas, kwargs = pl().new_canvas(projection=projection, **plot_kwargs)
plots = _plot_inducing(self, canvas, visible_dims, projection, label, **kwargs) plots = _plot_inducing(self, canvas, visible_dims, projection, label, **kwargs)
return pl().add_to_canvas(canvas, plots, legend=label is not None) return pl().add_to_canvas(canvas, plots, legend=legend)
def _plot_inducing(self, canvas, visible_dims, projection, label, **plot_kwargs): def _plot_inducing(self, canvas, visible_dims, projection, label, **plot_kwargs):
if visible_dims is None: if visible_dims is None:
@@ -175,7 +175,7 @@ def _plot_inducing(self, canvas, visible_dims, projection, label, **plot_kwargs)
visible_dims = [i for i in sig_dims if i is not None] visible_dims = [i for i in sig_dims if i is not None]
free_dims = get_free_dims(self, visible_dims, None) free_dims = get_free_dims(self, visible_dims, None)
Z = self.Z[:, free_dims] Z = self.Z.values
plots = {} plots = {}
#one dimensional plotting #one dimensional plotting

View file

@@ -112,28 +112,29 @@ def plot_latent_inducing(self,
which_indices=None, which_indices=None,
legend=False, legend=False,
plot_limits=None, plot_limits=None,
marker='^', marker=None,
num_samples=1000,
projection='2d', projection='2d',
**kwargs): **kwargs):
""" """
Plot a scatter plot of the inducing inputs. Plot a scatter plot of the inducing inputs.
:param array-like labels: a label for each data point (row) of the inputs :param [int] which_indices: which input dimensions to plot against each other
:param (int, int) which_indices: which input dimensions to plot against each other
:param bool legend: whether to plot the legend on the figure :param bool legend: whether to plot the legend on the figure
:param plot_limits: the plot limits for the plot :param plot_limits: the plot limits for the plot
:type plot_limits: (xmin, xmax, ymin, ymax) or ((xmin, xmax), (ymin, ymax)) :type plot_limits: (xmin, xmax, ymin, ymax) or ((xmin, xmax), (ymin, ymax))
:param str marker: markers to use - cycle if more labels then markers are given :param str marker: marker to use [default is custom arrow like]
:param kwargs: the kwargs for the scatter plots :param kwargs: the kwargs for the scatter plots
:param str projection: for now 2d or 3d projection (other projections can be implemented, see developer documentation)
""" """
canvas, projection, kwargs, sig_dims = _new_canvas(self, projection, kwargs, which_indices) canvas, projection, kwargs, sig_dims = _new_canvas(self, projection, kwargs, which_indices)
Z = self.Z.values if legend: label = 'inducing'
labels = np.array(['inducing'] * Z.shape[0]) else: label = None
if marker is not None:
kwargs['marker'] = marker kwargs['marker'] = marker
update_not_existing_kwargs(kwargs, pl().defaults.inducing_2d) # @UndefinedVariable update_not_existing_kwargs(kwargs, pl().defaults.inducing_2d) # @UndefinedVariable
scatters = _plot_latent_scatter(canvas, Z, sig_dims, labels, num_samples=num_samples, projection=projection, **kwargs) from .data_plots import _plot_inducing
scatters = _plot_inducing(self, canvas, sig_dims[:2], projection, label, **kwargs)
return pl().add_to_canvas(canvas, dict(scatter=scatters), legend=legend) return pl().add_to_canvas(canvas, dict(scatter=scatters), legend=legend)

View file

@@ -190,6 +190,7 @@ def scatter_label_generator(labels, X, visible_dims, marker=None):
x = X[index, input_1] x = X[index, input_1]
y = X[index, input_2] y = X[index, input_2]
z = X[index, input_3] z = X[index, input_3]
yield x, y, z, this_label, index, m yield x, y, z, this_label, index, m
def subsample_X(X, labels, num_samples=1000): def subsample_X(X, labels, num_samples=1000):

View file

@@ -45,7 +45,7 @@ it gives back an empty default, when defaults are not defined.
# Data plots: # Data plots:
data_1d = dict(lw=1.5, marker='x', color='k') data_1d = dict(lw=1.5, marker='x', color='k')
data_2d = dict(s=35, edgecolors='none', linewidth=0., cmap=cm.get_cmap('hot'), alpha=.5) data_2d = dict(s=35, edgecolors='none', linewidth=0., cmap=cm.get_cmap('hot'), alpha=.5)
inducing_1d = dict(lw=0, s=500, facecolors=Tango.colorsHex['darkRed']) inducing_1d = dict(lw=0, s=500, color=Tango.colorsHex['darkRed'])
inducing_2d = dict(s=17, edgecolor='k', linewidth=.4, color='white', alpha=.5, marker='^') inducing_2d = dict(s=17, edgecolor='k', linewidth=.4, color='white', alpha=.5, marker='^')
inducing_3d = dict(lw=.3, s=500, color=Tango.colorsHex['darkRed'], edgecolor='k') inducing_3d = dict(lw=.3, s=500, color=Tango.colorsHex['darkRed'], edgecolor='k')
xerrorbar = dict(color='k', fmt='none', elinewidth=.5, alpha=.5) xerrorbar = dict(color='k', fmt='none', elinewidth=.5, alpha=.5)

View file

@@ -106,7 +106,7 @@ class MatplotlibPlots(AbstractPlottingLibrary):
return ax.plot(X, Y, color=color, zs=Z, label=label, **kwargs) return ax.plot(X, Y, color=color, zs=Z, label=label, **kwargs)
return ax.plot(X, Y, color=color, label=label, **kwargs) return ax.plot(X, Y, color=color, label=label, **kwargs)
def plot_axis_lines(self, ax, X, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs): def plot_axis_lines(self, ax, X, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
from matplotlib import transforms from matplotlib import transforms
from matplotlib.path import Path from matplotlib.path import Path
if 'marker' not in kwargs: if 'marker' not in kwargs:
@@ -126,14 +126,14 @@ class MatplotlibPlots(AbstractPlottingLibrary):
bottom=bottom, label=label, color=color, bottom=bottom, label=label, color=color,
**kwargs) **kwargs)
def xerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs): def xerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
if not('linestyle' in kwargs or 'ls' in kwargs): if not('linestyle' in kwargs or 'ls' in kwargs):
kwargs['ls'] = 'none' kwargs['ls'] = 'none'
#if Z is not None: #if Z is not None:
# return ax.errorbar(X, Y, Z, xerr=error, ecolor=color, label=label, **kwargs) # return ax.errorbar(X, Y, Z, xerr=error, ecolor=color, label=label, **kwargs)
return ax.errorbar(X, Y, xerr=error, ecolor=color, label=label, **kwargs) return ax.errorbar(X, Y, xerr=error, ecolor=color, label=label, **kwargs)
def yerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['mediumBlue'], label=None, **kwargs): def yerrorbar(self, ax, X, Y, error, color=Tango.colorsHex['darkRed'], label=None, **kwargs):
if not('linestyle' in kwargs or 'ls' in kwargs): if not('linestyle' in kwargs or 'ls' in kwargs):
kwargs['ls'] = 'none' kwargs['ls'] = 'none'
#if Z is not None: #if Z is not None:

View file

@@ -131,14 +131,15 @@ class PlotlyPlots(AbstractPlottingLibrary):
#not matplotlib marker #not matplotlib marker
pass pass
marker_kwargs = marker_kwargs or {} marker_kwargs = marker_kwargs or {}
marker_kwargs.setdefault('symbol', marker) if 'symbol' not in marker_kwargs:
marker_kwargs['symbol'] = marker
if Z is not None: if Z is not None:
return Scatter3d(x=X, y=Y, z=Z, mode='markers', return Scatter3d(x=X, y=Y, z=Z, mode='markers',
showlegend=label is not None, showlegend=label is not None,
marker=Marker(color=color, colorscale=cmap, **marker_kwargs), marker=Marker(color=color, colorscale=cmap, **marker_kwargs),
name=label, **kwargs) name=label, **kwargs)
return Scatter(x=X, y=Y, mode='markers', showlegend=label is not None, return Scatter(x=X, y=Y, mode='markers', showlegend=label is not None,
marker=Marker(color=color, colorscale=cmap, **marker_kwargs or {}), marker=Marker(color=color, colorscale=cmap, **marker_kwargs),
name=label, **kwargs) name=label, **kwargs)
def plot(self, ax, X, Y, Z=None, color=None, label=None, line_kwargs=None, **kwargs): def plot(self, ax, X, Y, Z=None, color=None, label=None, line_kwargs=None, **kwargs):

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 135 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 196 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 186 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.9 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 118 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 116 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 108 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 178 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 115 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.2 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 182 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 180 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 206 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 86 KiB

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show more