Merge pull request #656 from SheffieldML/devel

push PR to deploy
2026-06-11 15:15:15 +02:00 · 2018-09-02 23:13:52 +01:00 · 2018-09-02 23:13:52 +01:00 · ac4176721b
commit ac4176721b
parent d4bff7505e 0c6438608d
104 changed files with 3337 additions and 2055 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -19,6 +19,7 @@ env:
  #- PYTHON_VERSION=3.4
  - PYTHON_VERSION=3.5
  - PYTHON_VERSION=3.6
+  #- PYTHON_VERSION=3.7

 before_install:
 - wget https://github.com/mzwiessele/travis_scripts/raw/master/download_miniconda.sh
@ -51,7 +52,7 @@ after_success:
 before_deploy:
  - if [[ "$TRAVIS_OS_NAME" == "linux" ]];
    then
-      export DIST='sdist';
+      export DIST='sdist bdist_rpm bdist_dumb';
    elif [[ "$TRAVIS_OS_NAME" == "osx" ]];
    then
      export DIST='bdist_wheel';
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
--- a/GPy/version.py
+++ b/GPy/version.py
@ -1 +1 @@
-__version__ = "1.9.2"
+__version__ = "1.9.5"
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@ -110,7 +110,14 @@ class GP(Model):
        self.posterior = None

    def to_dict(self, save_data=True):
-        input_dict = super(GP, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :param boolean save_data: if true, it adds the training data self.X and self.Y to the dictionary
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+        input_dict = super(GP, self)._save_to_input_dict()
        input_dict["class"] = "GPy.core.GP"
        if not save_data:
            input_dict["X"] = None
@ -137,14 +144,14 @@ class GP(Model):
        return input_dict

    @staticmethod
-    def _from_dict(input_dict, data=None):
+    def _format_input_dict(input_dict, data=None):
        import GPy
        import numpy as np
        if (input_dict['X'] is None) or (input_dict['Y'] is None):
            assert(data is not None)
            input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])
        elif data is not None:
-            print("WARNING: The model has been saved with X,Y! The original values are being overriden!")
+            warnings.warn("WARNING: The model has been saved with X,Y! The original values are being overridden!")
            input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])
        else:
            input_dict["X"], input_dict["Y"] = np.array(input_dict['X']), np.array(input_dict['Y'])
@ -166,6 +173,11 @@ class GP(Model):
            input_dict["normalizer"] = GPy.util.normalizer._Norm.from_dict(normalizer)
        else:
            input_dict["normalizer"] = normalizer
+        return input_dict
+
+    @staticmethod
+    def _build_from_input_dict(input_dict, data=None):
+        input_dict = GP._format_input_dict(input_dict, data)
        return GP(**input_dict)

    def save_model(self, output_filename, compress=True, save_data=True):
@ -282,7 +294,7 @@ class GP(Model):
            mu += self.mean_function.f(Xnew)
        return mu, var

-    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None, 
+    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None,
                likelihood=None, include_likelihood=True):
        """
        Predict the function(s) at the new point(s) Xnew. This includes the
@ -566,7 +578,7 @@ class GP(Model):
                mag[n] = np.sqrt(np.linalg.det(G[n, :, :]))
        return mag

-    def posterior_samples_f(self,X, size=10, full_cov=True, **predict_kwargs):
+    def posterior_samples_f(self,X, size=10, **predict_kwargs):
        """
        Samples the posterior GP at the points X.

@ -574,35 +586,29 @@ class GP(Model):
        :type X: np.ndarray (Nnew x self.input_dim)
        :param size: the number of a posteriori samples.
        :type size: int.
-        :param full_cov: whether to return the full covariance matrix, or just the diagonal.
-        :type full_cov: bool.
-        :returns: fsim: set of simulations
-        :rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
+        :returns: set of simulations
+        :rtype: np.ndarray (Nnew x D x samples) 
        """
-        m, v = self._raw_predict(X,  full_cov=full_cov, **predict_kwargs)
+        predict_kwargs["full_cov"] = True  # Always use the full covariance for posterior samples. 
+        m, v = self._raw_predict(X,  **predict_kwargs)
        if self.normalizer is not None:
            m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)

        def sim_one_dim(m, v):
-            if not full_cov:
-                return np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
-            else:
-                return np.random.multivariate_normal(m.flatten(), v, size).T
+            return np.random.multivariate_normal(m, v, size).T

        if self.output_dim == 1:
-            return sim_one_dim(m, v)
+            return sim_one_dim(m.flatten(), v)[:, np.newaxis, :]
        else:
-            fsim = np.empty((self.output_dim, self.num_data, size))
+            fsim = np.empty((X.shape[0], self.output_dim, size))
            for d in range(self.output_dim):
-                if full_cov and v.ndim == 3:
-                    fsim[d] = sim_one_dim(m[:, d], v[:, :, d])
-                elif (not full_cov) and v.ndim == 2:
-                    fsim[d] = sim_one_dim(m[:, d], v[:, d])
+                if v.ndim == 3:
+                    fsim[:, d, :] = sim_one_dim(m[:, d], v[:, :, d])
                else:
-                    fsim[d] = sim_one_dim(m[:, d], v)
+                    fsim[:, d, :] = sim_one_dim(m[:, d], v)
        return fsim

-    def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None, likelihood=None, **predict_kwargs):
+    def posterior_samples(self, X, size=10, Y_metadata=None, likelihood=None, **predict_kwargs):
        """
        Samples the posterior GP at the points X.

@ -610,19 +616,17 @@ class GP(Model):
        :type X: np.ndarray (Nnew x self.input_dim.)
        :param size: the number of a posteriori samples.
        :type size: int.
-        :param full_cov: whether to return the full covariance matrix, or just the diagonal.
-        :type full_cov: bool.
        :param noise_model: for mixed noise likelihood, the noise model to use in the samples.
        :type noise_model: integer.
        :returns: Ysim: set of simulations,
        :rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
        """
-        fsim = self.posterior_samples_f(X, size, full_cov=full_cov, **predict_kwargs)
+        fsim = self.posterior_samples_f(X, size, **predict_kwargs)
        if likelihood is None:
            likelihood = self.likelihood
        if fsim.ndim == 3:
-            for d in range(fsim.shape[0]):
-                fsim[d] = likelihood.samples(fsim[d], Y_metadata=Y_metadata)
+            for d in range(fsim.shape[1]):
+                fsim[:, d] = likelihood.samples(fsim[:, d], Y_metadata=Y_metadata)
        else:
            fsim = likelihood.samples(fsim, Y_metadata=Y_metadata)
        return fsim
@ -643,7 +647,7 @@ class GP(Model):

        :param max_iters: maximum number of function evaluations
        :type max_iters: int
-        :messages: whether to display during optimisation
+        :param messages: whether to display during optimisation
        :type messages: bool
        :param optimizer: which optimizer to use (defaults to self.preferred optimizer), a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs', 'tnc'.
        :type optimizer: string
--- a/GPy/core/mapping.py
+++ b/GPy/core/mapping.py
@ -28,7 +28,7 @@ class Mapping(Parameterized):
    def to_dict(self):
        raise NotImplementedError

-    def _to_dict(self):
+    def _save_to_input_dict(self):
        input_dict = {}
        input_dict["input_dim"] = self.input_dim
        input_dict["output_dim"] = self.output_dim
@ -37,16 +37,27 @@ class Mapping(Parameterized):

    @staticmethod
    def from_dict(input_dict):
+        """
+        Instantiate an object of a derived class using the information
+        in input_dict (built by the to_dict method of the derived class).
+        More specifically, after reading the derived class from input_dict,
+        it calls the method _build_from_input_dict of the derived class.
+        Note: This method should not be overrided in the derived class. In case
+        it is needed, please override _build_from_input_dict instate.
+
+        :param dict input_dict: Dictionary with all the information needed to
+           instantiate the object.
+        """
        import copy
        input_dict = copy.deepcopy(input_dict)
        mapping_class = input_dict.pop('class')
        input_dict["name"] = str(input_dict["name"])
        import GPy
        mapping_class = eval(mapping_class)
-        return mapping_class._from_dict(mapping_class, input_dict)
+        return mapping_class._build_from_input_dict(mapping_class, input_dict)

    @staticmethod
-    def _from_dict(mapping_class, input_dict):
+    def _build_from_input_dict(mapping_class, input_dict):
        return mapping_class(**input_dict)


--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -8,7 +8,10 @@ class Model(ParamzModel, Priorizable):
    def __init__(self, name):
        super(Model, self).__init__(name)  # Parameterized.__init__(self)

-    def _to_dict(self):
+    def _save_to_input_dict(self):
+        """
+        It is used by the public method to_dict to create json serializable dictionary.
+        """
        input_dict = {}
        input_dict["name"] = self.name
        return input_dict
@ -18,16 +21,37 @@ class Model(ParamzModel, Priorizable):

    @staticmethod
    def from_dict(input_dict, data=None):
+        """
+        Instantiate an object of a derived class using the information
+        in input_dict (built by the to_dict method of the derived class).
+        More specifically, after reading the derived class from input_dict,
+        it calls the method _build_from_input_dict of the derived class.
+        Note: This method should not be overrided in the derived class. In case
+        it is needed, please override _build_from_input_dict instate.
+
+        :param dict input_dict: Dictionary with all the information needed to
+           instantiate the object.
+        """
        import copy
        input_dict = copy.deepcopy(input_dict)
        model_class = input_dict.pop('class')
        input_dict["name"] = str(input_dict["name"])
        import GPy
        model_class = eval(model_class)
-        return model_class._from_dict(input_dict, data)
+        return model_class._build_from_input_dict(input_dict, data)

    @staticmethod
-    def _from_dict(model_class, input_dict, data=None):
+    def _build_from_input_dict(model_class, input_dict, data=None):
+        """
+        This method is used by the public method from_dict to build an object
+        of class model_class using the information contained in input_dict.
+        Note: This method is often overrided in the derived class to deal with
+        any pre-processing of the parameters in input_dict before calling the
+        constructor of the object.
+
+        :param str model_class: Class of the object to build.
+        :param dict input_dict: Extra information needed by the constructor of model_class.
+        """
        return model_class(**input_dict)

    def save_model(self, output_filename, compress=True, save_data=True):
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -117,3 +117,26 @@ class SparseGP(GP):
            self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
            self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
        self._Zgrad = self.Z.gradient.copy()
+
+    def to_dict(self, save_data=True):
+        """
+        Convert the object into a json serializable dictionary.
+
+        :param boolean save_data: if true, it adds the training data self.X and self.Y to the dictionary
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+        input_dict = super(SparseGP, self).to_dict(save_data)
+        input_dict["class"] = "GPy.core.SparseGP"
+        input_dict["Z"] = self.Z.tolist()
+        return input_dict
+
+    @staticmethod
+    def _format_input_dict(input_dict, data=None):
+        input_dict = GP._format_input_dict(input_dict, data)
+        input_dict["Z"] = np.array(input_dict["Z"])
+        return input_dict
+
+    @staticmethod
+    def _build_from_input_dict(input_dict, data=None):
+        input_dict = SparseGP._format_input_dict(input_dict, data)
+        return SparseGP(**input_dict)
--- a/GPy/inference/latent_function_inference/init.py
+++ b/GPy/inference/latent_function_inference/init.py
@ -41,7 +41,7 @@ class LatentFunctionInference(object):
        """
        pass

-    def _to_dict(self):
+    def _save_to_input_dict(self):
        input_dict = {}
        return input_dict

@ -50,15 +50,27 @@ class LatentFunctionInference(object):

    @staticmethod
    def from_dict(input_dict):
+        """
+        Instantiate an object of a derived class using the information
+        in input_dict (built by the to_dict method of the derived class).
+        More specifically, after reading the derived class from input_dict,
+        it calls the method _build_from_input_dict of the derived class.
+        Note: This method should not be overrided in the derived class. In case
+        it is needed, please override _build_from_input_dict instate.
+
+        :param dict input_dict: Dictionary with all the information needed to
+           instantiate the object.
+        """
+
        import copy
        input_dict = copy.deepcopy(input_dict)
        inference_class = input_dict.pop('class')
        import GPy
        inference_class = eval(inference_class)
-        return inference_class._from_dict(inference_class, input_dict)
+        return inference_class._build_from_input_dict(inference_class, input_dict)

    @staticmethod
-    def _from_dict(inference_class, input_dict):
+    def _build_from_input_dict(inference_class, input_dict):
        return inference_class(**input_dict)

 class InferenceMethodList(LatentFunctionInference, list):
@ -82,6 +94,7 @@ class InferenceMethodList(LatentFunctionInference, list):
            self.append(inf)

 from .exact_gaussian_inference import ExactGaussianInference
+from .exact_studentt_inference import ExactStudentTInference
 from .laplace import Laplace,LaplaceBlock
 from GPy.inference.latent_function_inference.var_dtc import VarDTC
 from .expectation_propagation import EP, EPDTC
--- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py
+++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py
@ -22,7 +22,15 @@ class ExactGaussianInference(LatentFunctionInference):
        pass#self._YYTfactor_cache = caching.cache()

    def to_dict(self):
-        input_dict = super(ExactGaussianInference, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(ExactGaussianInference, self)._save_to_input_dict()
        input_dict["class"] = "GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference"
        return input_dict

--- a/GPy/inference/latent_function_inference/exact_studentt_inference.py
+++ b/GPy/inference/latent_function_inference/exact_studentt_inference.py
@ -0,0 +1,49 @@
+# Copyright (c) 2017, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+from . import LatentFunctionInference
+from .posterior import StudentTPosterior
+from ...util.linalg import pdinv, dpotrs, tdot
+from ...util import diag
+
+import numpy as np
+from scipy.special import gammaln, digamma
+
+
+class ExactStudentTInference(LatentFunctionInference):
+    """
+    An object for inference of student-t processes (not for GP with student-t likelihood!).
+
+    The function self.inference returns a StudentTPosterior object, which summarizes
+    the posterior.
+    """
+
+    def inference(self, kern, X, Y, nu, mean_function=None, K=None):
+        m = 0 if mean_function is None else mean_function.f(X)
+        K = kern.K(X) if K is None else K
+
+        YYT_factor = Y - m
+        Ky = K.copy()
+        diag.add(Ky, 1e-8)
+
+        # Posterior representation
+        Wi, LW, LWi, W_logdet = pdinv(Ky)
+        alpha, _ = dpotrs(LW, YYT_factor, lower=1)
+        beta = np.sum(alpha * YYT_factor)
+        posterior = StudentTPosterior(nu, woodbury_chol=LW, woodbury_vector=alpha, K=K)
+
+        # Log marginal
+        N = Y.shape[0]
+        D = Y.shape[1]
+        log_marginal = 0.5 * (-N * np.log((nu - 2) * np.pi) - W_logdet - (nu + N) * np.log(1 + beta / (nu - 2)))
+        log_marginal += gammaln((nu + N) / 2) - gammaln(nu / 2)
+
+        # Gradients
+        dL_dK = 0.5 * ((nu + N) / (nu + beta - 2) * tdot(alpha) - D * Wi)
+        dL_dnu = -N / (nu - 2.) + digamma(0.5 * (nu + N)) - digamma(0.5 * nu)
+        dL_dnu -= np.log(1 + beta / (nu - 2.))
+        dL_dnu += ((nu + N) * beta) / ((nu - 2) * (beta + nu - 2))
+        dL_dnu *= 0.5
+        gradients = {'dL_dK': dL_dK, 'dL_dnu': dL_dnu, 'dL_dm': alpha}
+
+        return posterior, log_marginal, gradients
--- a/GPy/inference/latent_function_inference/expectation_propagation.py
+++ b/GPy/inference/latent_function_inference/expectation_propagation.py
@ -28,6 +28,14 @@ class cavityParams(object):
        self.tau[i] = 1./post_params.Sigma_diag[i] - eta*ga_approx.tau[i]
        self.v[i] = post_params.mu[i]/post_params.Sigma_diag[i] - eta*ga_approx.v[i]
    def to_dict(self):
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
        return {"tau": self.tau.tolist(), "v": self.v.tolist()}
    @staticmethod
    def from_dict(input_dict):
@ -59,6 +67,14 @@ class gaussianApproximation(object):

        return (delta_tau, delta_v)
    def to_dict(self):
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
        return {"tau": self.tau.tolist(), "v": self.v.tolist()}
    @staticmethod
    def from_dict(input_dict):
@ -89,6 +105,14 @@ class posteriorParams(posteriorParamsBase):
        DSYR(self.Sigma, si, -ci)

    def to_dict(self):
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
        #TODO: Implement a more memory efficient variant
        if self.L is None:
            return { "mu": self.mu.tolist(), "Sigma": self.Sigma.tolist()}
@ -132,6 +156,21 @@ class posteriorParamsDTC(posteriorParamsBase):
        self.mu += (delta_v-delta_tau*self.mu[i])*si
        #mu = np.dot(Sigma, v_tilde)

+    def to_dict(self):
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        return { "mu": self.mu.tolist(), "Sigma_diag": self.Sigma_diag.tolist()}
+
+    @staticmethod
+    def from_dict(input_dict):
+        return posteriorParamsDTC(np.array(input_dict["mu"]), np.array(input_dict["Sigma_diag"]))
+
    @staticmethod
    def _recompute(LLT0, Kmn, ga_approx):
        LLT = LLT0 + np.dot(Kmn*ga_approx.tau[None,:],Kmn.T)
@ -198,8 +237,8 @@ class EPBase(object):
    def __getstate__(self):
        return [super(EPBase, self).__getstate__() , [self.epsilon, self.eta, self.delta]]

-    def _to_dict(self):
-        input_dict = super(EPBase, self)._to_dict()
+    def _save_to_input_dict(self):
+        input_dict = super(EPBase, self)._save_to_input_dict()
        input_dict["epsilon"]=self.epsilon
        input_dict["eta"]=self.eta
        input_dict["delta"]=self.delta
@ -363,7 +402,15 @@ class EP(EPBase, ExactGaussianInference):
        return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}

    def to_dict(self):
-        input_dict = super(EP, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(EP, self)._save_to_input_dict()
        input_dict["class"] = "GPy.inference.latent_function_inference.expectation_propagation.EP"
        if self.ga_approx_old is not  None:
            input_dict["ga_approx_old"] = self.ga_approx_old.to_dict()
@ -377,7 +424,7 @@ class EP(EPBase, ExactGaussianInference):
        return input_dict

    @staticmethod
-    def _from_dict(inference_class, input_dict):
+    def _build_from_input_dict(inference_class, input_dict):
        ga_approx_old = input_dict.pop('ga_approx_old', None)
        if ga_approx_old is not None:
            ga_approx_old = gaussianApproximation.from_dict(ga_approx_old)
@ -395,7 +442,7 @@ class EP(EPBase, ExactGaussianInference):

 class EPDTC(EPBase, VarDTC):
    def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
-        if self.always_reset:
+        if self.always_reset and not self.loading:
            self.reset()

        num_data, output_dim = Y.shape
@ -413,11 +460,11 @@ class EPDTC(EPBase, VarDTC):
        else:
            Kmn = psi1.T

-        if self.ep_mode=="nested":
+        if self.ep_mode=="nested" and not self.loading:
            #Force EP at each step of the optimization
            self._ep_approximation = None
            post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
-        elif self.ep_mode=="alternated":
+        elif self.ep_mode=="alternated" or self.loading:
            if getattr(self, '_ep_approximation', None) is None:
                #if we don't yet have the results of runnign EP, run EP and store the computed factors in self._ep_approximation
                post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
@ -427,6 +474,8 @@ class EPDTC(EPBase, VarDTC):
        else:
            raise ValueError("ep_mode value not valid")

+        self.loading = False
+
        mu_tilde = ga_approx.v / ga_approx.tau.astype(float)

        return super(EPDTC, self).inference(kern, X, Z, likelihood, ObsAr(mu_tilde[:,None]),
@ -533,3 +582,41 @@ class EPDTC(EPBase, VarDTC):
            #Posterior distribution parameters update
            if self.parallel_updates == False:
                post_params._update_rank1(LLT, Kmn, delta_v, delta_tau, i)
+
+
+    def to_dict(self):
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(EPDTC, self)._save_to_input_dict()
+        input_dict["class"] = "GPy.inference.latent_function_inference.expectation_propagation.EPDTC"
+        if self.ga_approx_old is not  None:
+            input_dict["ga_approx_old"] = self.ga_approx_old.to_dict()
+        if self._ep_approximation is not  None:
+            input_dict["_ep_approximation"] = {}
+            input_dict["_ep_approximation"]["post_params"] = self._ep_approximation[0].to_dict()
+            input_dict["_ep_approximation"]["ga_approx"] = self._ep_approximation[1].to_dict()
+            input_dict["_ep_approximation"]["log_Z_tilde"] = self._ep_approximation[2]
+
+        return input_dict
+
+    @staticmethod
+    def _build_from_input_dict(inference_class, input_dict):
+        ga_approx_old = input_dict.pop('ga_approx_old', None)
+        if ga_approx_old is not None:
+            ga_approx_old = gaussianApproximation.from_dict(ga_approx_old)
+        _ep_approximation_dict = input_dict.pop('_ep_approximation', None)
+        _ep_approximation = []
+        if _ep_approximation is not None:
+            _ep_approximation.append(posteriorParamsDTC.from_dict(_ep_approximation_dict["post_params"]))
+            _ep_approximation.append(gaussianApproximation.from_dict(_ep_approximation_dict["ga_approx"]))
+            _ep_approximation.append(_ep_approximation_dict["log_Z_tilde"])
+        ee = EPDTC(**input_dict)
+        ee.ga_approx_old = ga_approx_old
+        ee._ep_approximation = _ep_approximation
+        return ee
--- a/GPy/inference/latent_function_inference/posterior.py
+++ b/GPy/inference/latent_function_inference/posterior.py
@ -5,6 +5,7 @@ import numpy as np
 from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol, dtrtrs, tdot
 from GPy.core.parameterization.variational import VariationalPosterior

+
 class Posterior(object):
    """
    An object to represent a Gaussian posterior over latent function values, p(f|D).
@ -16,7 +17,9 @@ class Posterior(object):
    the function at any new point x_* by integrating over this posterior.

    """
-    def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None, prior_mean=0):
+
+    def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None,
+                 woodbury_inv=None, prior_mean=0):
        """
        woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K
        woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M
@ -44,33 +47,33 @@ class Posterior(object):
        compute all other quantites on demand.

        """
-        #obligatory
+        # obligatory
        self._K = K

-        if ((woodbury_chol is not None) and (woodbury_vector is not None))\
-                or ((woodbury_inv is not None) and (woodbury_vector is not None))\
-                or ((woodbury_inv is not None) and (mean is not None))\
+        if ((woodbury_chol is not None) and (woodbury_vector is not None)) \
+                or ((woodbury_inv is not None) and (woodbury_vector is not None)) \
+                or ((woodbury_inv is not None) and (mean is not None)) \
                or ((mean is not None) and (cov is not None)):
-            pass # we have sufficient to compute the posterior
+            pass  # we have sufficient to compute the posterior
        else:
            raise ValueError("insufficient information to compute the posterior")

        self._K_chol = K_chol
        self._K = K
-        #option 1:
+        # option 1:
        self._woodbury_chol = woodbury_chol
        self._woodbury_vector = woodbury_vector

-        #option 2.
+        # option 2.
        self._woodbury_inv = woodbury_inv
-        #and woodbury vector
+        # and woodbury vector

-        #option 2:
+        # option 2:
        self._mean = mean
        self._covariance = cov
        self._prior_mean = prior_mean

-        #compute this lazily
+        # compute this lazily
        self._precision = None

    @property
@ -96,9 +99,11 @@ class Posterior(object):
        $$
        """
        if self._covariance is None:
-            #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
-            self._covariance = (np.atleast_3d(self._K) - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze()
-            #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
+            # LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
+            self._covariance = (
+            np.atleast_3d(self._K) - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K,
+                                                  [1, 0]).T).squeeze()
+            # self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
        return self._covariance

    def covariance_between_points(self, kern, X, X1, X2):
@ -131,9 +136,9 @@ class Posterior(object):
        """
        if self._precision is None:
            cov = np.atleast_3d(self.covariance)
-            self._precision = np.zeros(cov.shape) # if one covariance per dimension
+            self._precision = np.zeros(cov.shape)  # if one covariance per dimension
            for p in range(cov.shape[-1]):
-                self._precision[:,:,p] = pdinv(cov[:,:,p])[0]
+                self._precision[:, :, p] = pdinv(cov[:, :, p])[0]
        return self._precision

    @property
@ -146,18 +151,18 @@ class Posterior(object):
        $$
        """
        if self._woodbury_chol is None:
-            #compute woodbury chol from
+            # compute woodbury chol from
            if self._woodbury_inv is not None:
                winv = np.atleast_3d(self._woodbury_inv)
                self._woodbury_chol = np.zeros(winv.shape)
                for p in range(winv.shape[-1]):
-                    self._woodbury_chol[:,:,p] = pdinv(winv[:,:,p])[2]
-                #Li = jitchol(self._woodbury_inv)
-                #self._woodbury_chol, _ = dtrtri(Li)
-                #W, _, _, _, = pdinv(self._woodbury_inv)
-                #symmetrify(W)
-                #self._woodbury_chol = jitchol(W)
-            #try computing woodbury chol from cov
+                    self._woodbury_chol[:, :, p] = pdinv(winv[:, :, p])[2]
+                    # Li = jitchol(self._woodbury_inv)
+                    # self._woodbury_chol, _ = dtrtri(Li)
+                    # W, _, _, _, = pdinv(self._woodbury_inv)
+                    # symmetrify(W)
+                    # self._woodbury_chol = jitchol(W)
+            # try computing woodbury chol from cov
            elif self._covariance is not None:
                raise NotImplementedError("TODO: check code here")
                B = self._K - self._covariance
@ -180,14 +185,14 @@ class Posterior(object):
        if self._woodbury_inv is None:
            if self._woodbury_chol is not None:
                self._woodbury_inv, _ = dpotri(self._woodbury_chol, lower=1)
-                #self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1)
+                # self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1)
                symmetrify(self._woodbury_inv)
            elif self._covariance is not None:
                B = np.atleast_3d(self._K) - np.atleast_3d(self._covariance)
                self._woodbury_inv = np.empty_like(B)
                for i in range(B.shape[-1]):
-                    tmp, _ = dpotrs(self.K_chol, B[:,:,i])
-                    self._woodbury_inv[:,:,i], _ = dpotrs(self.K_chol, tmp.T)
+                    tmp, _ = dpotrs(self.K_chol, B[:, :, i])
+                    self._woodbury_inv[:, :, i], _ = dpotrs(self.K_chol, tmp.T)
        return self._woodbury_inv

    @property
@ -219,14 +224,14 @@ class Posterior(object):
        if not isinstance(Xnew, VariationalPosterior):
            Kx = kern.K(pred_var, Xnew)
            mu = np.dot(Kx.T, woodbury_vector)
-            if len(mu.shape)==1:
-                mu = mu.reshape(-1,1)
+            if len(mu.shape) == 1:
+                mu = mu.reshape(-1, 1)
            if full_cov:
                Kxx = kern.K(Xnew)
                if woodbury_inv.ndim == 2:
                    var = Kxx - np.dot(Kx.T, np.dot(woodbury_inv, Kx))
-                elif woodbury_inv.ndim == 3: # Missing data
-                    var = np.empty((Kxx.shape[0],Kxx.shape[1],woodbury_inv.shape[2]))
+                elif woodbury_inv.ndim == 3:  # Missing data
+                    var = np.empty((Kxx.shape[0], Kxx.shape[1], woodbury_inv.shape[2]))
                    from ...util.linalg import mdot
                    for i in range(var.shape[2]):
                        var[:, :, i] = (Kxx - mdot(Kx.T, woodbury_inv[:, :, i], Kx))
@ -234,9 +239,9 @@ class Posterior(object):
            else:
                Kxx = kern.Kdiag(Xnew)
                if woodbury_inv.ndim == 2:
-                    var = (Kxx - np.sum(np.dot(woodbury_inv.T, Kx) * Kx, 0))[:,None]
-                elif woodbury_inv.ndim == 3: # Missing data
-                    var = np.empty((Kxx.shape[0],woodbury_inv.shape[2]))
+                    var = (Kxx - np.sum(np.dot(woodbury_inv.T, Kx) * Kx, 0))[:, None]
+                elif woodbury_inv.ndim == 3:  # Missing data
+                    var = np.empty((Kxx.shape[0], woodbury_inv.shape[2]))
                    for i in range(var.shape[1]):
                        var[:, i] = (Kxx - (np.sum(np.dot(woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
                var = var
@ -245,86 +250,99 @@ class Posterior(object):
            psi1_star = kern.psi1(pred_var, Xnew)
            psi2_star = kern.psi2n(pred_var, Xnew)
            la = woodbury_vector
-            mu = np.dot(psi1_star, la) # TODO: dimensions?
-            N,M,D = psi0_star.shape[0],psi1_star.shape[1], la.shape[1]
+            mu = np.dot(psi1_star, la)  # TODO: dimensions?
+            N, M, D = psi0_star.shape[0], psi1_star.shape[1], la.shape[1]

            if full_cov:
-                raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
+                raise NotImplementedError(
+                    "Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
                var = np.zeros((Xnew.shape[0], la.shape[1], la.shape[1]))
                di = np.diag_indices(la.shape[1])
            else:
-                tmp = psi2_star - psi1_star[:,:,None]*psi1_star[:,None,:]
-                var = (tmp.reshape(-1,M).dot(la).reshape(N,M,D)*la[None,:,:]).sum(1) + psi0_star[:,None]
-                if woodbury_inv.ndim==2:
-                    var += -psi2_star.reshape(N,-1).dot(woodbury_inv.flat)[:,None]
+                tmp = psi2_star - psi1_star[:, :, None] * psi1_star[:, None, :]
+                var = (tmp.reshape(-1, M).dot(la).reshape(N, M, D) * la[None, :, :]).sum(1) + psi0_star[:, None]
+                if woodbury_inv.ndim == 2:
+                    var += -psi2_star.reshape(N, -1).dot(woodbury_inv.flat)[:, None]
                else:
-                    var += -psi2_star.reshape(N,-1).dot(woodbury_inv.reshape(-1,D))
-        var = np.clip(var,1e-15,np.inf)
+                    var += -psi2_star.reshape(N, -1).dot(woodbury_inv.reshape(-1, D))
+        var = np.clip(var, 1e-15, np.inf)
        return mu, var


 class PosteriorExact(Posterior):
-
    def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):

        Kx = kern.K(pred_var, Xnew)
        mu = np.dot(Kx.T, self.woodbury_vector)
-        if len(mu.shape)==1:
-            mu = mu.reshape(-1,1)
+        if len(mu.shape) == 1:
+            mu = mu.reshape(-1, 1)
        if full_cov:
            Kxx = kern.K(Xnew)
            if self._woodbury_chol.ndim == 2:
                tmp = dtrtrs(self._woodbury_chol, Kx)[0]
                var = Kxx - tdot(tmp.T)
-            elif self._woodbury_chol.ndim == 3: # Missing data
-                var = np.empty((Kxx.shape[0],Kxx.shape[1],self._woodbury_chol.shape[2]))
+            elif self._woodbury_chol.ndim == 3:  # Missing data
+                var = np.empty((Kxx.shape[0], Kxx.shape[1], self._woodbury_chol.shape[2]))
                for i in range(var.shape[2]):
-                    tmp = dtrtrs(self._woodbury_chol[:,:,i], Kx)[0]
+                    tmp = dtrtrs(self._woodbury_chol[:, :, i], Kx)[0]
                    var[:, :, i] = (Kxx - tdot(tmp.T))
            var = var
        else:
            Kxx = kern.Kdiag(Xnew)
            if self._woodbury_chol.ndim == 2:
                tmp = dtrtrs(self._woodbury_chol, Kx)[0]
-                var = (Kxx - np.square(tmp).sum(0))[:,None]
-            elif self._woodbury_chol.ndim == 3: # Missing data
-                var = np.empty((Kxx.shape[0],self._woodbury_chol.shape[2]))
+                var = (Kxx - np.square(tmp).sum(0))[:, None]
+            elif self._woodbury_chol.ndim == 3:  # Missing data
+                var = np.empty((Kxx.shape[0], self._woodbury_chol.shape[2]))
                for i in range(var.shape[1]):
-                    tmp = dtrtrs(self._woodbury_chol[:,:,i], Kx)[0]
+                    tmp = dtrtrs(self._woodbury_chol[:, :, i], Kx)[0]
                    var[:, i] = (Kxx - np.square(tmp).sum(0))
            var = var
        return mu, var

-class PosteriorEP(Posterior):

+class PosteriorEP(Posterior):
    def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):

        Kx = kern.K(pred_var, Xnew)
        mu = np.dot(Kx.T, self.woodbury_vector)
-        if len(mu.shape)==1:
-            mu = mu.reshape(-1,1)
+        if len(mu.shape) == 1:
+            mu = mu.reshape(-1, 1)

        if full_cov:
            Kxx = kern.K(Xnew)
            if self._woodbury_inv.ndim == 2:
-                tmp = np.dot(Kx.T,np.dot(self._woodbury_inv, Kx))
+                tmp = np.dot(Kx.T, np.dot(self._woodbury_inv, Kx))
                var = Kxx - tmp
-            elif self._woodbury_inv.ndim == 3: # Missing data
-                var = np.empty((Kxx.shape[0],Kxx.shape[1],self._woodbury_inv.shape[2]))
+            elif self._woodbury_inv.ndim == 3:  # Missing data
+                var = np.empty((Kxx.shape[0], Kxx.shape[1], self._woodbury_inv.shape[2]))
                for i in range(var.shape[2]):
-                    tmp = np.dot(Kx.T,np.dot(self._woodbury_inv[:,:,i], Kx))
+                    tmp = np.dot(Kx.T, np.dot(self._woodbury_inv[:, :, i], Kx))
                    var[:, :, i] = (Kxx - tmp)
            var = var
        else:
            Kxx = kern.Kdiag(Xnew)
            if self._woodbury_inv.ndim == 2:
                tmp = (np.dot(self._woodbury_inv, Kx) * Kx).sum(0)
-                var = (Kxx - tmp)[:,None]
-            elif self._woodbury_inv.ndim == 3: # Missing data
-                var = np.empty((Kxx.shape[0],self._woodbury_inv.shape[2]))
+                var = (Kxx - tmp)[:, None]
+            elif self._woodbury_inv.ndim == 3:  # Missing data
+                var = np.empty((Kxx.shape[0], self._woodbury_inv.shape[2]))
                for i in range(var.shape[1]):
-                    tmp = (Kx * np.dot(self._woodbury_inv[:,:,i], Kx)).sum(0)
+                    tmp = (Kx * np.dot(self._woodbury_inv[:, :, i], Kx)).sum(0)
                    var[:, i] = (Kxx - tmp)
            var = var

        return mu, var
+
+
+class StudentTPosterior(PosteriorExact):
+    def __init__(self, deg_free, **kwargs):
+        super(StudentTPosterior, self).__init__(**kwargs)
+        self.nu = deg_free
+
+    def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
+        mu, var = super(StudentTPosterior, self)._raw_predict(kern, Xnew, pred_var, full_cov)
+        beta = np.sum(self.woodbury_vector * self.mean)
+        N = self.woodbury_vector.shape[0]
+        tp_var_scale = (self.nu + beta - 2) / (self.nu + N - 2)
+        return mu, tp_var_scale * var
--- a/GPy/kern/init.py
+++ b/GPy/kern/init.py
@ -34,6 +34,7 @@ from .src.splitKern import DEtime as DiffGenomeKern
 from .src.spline import Spline
 from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel, PolynomialBasisFuncKernel
 from .src.grid_kerns import GridRBF
+from .src.symmetric import Symmetric

 from .src.sde_matern import sde_Matern32
 from .src.sde_matern import sde_Matern52
--- a/GPy/kern/_src/sde_brownian.py
+++ b/GPy/kern/_src/sde_brownian.py
@ -1,57 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Classes in this module enhance Brownian motion covariance function with the
-Stochastic Differential Equation (SDE) functionality.
-"""
-
-from .brownian import Brownian
-
-import numpy as np
-
-class sde_Brownian(Brownian):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE form.
-    
-    Linear kernel:
-
-    .. math::
-
-       k(x,y) = \sigma^2 min(x,y)
-
-    """
-    
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        variance = float(self.variance.values) # this is initial variancve in Bayesian linear regression
-        
-        F = np.array( ((0,1.0),(0,0) ))
-        L = np.array( ((1.0,),(0,)) )
-        Qc = np.array( ((variance,),) )
-        H = np.array( ((1.0,0),) )
-        
-        Pinf   = np.array( ( (0, -0.5*variance ), (-0.5*variance, 0) ) )
-        #P0 = Pinf.copy() 
-        P0 = np.zeros((2,2))   
-        #Pinf   = np.array( ( (t0, 1.0), (1.0, 1.0/t0) ) ) * variance
-        dF = np.zeros((2,2,1))
-        dQc    = np.ones( (1,1,1) )
-        
-        dPinf = np.zeros((2,2,1))
-        dPinf[:,:,0] = np.array( ( (0, -0.5), (-0.5, 0) ) )
-        #dP0 = dPinf.copy() 
-        dP0 = np.zeros((2,2,1))
-  
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
--- a/GPy/kern/_src/sde_linear.py
+++ b/GPy/kern/_src/sde_linear.py
@ -1,64 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Classes in this module enhance Linear covariance function with the
-Stochastic Differential Equation (SDE) functionality.
-"""
-from .linear import Linear
-
-import numpy as np
-
-class sde_Linear(Linear):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE form.
-    
-    Linear kernel:
-
-    .. math::
-
-       k(x,y) = \sum_{i=1}^{input dim} \sigma^2_i x_iy_i
-
-    """
-    def __init__(self, input_dim, X, variances=None, ARD=False, active_dims=None, name='linear'):
-        """
-        Modify the init method, because one extra parameter is required. X - points
-        on the X axis.
-        """
-        
-        super(sde_Linear, self).__init__(input_dim, variances, ARD, active_dims, name)
-        
-        self.t0 = np.min(X)
-        
-    
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variances.gradient = gradients[0]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        variance = float(self.variances.values) # this is initial variancve in Bayesian linear regression
-        t0 = float(self.t0)
-        
-        F = np.array( ((0,1.0),(0,0) ))
-        L = np.array( ((0,),(1.0,)) )
-        Qc = np.zeros((1,1))
-        H = np.array( ((1.0,0),) )
-        
-        Pinf   = np.zeros((2,2))
-        P0 = np.array( ( (t0**2, t0), (t0, 1) ) ) * variance        
-        dF = np.zeros((2,2,1))
-        dQc    = np.zeros( (1,1,1) )
-        
-        dPinf = np.zeros((2,2,1))
-        dP0 = np.zeros((2,2,1))
-        dP0[:,:,0]  = P0 / variance
-  
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
--- a/GPy/kern/_src/sde_matern.py
+++ b/GPy/kern/_src/sde_matern.py
@ -1,135 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Classes in this module enhance Matern covariance functions with the
-Stochastic Differential Equation (SDE) functionality.
-"""
-from .stationary import Matern32
-from .stationary import Matern52
-import numpy as np
-
-class sde_Matern32(Matern32):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE forrm.
-    
-    Matern 3/2 kernel:
-
-    .. math::
-
-       k(r) = \sigma^2 (1 + \sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
-
-    """
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        self.lengthscale.gradient = gradients[1]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        variance = float(self.variance.values)
-        lengthscale = float(self.lengthscale.values)
-        
-        foo  = np.sqrt(3.)/lengthscale 
-        F    = np.array(((0, 1.0), (-foo**2, -2*foo))) 
-        L    = np.array(( (0,), (1.0,) ))
-        Qc   = np.array(((12.*np.sqrt(3) / lengthscale**3 * variance,),)) 
-        H    = np.array(((1.0, 0),)) 
-        Pinf = np.array(((variance, 0.0), (0.0, 3.*variance/(lengthscale**2))))
-        P0 = Pinf.copy()
-        
-        # Allocate space for the derivatives 
-        dF    = np.empty([F.shape[0],F.shape[1],2])
-        dQc   = np.empty([Qc.shape[0],Qc.shape[1],2]) 
-        dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) 
-        # The partial derivatives 
-        dFvariance       = np.zeros((2,2)) 
-        dFlengthscale    = np.array(((0,0), (6./lengthscale**3,2*np.sqrt(3)/lengthscale**2))) 
-        dQcvariance      = np.array((12.*np.sqrt(3)/lengthscale**3)) 
-        dQclengthscale   = np.array((-3*12*np.sqrt(3)/lengthscale**4*variance)) 
-        dPinfvariance    = np.array(((1,0),(0,3./lengthscale**2))) 
-        dPinflengthscale = np.array(((0,0), (0,-6*variance/lengthscale**3))) 
-        # Combine the derivatives 
-        dF[:,:,0]    = dFvariance 
-        dF[:,:,1]    = dFlengthscale 
-        dQc[:,:,0]   = dQcvariance 
-        dQc[:,:,1]   = dQclengthscale 
-        dPinf[:,:,0] = dPinfvariance 
-        dPinf[:,:,1] = dPinflengthscale 
-        dP0 = dPinf.copy()
-        
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
-
-class sde_Matern52(Matern52):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE forrm.
-    
-    Matern 5/2 kernel:
-
-    .. math::
-
-       k(r) = \sigma^2 (1 + \sqrt{5} r + \frac{5}{3}r^2) \exp(- \sqrt{5} r) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
-
-    """
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        self.lengthscale.gradient = gradients[1]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        variance = float(self.variance.values)
-        lengthscale = float(self.lengthscale.values)
-
-        lamda = np.sqrt(5.0)/lengthscale
-        kappa = 5.0/3.0*variance/lengthscale**2        
-        
-        F = np.array(((0, 1,0), (0, 0, 1), (-lamda**3, -3.0*lamda**2, -3*lamda)))
-        L = np.array(((0,),(0,),(1,)))
-        Qc = np.array((((variance*400.0*np.sqrt(5.0)/3.0/lengthscale**5),),))
-        H = np.array(((1,0,0),))        
-        
-        Pinf = np.array(((variance,0,-kappa), (0, kappa, 0), (-kappa, 0, 25.0*variance/lengthscale**4)))
-        P0 = Pinf.copy()
-        # Allocate space for the derivatives         
-        dF = np.empty((3,3,2))        
-        dQc = np.empty((1,1,2))        
-        dPinf = np.empty((3,3,2))
-        
-         # The partial derivatives 
-        dFvariance = np.zeros((3,3))
-        dFlengthscale = np.array(((0,0,0),(0,0,0),(15.0*np.sqrt(5.0)/lengthscale**4, 
-                                   30.0/lengthscale**3, 3*np.sqrt(5.0)/lengthscale**2)))
-        dQcvariance = np.array((((400*np.sqrt(5)/3/lengthscale**5,),)))
-        dQclengthscale = np.array((((-variance*2000*np.sqrt(5)/3/lengthscale**6,),)))        
-        
-        dPinf_variance = Pinf/variance
-        kappa2 = -2.0*kappa/lengthscale
-        dPinf_lengthscale = np.array(((0,0,-kappa2),(0,kappa2,0),(-kappa2, 
-                                    0,-100*variance/lengthscale**5)))        
-        # Combine the derivatives 
-        dF[:,:,0] = dFvariance
-        dF[:,:,1] = dFlengthscale        
-        dQc[:,:,0] = dQcvariance         
-        dQc[:,:,1] = dQclengthscale        
-        dPinf[:,:,0] = dPinf_variance
-        dPinf[:,:,1] = dPinf_lengthscale
-        dP0 = dPinf.copy()
-        
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)  
--- a/GPy/kern/_src/sde_standard_periodic.py
+++ b/GPy/kern/_src/sde_standard_periodic.py
@ -1,178 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Classes in this module enhance Matern covariance functions with the
-Stochastic Differential Equation (SDE) functionality.
-"""
-from .standard_periodic import StdPeriodic
-
-import numpy as np
-import scipy as sp
-
-from scipy import special as special
-
-class sde_StdPeriodic(StdPeriodic):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE form.
-    
-    Standard Periodic kernel:
-
-    .. math::
-
-       k(x,y) = \theta_1 \exp \left[  - \frac{1}{2} {}\sum_{i=1}^{input\_dim}  
-       \left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
-
-    """
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        self.wavelengths.gradient = gradients[1]
-        self.lengthscales.gradient = gradients[2]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance.
-        
-        
-        ! Note: one must constrain lengthscale not to drop below 0.25.
-        After this bessel functions of the first kind grows to very high.
-        
-        ! Note: one must keep wevelength also not very low. Because then
-        the gradients wrt wavelength become ustable. 
-        However this might depend on the data. For test example with
-        300 data points the low limit is 0.15.
-        """ 
-        
-        # Params to use: (in that order)
-        #self.variance
-        #self.wavelengths
-        #self.lengthscales
-        N = 7 # approximation order        
-        
-        
-        w0 = 2*np.pi/self.wavelengths # frequency
-        lengthscales = 2*self.lengthscales         
-        
-        [q2,dq2l] = seriescoeff(N,lengthscales,self.variance)        
-        # lengthscale is multiplied by 2 because of slightly different
-        # formula for periodic covariance function.
-        # For the same reason:
-        
-        dq2l = 2*dq2l
-        
-        if np.any( np.isfinite(q2) == False):
-            raise ValueError("SDE periodic covariance error 1")
-        
-        if np.any( np.isfinite(dq2l) == False):
-            raise ValueError("SDE periodic covariance error 2")
-        
-        F    = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
-        L    = np.eye(2*(N+1))
-        Qc   = np.zeros((2*(N+1), 2*(N+1)))
-        P_inf = np.kron(np.diag(q2),np.eye(2))
-        H    = np.kron(np.ones((1,N+1)),np.array((1,0)) )
-        P0 = P_inf.copy()
-        
-        # Derivatives
-        dF = np.empty((F.shape[0], F.shape[1], 3))
-        dQc = np.empty((Qc.shape[0], Qc.shape[1], 3))
-        dP_inf = np.empty((P_inf.shape[0], P_inf.shape[1], 3))         
-        
-        # Derivatives wrt self.variance
-        dF[:,:,0] = np.zeros(F.shape)
-        dQc[:,:,0] = np.zeros(Qc.shape)
-        dP_inf[:,:,0] = P_inf / self.variance
-
-        # Derivatives self.wavelengths
-        dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0,  w0), (-w0, 0)) ) / self.wavelengths );
-        dQc[:,:,1] = np.zeros(Qc.shape)
-        dP_inf[:,:,1] = np.zeros(P_inf.shape)      
-        
-        # Derivatives self.lengthscales        
-        dF[:,:,2] = np.zeros(F.shape)
-        dQc[:,:,2] = np.zeros(Qc.shape)
-        dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
-        dP0 = dP_inf.copy()
-
-        return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
-        
-        
-        
-        
-def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
-    """
-    Calculate the coefficients q_j^2 for the covariance function 
-    approximation:
-    
-        k(\tau) =  \sum_{j=0}^{+\infty} q_j^2 \cos(j\omega_0 \tau)
-    
-    Reference is:
-
-    [1] Arno Solin and Simo Särkkä (2014). Explicit link between periodic 
-        covariance functions and state space models. In Proceedings of the 
-        Seventeenth International Conference on Artifcial Intelligence and 
-        Statistics (AISTATS 2014). JMLR: W&CP, volume 33.    
-    
-    Note! Only the infinite approximation (through Bessel function) 
-          is currently implemented.
-
-    Input:
-    ----------------
-    
-    m: int
-        Degree of approximation. Default 6.
-    lengthScale: float
-        Length scale parameter in the kerenl
-    magnSigma2:float
-        Multiplier in front of the kernel.
-        
-    
-    Output:
-    -----------------
-    
-    coeffs: array(m+1)
-        Covariance series coefficients
-    
-    coeffs_dl: array(m+1)
-        Derivatives of the coefficients with respect to lengthscale.
-    
-    """
-    
-    if true_covariance:
-        
-        bb = lambda j,m: (1.0 + np.array((j != 0), dtype=np.float64) ) / (2**(j)) *\
-            sp.special.binom(j, sp.floor( (j-m)/2.0 * np.array(m<=j, dtype=np.float64) ))*\
-            np.array(m<=j, dtype=np.float64) *np.array(sp.mod(j-m,2)==0, dtype=np.float64)
-                
-        M,J = np.meshgrid(range(0,m+1),range(0,m+1))
-        
-        coeffs = bb(J,M) / sp.misc.factorial(J) * sp.exp( -lengthScale**(-2) ) *\
-             (lengthScale**(-2))**J  *magnSigma2
-        
-        coeffs_dl = np.sum( coeffs*lengthScale**(-3)*(2.0-2.0*J*lengthScale**2),0)         
-        
-        coeffs = np.sum(coeffs,0)
-        
-    else:
-        coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
-        if np.any( np.isfinite(coeffs) == False):
-            raise ValueError("sde_standard_periodic: Coefficients are not finite!")
-            #import pdb; pdb.set_trace()
-        coeffs[0] = 0.5*coeffs[0]
-        
-        # Derivatives wrt (lengthScale)
-        coeffs_dl = np.zeros(m+1)
-        coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
-        (-4*special.iv(range(0,m),lengthScale**(-2)) + 4*(1+np.arange(1,m+1)*lengthScale**(2))*special.iv(range(1,m+1),lengthScale**(-2)) )    
-            
-        # The first element
-        coeffs_dl[0] = magnSigma2*lengthScale**(-3) * np.exp(-lengthScale**(-2))*\
-            (2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )     
-        
-
-    return coeffs, coeffs_dl
--- a/GPy/kern/_src/sde_static.py
+++ b/GPy/kern/_src/sde_static.py
@ -1,101 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Classes in this module enhance Static covariance functions with the
-Stochastic Differential Equation (SDE) functionality.
-"""
-from .static import White
-from .static import Bias
-
-import numpy as np
-
-class sde_White(White):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE forrm.
-    
-    White kernel:
-
-    .. math::
-
-       k(x,y) = \alpha*\delta(x-y)
-
-    """
-    
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        variance = float(self.variance.values) 
-        
-        F = np.array( ((-np.inf,),) )
-        L = np.array( ((1.0,),)  )
-        Qc = np.array( ((variance,),)  )
-        H = np.array( ((1.0,),) )
-        
-        Pinf   = np.array( ((variance,),)  )
-        P0 = Pinf.copy()     
-        
-        dF = np.zeros((1,1,1))
-        dQc = np.zeros((1,1,1))
-        dQc[:,:,0]    = np.array( ((1.0,),) )
-        
-        dPinf = np.zeros((1,1,1))
-        dPinf[:,:,0] = np.array( ((1.0,),) )
-        dP0 = dPinf.copy()
-        
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
-
-
-class sde_Bias(Bias):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE forrm.
-    
-    Bias kernel:
-
-    .. math::
-
-       k(x,y) = \alpha
-
-    """
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        variance = float(self.variance.values) 
-        
-        F = np.array( ((0.0,),))
-        L = np.array( ((1.0,),))
-        Qc = np.zeros((1,1))
-        H = np.array( ((1.0,),))
-        
-        Pinf   = np.zeros((1,1))
-        P0 = np.array( ((variance,),) )      
-        
-        dF = np.zeros((1,1,1))
-        dQc    = np.zeros((1,1,1))
-        
-        dPinf = np.zeros((1,1,1))
-        dP0 = np.zeros((1,1,1))
-        dP0[:,:,0] = np.array( ((1.0,),) )
-        
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
--- a/GPy/kern/_src/sde_stationary.py
+++ b/GPy/kern/_src/sde_stationary.py
@ -1,194 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Classes in this module enhance several stationary covariance functions with the
-Stochastic Differential Equation (SDE) functionality.
-"""
-from .rbf import RBF
-from .stationary import Exponential
-from .stationary import RatQuad
-
-import numpy as np
-import scipy as sp
-try:
-    from scipy.linalg import solve_continuous_lyapunov as lyap
-except ImportError:
-    from scipy.linalg import solve_lyapunov as lyap
-
-class sde_RBF(RBF):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE form.
-    
-    Radial Basis Function kernel:
-
-    .. math::
-
-        k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
-
-    """
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        self.lengthscale.gradient = gradients[1]
-
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        N = 10# approximation order ( number of terms in exponent series expansion)
-        roots_rounding_decimals = 6
-        
-        fn = np.math.factorial(N)        
-        
-        kappa = 1.0/2.0/self.lengthscale**2
-       
-        Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
-       
-        pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
-        
-        for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
-            pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n 
-        
-        pp = sp.poly1d(pp)
-        roots = sp.roots(pp)        
-        
-        neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
-        aa = sp.poly1d(neg_real_part_roots, r=True).coeffs        
-        
-        F = np.diag(np.ones((N-1,)),1)
-        F[-1,:] = -aa[-1:0:-1]
-        
-        L= np.zeros((N,1))
-        L[N-1,0] = 1
-        
-        H = np.zeros((1,N))
-        H[0,0] = 1
-        
-        # Infinite covariance:
-        Pinf = lyap(F, -np.dot(L,np.dot( Qc[0,0],L.T)))
-        Pinf = 0.5*(Pinf + Pinf.T)
-        # Allocating space for derivatives        
-        dF    = np.empty([F.shape[0],F.shape[1],2])
-        dQc   = np.empty([Qc.shape[0],Qc.shape[1],2]) 
-        dPinf = np.empty([Pinf.shape[0],Pinf.shape[1],2]) 
-        
-        # Derivatives:
-        dFvariance = np.zeros(F.shape)
-        dFlengthscale = np.zeros(F.shape)
-        dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
-
-        dQcvariance = Qc/self.variance
-        dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))         
-        
-        dPinf_variance = Pinf/self.variance
-        
-        lp = Pinf.shape[0]
-        coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
-        coeff[np.mod(coeff,2) != 0] = 0
-        dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
-        
-        dF[:,:,0]    = dFvariance 
-        dF[:,:,1]    = dFlengthscale 
-        dQc[:,:,0]   = dQcvariance 
-        dQc[:,:,1]   = dQclengthscale 
-        dPinf[:,:,0] = dPinf_variance 
-        dPinf[:,:,1] = dPinf_lengthscale
-        
-        P0 = Pinf.copy()
-        dP0 = dPinf.copy()
-        
-        # Benefits of this are not very sound. Helps only in one case:
-        # SVD Kalman + RBF kernel
-        import GPy.models.state_space_main as ssm
-        (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
-        
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
-
-class sde_Exponential(Exponential):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE form.
-    
-    Exponential kernel:
-
-    .. math::
-
-       k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r \\bigg) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
-
-    """
-    
-    def sde_update_gradient_full(self, gradients):
-        """
-        Update gradient in the order in which parameters are represented in the
-        kernel
-        """
-    
-        self.variance.gradient = gradients[0]
-        self.lengthscale.gradient = gradients[1]
-        
-    def sde(self): 
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        variance = float(self.variance.values)
-        lengthscale = float(self.lengthscale)        
-        
-        F  = np.array(((-1.0/lengthscale,),))
-        L  = np.array(((1.0,),)) 
-        Qc = np.array( ((2.0*variance/lengthscale,),) ) 
-        H = np.array(((1.0,),)) 
-        Pinf = np.array(((variance,),)) 
-        P0 = Pinf.copy()        
-        
-        dF = np.zeros((1,1,2));  
-        dQc = np.zeros((1,1,2)); 
-        dPinf = np.zeros((1,1,2));
-        
-        dF[:,:,0] = 0.0        
-        dF[:,:,1] = 1.0/lengthscale**2
-        
-        dQc[:,:,0] = 2.0/lengthscale       
-        dQc[:,:,1] = -2.0*variance/lengthscale**2
-        
-        dPinf[:,:,0] = 1.0
-        dPinf[:,:,1] = 0.0
-        
-        dP0 = dPinf.copy()        
-
-        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)
-        
-class sde_RatQuad(RatQuad):
-    """
-    
-    Class provide extra functionality to transfer this covariance function into
-    SDE form.
-    
-    Rational Quadratic kernel:
-
-    .. math::
-
-       k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \alpha} \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }
-
-    """
-
-    def sde(self):
-        """ 
-        Return the state space representation of the covariance. 
-        """ 
-        
-        assert False, 'Not Implemented'
-        
-        # Params to use:
-
-        # self.lengthscale
-        # self.variance
-        #self.power
-        
-        #return (F, L, Qc, H, Pinf, dF, dQc, dPinf)  
--- a/GPy/kern/src/add.py
+++ b/GPy/kern/src/add.py
@ -44,7 +44,15 @@ class Add(CombinationKernel):
            return False

    def to_dict(self):
-        input_dict = super(Add, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(Add, self)._save_to_input_dict()
        input_dict["class"] = str("GPy.kern.Add")
        return input_dict

--- a/GPy/kern/src/coregionalize.py
+++ b/GPy/kern/src/coregionalize.py
@ -6,11 +6,14 @@ import numpy as np
 from ...core.parameterization import Param
 from paramz.transformations import Logexp
 from ...util.config import config # for assesing whether to use cython
+
 try:
    from . import coregionalize_cython
-    config.set('cython', 'working', 'True')
+    use_coregionalize_cython = config.getboolean('cython', 'working')
 except ImportError:
-    config.set('cython', 'working', 'False')
+    print('warning in coregionalize: failed to import cython module: falling back to numpy')
+    use_coregionalize_cython = False
+

 class Coregionalize(Kern):
    """
@ -61,7 +64,7 @@ class Coregionalize(Kern):
        self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)

    def K(self, X, X2=None):
-        if config.getboolean('cython', 'working'):
+        if use_coregionalize_cython:
            return self._K_cython(X, X2)
        else:
            return self._K_numpy(X, X2)
@ -92,7 +95,7 @@ class Coregionalize(Kern):
            index2 = np.asarray(X2, dtype=np.int)

        #attempt to use cython for a nasty double indexing loop: fall back to numpy
-        if config.getboolean('cython', 'working'):
+        if use_coregionalize_cython:
            dL_dK_small = self._gradient_reduce_cython(dL_dK, index, index2)
        else:
            dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
--- a/GPy/kern/src/kern.py
+++ b/GPy/kern/src/kern.py
@ -60,7 +60,7 @@ class Kern(Parameterized):
        from .psi_comp import PSICOMP_GH
        self.psicomp = PSICOMP_GH()

-    def _to_dict(self):
+    def _save_to_input_dict(self):
        input_dict = {}
        input_dict["input_dim"] = self.input_dim
        if isinstance(self.active_dims, np.ndarray):
@ -76,16 +76,28 @@ class Kern(Parameterized):

    @staticmethod
    def from_dict(input_dict):
+        """
+        Instantiate an object of a derived class using the information
+        in input_dict (built by the to_dict method of the derived class).
+        More specifically, after reading the derived class from input_dict,
+        it calls the method _build_from_input_dict of the derived class.
+        Note: This method should not be overrided in the derived class. In case
+        it is needed, please override _build_from_input_dict instate.
+
+        :param dict input_dict: Dictionary with all the information needed to
+           instantiate the object.
+        """
+
        import copy
        input_dict = copy.deepcopy(input_dict)
        kernel_class = input_dict.pop('class')
        input_dict["name"] = str(input_dict["name"])
        import GPy
        kernel_class = eval(kernel_class)
-        return kernel_class._from_dict(kernel_class, input_dict)
+        return kernel_class._build_from_input_dict(kernel_class, input_dict)

    @staticmethod
-    def _from_dict(kernel_class, input_dict):
+    def _build_from_input_dict(kernel_class, input_dict):
        return kernel_class(**input_dict)


@ -375,15 +387,15 @@ class CombinationKernel(Kern):
        if link_parameters:
            self.link_parameters(*kernels)

-    def _to_dict(self):
-        input_dict = super(CombinationKernel, self)._to_dict()
+    def _save_to_input_dict(self):
+        input_dict = super(CombinationKernel, self)._save_to_input_dict()
        input_dict["parts"]  = {}
        for ii in range(len(self.parts)):
            input_dict["parts"][ii] = self.parts[ii].to_dict()
        return input_dict

    @staticmethod
-    def _from_dict(kernel_class, input_dict):
+    def _build_from_input_dict(kernel_class, input_dict):
        parts = input_dict.pop('parts', None)
        subkerns = []
        for pp in parts:
--- a/GPy/kern/src/linear.py
+++ b/GPy/kern/src/linear.py
@ -52,14 +52,14 @@ class Linear(Kern):
        self.psicomp = PSICOMP_Linear()

    def to_dict(self):
-        input_dict = super(Linear, self)._to_dict()
+        input_dict = super(Linear, self)._save_to_input_dict()
        input_dict["class"] = "GPy.kern.Linear"
        input_dict["variances"] = self.variances.values.tolist()
        input_dict["ARD"] = self.ARD
        return input_dict

    @staticmethod
-    def _from_dict(kernel_class, input_dict):
+    def _build_from_input_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Linear(**input_dict)

--- a/GPy/kern/src/prod.py
+++ b/GPy/kern/src/prod.py
@ -43,7 +43,15 @@ class Prod(CombinationKernel):
        super(Prod, self).__init__(_newkerns, name)

    def to_dict(self):
-        input_dict = super(Prod, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(Prod, self)._save_to_input_dict()
        input_dict["class"] = str("GPy.kern.Prod")
        return input_dict

--- a/GPy/kern/src/rbf.py
+++ b/GPy/kern/src/rbf.py
@ -32,7 +32,15 @@ class RBF(Stationary):
            self.link_parameter(self.inv_l)

    def to_dict(self):
-        input_dict = super(RBF, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(RBF, self)._save_to_input_dict()
        input_dict["class"] = "GPy.kern.RBF"
        input_dict["inv_l"] = self.use_invLengthscale
        if input_dict["inv_l"] == True:
--- a/GPy/kern/src/sde_standard_periodic.py
+++ b/GPy/kern/src/sde_standard_periodic.py
@ -9,6 +9,7 @@ from .standard_periodic import StdPeriodic

 import numpy as np
 import scipy as sp
+import warnings

 from scipy import special as special

@ -26,6 +27,38 @@ class sde_StdPeriodic(StdPeriodic):
       \left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }

    """
+    # TODO: write comment to the constructor arguments
+    def __init__(self, *args, **kwargs):
+        """
+        Init constructior.
+        
+        Two optinal extra parameters are added in addition to the ones in 
+        StdPeriodic kernel.
+        
+        :param approx_order: approximation order for the RBF covariance. (Default 7)
+        :type approx_order: int
+        
+        :param balance: Whether to balance this kernel separately. (Defaulf False). Model has a separate parameter for balancing.
+        :type balance: bool
+        """
+        
+        #import pdb; pdb.set_trace()
+        
+        if 'approx_order' in kwargs:
+            self.approx_order = kwargs.get('approx_order')
+            del kwargs['approx_order']
+        else:
+            self.approx_order = 7
+        
+        
+        if 'balance' in kwargs:
+            self.balance = bool( kwargs.get('balance') )
+            del kwargs['balance']
+        else:
+            self.balance = False
+        
+        super(sde_StdPeriodic, self).__init__(*args, **kwargs)
+        
    def sde_update_gradient_full(self, gradients):
        """
        Update gradient in the order in which parameters are represented in the
@ -38,41 +71,48 @@ class sde_StdPeriodic(StdPeriodic):
        
    def sde(self): 
        """ 
-        Return the state space representation of the covariance.
+        Return the state space representation of the standard periodic covariance.
        
        
-        ! Note: one must constrain lengthscale not to drop below 0.25.
-        After this bessel functions of the first kind grows to very high.
+        ! Note: one must constrain lengthscale not to drop below 0.2. (independently of approximation order)
+        After this Bessel functions of the first becomes NaN. Rescaling
+        time variable might help.
        
-        ! Note: one must keep wevelength also not very low. Because then
+        ! Note: one must keep period also not very low. Because then
        the gradients wrt wavelength become ustable. 
        However this might depend on the data. For test example with
-        300 data points the low limit is 0.15.
+        300 data points the low limit is 0.15. 
        """ 
        
+        #import pdb; pdb.set_trace()
        # Params to use: (in that order)
        #self.variance
        #self.period
        #self.lengthscale
-        N = 7 # approximation order        
+        if self.approx_order is not None:
+            N = int(self.approx_order)
+        else:
+            N = 7 # approximation order        
        
+        p_period = float(self.period)        
+        p_lengthscale = 2*float(self.lengthscale)
+        p_variance = float(self.variance)        
        
-        w0 = 2*np.pi/self.period # frequency
-        lengthscale = 2*self.lengthscale         
+        w0 = 2*np.pi/p_period # frequency
+        # lengthscale is multiplied by 2 because of different definition of lengthscale
        
-        [q2,dq2l] = seriescoeff(N,lengthscale,self.variance)        
-        # lengthscale is multiplied by 2 because of slightly different
-        # formula for periodic covariance function.
-        # For the same reason:
+        [q2,dq2l] = seriescoeff(N, p_lengthscale, p_variance)        
        
-        dq2l = 2*dq2l
-        
-        if np.any( np.isfinite(q2) == False):
-            raise ValueError("SDE periodic covariance error 1")
-        
-        if np.any( np.isfinite(dq2l) == False):
-            raise ValueError("SDE periodic covariance error 2")
+        dq2l = 2*dq2l  # This is because the lengthscale if multiplied by 2.
        
+        eps = 1e-12
+        if np.any( np.isfinite(q2) == False) or np.any( np.abs(q2) > 1.0/eps) or np.any( np.abs(q2) < eps):
+            warnings.warn("sde_Periodic:  Infinite, too small, or too large (eps={0:e}) values in q2 :".format(eps) + q2.__format__("") )
+                                
+        if np.any( np.isfinite(dq2l) == False) or np.any( np.abs(dq2l) > 1.0/eps) or np.any( np.abs(dq2l) < eps):
+            warnings.warn("sde_Periodic:  Infinite, too small, or too large (eps={0:e}) values in dq2l :".format(eps) + q2.__format__("") )
+                 
+                 
        F    = np.kron(np.diag(range(0,N+1)),np.array( ((0, -w0), (w0, 0)) ) )
        L    = np.eye(2*(N+1))
        Qc   = np.zeros((2*(N+1), 2*(N+1)))
@ -88,10 +128,10 @@ class sde_StdPeriodic(StdPeriodic):
        # Derivatives wrt self.variance
        dF[:,:,0] = np.zeros(F.shape)
        dQc[:,:,0] = np.zeros(Qc.shape)
-        dP_inf[:,:,0] = P_inf / self.variance
+        dP_inf[:,:,0] = P_inf / p_variance

        # Derivatives self.period
-        dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0,  w0), (-w0, 0)) ) / self.period );
+        dF[:,:,1] = np.kron(np.diag(range(0,N+1)),np.array( ((0,  w0), (-w0, 0)) ) / p_period );
        dQc[:,:,1] = np.zeros(Qc.shape)
        dP_inf[:,:,1] = np.zeros(P_inf.shape)      
        
@ -100,7 +140,12 @@ class sde_StdPeriodic(StdPeriodic):
        dQc[:,:,2] = np.zeros(Qc.shape)
        dP_inf[:,:,2] = np.kron(np.diag(dq2l),np.eye(2))
        dP0 = dP_inf.copy()
-
+        
+        if self.balance:
+            # Benefits of this are not very sound.
+            import GPy.models.state_space_main as ssm
+            (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf,dP0) = ssm.balance_ss_model(F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0 )
+            
        return (F, L, Qc, H, P_inf, P0, dF, dQc, dP_inf, dP0)
        
        
@ -164,9 +209,9 @@ def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
        coeffs = 2*magnSigma2*sp.exp( -lengthScale**(-2) ) * special.iv(range(0,m+1),1.0/lengthScale**(2))
        if np.any( np.isfinite(coeffs) == False):
            raise ValueError("sde_standard_periodic: Coefficients are not finite!")
-            #import pdb; pdb.set_trace()
+        #import pdb; pdb.set_trace()
        coeffs[0] = 0.5*coeffs[0]
-        
+        #print(coeffs)
        # Derivatives wrt (lengthScale)
        coeffs_dl = np.zeros(m+1)
        coeffs_dl[1:] = magnSigma2*lengthScale**(-3) * sp.exp(-lengthScale**(-2))*\
@ -177,4 +222,4 @@ def seriescoeff(m=6,lengthScale=1.0,magnSigma2=1.0, true_covariance=False):
            (2*special.iv(0,lengthScale**(-2)) - 2*special.iv(1,lengthScale**(-2)) )     
        

-    return coeffs, coeffs_dl
+    return coeffs.squeeze(), coeffs_dl.squeeze()
--- a/GPy/kern/src/sde_stationary.py
+++ b/GPy/kern/src/sde_stationary.py
@ -15,6 +15,7 @@ try:
    from scipy.linalg import solve_continuous_lyapunov as lyap
 except ImportError:
    from scipy.linalg import solve_lyapunov as lyap
+import warnings

 class sde_RBF(RBF):
    """
@ -29,6 +30,37 @@ class sde_RBF(RBF):
        k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \\ \\ \\ \\  \text{ where  } r = \sqrt{\sum_{i=1}^{input dim} \frac{(x_i-y_i)^2}{\ell_i^2} }

    """
+    def __init__(self, *args, **kwargs):
+        """
+        Init constructior.
+        
+        Two optinal extra parameters are added in addition to the ones in 
+        RBF kernel.
+        
+        :param approx_order: approximation order for the RBF covariance. (Default 10)
+        :type approx_order: int
+        
+        :param balance: Whether to balance this kernel separately. (Defaulf True). Model has a separate parameter for balancing.
+        :type balance: bool
+        """
+        
+        if 'balance' in kwargs:
+            self.balance = bool( kwargs.get('balance') )
+            del kwargs['balance']
+        else:
+            self.balance = True
+        
+        
+        if 'approx_order' in kwargs:
+            self.approx_order = kwargs.get('approx_order')
+            del kwargs['approx_order']
+        else:
+            self.approx_order = 6
+        
+        
+        
+        super(sde_RBF, self).__init__(*args, **kwargs)
+        
    def sde_update_gradient_full(self, gradients):
        """
        Update gradient in the order in which parameters are represented in the
@ -41,23 +73,43 @@ class sde_RBF(RBF):
    def sde(self):
        """
        Return the state space representation of the covariance.
+        
+        Note! For Sparse GP inference too small or two high values of lengthscale
+        lead to instabilities. This is because Qc are too high or too low
+        and P_inf are not full rank. This effect depends on approximatio order.
+        For N = 10. lengthscale must be in (0.8,8). For other N tests must be conducted.
+        N=6: (0.06,31)
+        Variance should be within reasonable bounds as well, but its dependence is linear.
+        
+        The above facts do not take into accout regularization.
        """
-
-        N = 10# approximation order ( number of terms in exponent series expansion)
+        #import pdb; pdb.set_trace()
+        if self.approx_order is not None:
+            N = self.approx_order
+        else:
+            N = 10# approximation order ( number of terms in exponent series expansion)
+            
        roots_rounding_decimals = 6

        fn = np.math.factorial(N)

-        kappa = 1.0/2.0/self.lengthscale**2
+        p_lengthscale = float( self.lengthscale )
+        p_variance = float(self.variance)
+        kappa = 1.0/2.0/p_lengthscale**2

-        Qc = np.array((self.variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),)
+        Qc = np.array( ((p_variance*np.sqrt(np.pi/kappa)*fn*(4*kappa)**N,),) )
+        
+        eps = 1e-12
+        if (float(Qc) > 1.0/eps) or (float(Qc) < eps):
+            warnings.warn("""sde_RBF kernel: the noise variance Qc is either very large or very small. 
+                                It influece conditioning of P_inf: {0:e}""".format(float(Qc)) )

-        pp = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower
+        pp1 = np.zeros((2*N+1,)) # array of polynomial coefficients from higher power to lower

        for n in range(0, N+1): # (2N+1) - number of polynomial coefficients
-            pp[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
-
-        pp = sp.poly1d(pp)
+            pp1[2*(N-n)] = fn*(4.0*kappa)**(N-n)/np.math.factorial(n)*(-1)**n
+            
+        pp = sp.poly1d(pp1)
        roots = sp.roots(pp)

        neg_real_part_roots = roots[np.round(np.real(roots) ,roots_rounding_decimals) < 0]
@ -83,17 +135,17 @@ class sde_RBF(RBF):
        # Derivatives:
        dFvariance = np.zeros(F.shape)
        dFlengthscale = np.zeros(F.shape)
-        dFlengthscale[-1,:] = -aa[-1:0:-1]/self.lengthscale * np.arange(-N,0,1)
+        dFlengthscale[-1,:] = -aa[-1:0:-1]/p_lengthscale * np.arange(-N,0,1)

-        dQcvariance = Qc/self.variance
-        dQclengthscale = np.array(((self.variance*np.sqrt(2*np.pi)*fn*2**N*self.lengthscale**(-2*N)*(1-2*N,),)))
-
-        dPinf_variance = Pinf/self.variance
+        dQcvariance = Qc/p_variance
+        dQclengthscale = np.array(( (p_variance*np.sqrt(2*np.pi)*fn*2**N*p_lengthscale**(-2*N)*(1-2*N),),))
+        
+        dPinf_variance = Pinf/p_variance

        lp = Pinf.shape[0]
        coeff = np.arange(1,lp+1).reshape(lp,1) + np.arange(1,lp+1).reshape(1,lp) - 2
        coeff[np.mod(coeff,2) != 0] = 0
-        dPinf_lengthscale = -1/self.lengthscale*Pinf*coeff
+        dPinf_lengthscale = -1/p_lengthscale*Pinf*coeff

        dF[:,:,0]    = dFvariance
        dF[:,:,1]    = dFlengthscale
@ -105,10 +157,11 @@ class sde_RBF(RBF):
        P0 = Pinf.copy()
        dP0 = dPinf.copy()

-        # Benefits of this are not very sound. Helps only in one case:
-        # SVD Kalman + RBF kernel
-        import GPy.models.state_space_main as ssm
-        (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0, T) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )
+        if self.balance:
+            # Benefits of this are not very sound. Helps only in one case:
+            # SVD Kalman + RBF kernel
+            import GPy.models.state_space_main as ssm
+            (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf,dP0) = ssm.balance_ss_model(F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0 )

        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

--- a/GPy/kern/src/standard_periodic.py
+++ b/GPy/kern/src/standard_periodic.py
@ -94,7 +94,15 @@ class StdPeriodic(Kern):
        self.link_parameters(self.variance,  self.period, self.lengthscale)

    def to_dict(self):
-        input_dict = super(StdPeriodic, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(StdPeriodic, self)._save_to_input_dict()
        input_dict["class"] = "GPy.kern.StdPeriodic"
        input_dict["variance"] = self.variance.values.tolist()
        input_dict["period"] = self.period.values.tolist()
--- a/GPy/kern/src/static.py
+++ b/GPy/kern/src/static.py
@ -14,8 +14,8 @@ class Static(Kern):
        self.variance = Param('variance', variance, Logexp())
        self.link_parameters(self.variance)

-    def _to_dict(self):
-        input_dict = super(Static, self)._to_dict()
+    def _save_to_input_dict(self):
+        input_dict = super(Static, self)._save_to_input_dict()
        input_dict["variance"] =  self.variance.values.tolist()
        return input_dict

@ -139,12 +139,12 @@ class Bias(Static):
        super(Bias, self).__init__(input_dim, variance, active_dims, name)

    def to_dict(self):
-        input_dict = super(Bias, self)._to_dict()
+        input_dict = super(Bias, self)._save_to_input_dict()
        input_dict["class"] = "GPy.kern.Bias"
        return input_dict

    @staticmethod
-    def _from_dict(kernel_class, input_dict):
+    def _build_from_input_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Bias(**input_dict)

--- a/GPy/kern/src/stationary.py
+++ b/GPy/kern/src/stationary.py
@ -14,9 +14,10 @@ from paramz.transformations import Logexp

 try:
    from . import stationary_cython
+    use_stationary_cython = config.getboolean('cython', 'working')
 except ImportError:
    print('warning in stationary: failed to import cython module: falling back to numpy')
-    config.set('cython', 'working', 'false')
+    use_stationary_cython = False


 class Stationary(Kern):
@ -79,8 +80,8 @@ class Stationary(Kern):
        assert self.variance.size==1
        self.link_parameters(self.variance, self.lengthscale)

-    def _to_dict(self):
-        input_dict = super(Stationary, self)._to_dict()
+    def _save_to_input_dict(self):
+        input_dict = super(Stationary, self)._save_to_input_dict()
        input_dict["variance"] =  self.variance.values.tolist()
        input_dict["lengthscale"] = self.lengthscale.values.tolist()
        input_dict["ARD"] = self.ARD
@ -203,7 +204,7 @@ class Stationary(Kern):

            tmp = dL_dr*self._inv_dist(X, X2)
            if X2 is None: X2 = X
-            if config.getboolean('cython', 'working'):
+            if use_stationary_cython:
                self.lengthscale.gradient = self._lengthscale_grads_cython(tmp, X, X2)
            else:
                self.lengthscale.gradient = self._lengthscale_grads_pure(tmp, X, X2)
@ -246,7 +247,7 @@ class Stationary(Kern):
        """
        Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
        """
-        if config.getboolean('cython', 'working'):
+        if use_stationary_cython:
            return self._gradients_X_cython(dL_dK, X, X2)
        else:
            return self._gradients_X_pure(dL_dK, X, X2)
@ -366,12 +367,20 @@ class Exponential(Stationary):
        return -self.K_of_r(r)

    def to_dict(self):
-        input_dict = super(Exponential, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(Exponential, self)._save_to_input_dict()
        input_dict["class"] = "GPy.kern.Exponential"
        return input_dict

    @staticmethod
-    def _from_dict(kernel_class, input_dict):
+    def _build_from_input_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Exponential(**input_dict)

@ -424,12 +433,20 @@ class Matern32(Stationary):
        super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def to_dict(self):
-        input_dict = super(Matern32, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(Matern32, self)._save_to_input_dict()
        input_dict["class"] = "GPy.kern.Matern32"
        return input_dict

    @staticmethod
-    def _from_dict(kernel_class, input_dict):
+    def _build_from_input_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Matern32(**input_dict)

@ -513,12 +530,20 @@ class Matern52(Stationary):
        super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def to_dict(self):
-        input_dict = super(Matern52, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(Matern52, self)._save_to_input_dict()
        input_dict["class"] = "GPy.kern.Matern52"
        return input_dict

    @staticmethod
-    def _from_dict(kernel_class, input_dict):
+    def _build_from_input_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Matern52(**input_dict)

@ -578,12 +603,20 @@ class ExpQuad(Stationary):
        super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def to_dict(self):
-        input_dict = super(ExpQuad, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(ExpQuad, self)._save_to_input_dict()
        input_dict["class"] = "GPy.kern.ExpQuad"
        return input_dict

    @staticmethod
-    def _from_dict(kernel_class, input_dict):
+    def _build_from_input_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return ExpQuad(**input_dict)

@ -621,13 +654,21 @@ class RatQuad(Stationary):
        self.link_parameters(self.power)

    def to_dict(self):
-        input_dict = super(RatQuad, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(RatQuad, self)._save_to_input_dict()
        input_dict["class"] = "GPy.kern.RatQuad"
        input_dict["power"] = self.power.values.tolist()
        return input_dict

    @staticmethod
-    def _from_dict(kernel_class, input_dict):
+    def _build_from_input_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return RatQuad(**input_dict)

--- a/GPy/kern/src/symmetric.py
+++ b/GPy/kern/src/symmetric.py
@ -0,0 +1,170 @@
+import numpy as np
+
+from .kern import Kern
+
+
+class Symmetric(Kern):
+    """
+    Symmetric kernel that models a function with even or odd symmetry:
+
+    For even symmetry we have:
+
+    .. math::
+
+        f(x) = f(Ax)
+
+    we then model the function as:
+
+    .. math::
+
+        f(x) = g(x) + g(Ax)
+
+    the corresponding kernel is:
+
+    .. math::
+
+        k(x, x') + k(Ax, x') + k(x, Ax') + k(Ax, Ax')
+
+    For odd symmetry we have:
+
+    .. math::
+
+        f(x) = -f(Ax)
+
+    it does this by modelling:
+
+    .. math::
+
+        f(x) = g(x) - g(Ax)
+
+    with kernel
+
+    .. math::
+
+        k(x, x') - k(Ax, x') - k(x, Ax') + k(Ax, Ax')
+
+    where k(x, x') is the kernel of g(x)
+
+    :param base_kernel: kernel to make symmetric
+    :param transform: transformation matrix describing symmetry plane, A in equations above
+    :param symmetry_type: 'odd' or 'even' depending on the symmetry needed
+    """
+
+    def __init__(self, base_kernel, transform, symmetry_type='even'):
+        n_dims = max(base_kernel.active_dims) + 1
+        super(Symmetric, self).__init__(n_dims, list(range(n_dims)), name='symmetric_kernel')
+        if symmetry_type is 'odd':
+            self.symmetry_sign = -1.
+        elif symmetry_type is 'even':
+            self.symmetry_sign = 1.
+        else:
+            raise ValueError('symmetry_type input must be ''odd'' or ''even''')
+        self.transform = transform
+        self.base_kernel = base_kernel
+        self.param_names = base_kernel.parameter_names()
+        self.link_parameters(self.base_kernel)
+
+    def K(self, X, X2):
+        X_sym = X.dot(self.transform)
+
+        if X2 is None:
+            X2 = X
+            X2_sym = X_sym
+        else:
+            X2_sym = X2.dot(self.transform)
+
+        cross_term_x_ax = self.symmetry_sign * self.base_kernel.K(X, X2_sym)
+
+        if X2 is None:
+            cross_term_ax_x = cross_term_x_ax.T
+        else:
+            cross_term_ax_x = self.symmetry_sign * \
+                self.base_kernel.K(X_sym, X2)
+
+        return (self.base_kernel.K(X, X2) + cross_term_x_ax + cross_term_ax_x
+                + self.base_kernel.K(X_sym, X2_sym))
+
+    def Kdiag(self, X):
+        n_points = X.shape[0]
+        X_sym = X.dot(self.transform)
+
+        # Evaluate cross terms in batches, taking the diag of a larger matrix
+        # is wasteful, but is more efficient than calling kernel.K for each data point
+        batch_size = 100
+        n_batches = int(np.ceil(n_points / float(batch_size)))
+        cross_term = np.zeros(X.shape[0])
+        for i in range(n_batches):
+            i_start = i * batch_size
+            i_end = np.min([(i + 1) * batch_size, n_points])
+            cross_term[i_start:i_end] = np.diag(self.base_kernel.K(
+                X_sym[i_start:i_end, :], X[i_start:i_end, :]))
+
+        return self.base_kernel.Kdiag(X) + 2 * self.symmetry_sign * cross_term + self.base_kernel.Kdiag(X_sym)
+
+    def update_gradients_full(self, dL_dK, X, X2):
+        X_sym = X.dot(self.transform)
+        if X2 is None:
+            X2 = X
+        X2_sym = X2.dot(self.transform)
+
+        # Get gradients from base kernel one term at a time
+        self.base_kernel.update_gradients_full(dL_dK, X_sym, X2)
+        gradient = self.symmetry_sign * self.base_kernel.gradient.copy()
+
+        self.base_kernel.update_gradients_full(dL_dK, X, X2_sym)
+        gradient += self.symmetry_sign * self.base_kernel.gradient.copy()
+
+        self.base_kernel.update_gradients_full(dL_dK, X_sym, X2_sym)
+        gradient += self.base_kernel.gradient.copy()
+
+        self.base_kernel.update_gradients_full(dL_dK, X, X2)
+        gradient += self.base_kernel.gradient.copy()
+
+        # Set gradients
+        self.base_kernel.gradient = gradient
+
+    def update_gradients_diag(self, dL_dK, X):
+
+        dL_dK_full = np.diag(dL_dK)
+        X_sym = X.dot(self.transform)
+
+        # Calculate gradient for k(Ax, Ax')
+        self.base_kernel.update_gradients_diag(dL_dK, X_sym)
+        gradient = self.base_kernel.gradient.copy()
+
+        # Calculate gradient for k(x, x')
+        self.base_kernel.update_gradients_diag(dL_dK, X)
+        gradient += self.base_kernel.gradient.copy()
+
+        # Batch process cross term for speed
+        batch_size = 100
+        n_points = dL_dK.shape[0]
+        n_batches = int(np.ceil(n_points / float(batch_size)))
+        gradient_part = np.zeros(gradient.shape)
+        for i in range(n_batches):
+            i_start = i * batch_size
+            i_end = np.min([(i + 1) * batch_size, n_points])
+            dL_dK_part = dL_dK_full[i_start:i_end, i_start:i_end]
+            X_part = X[i_start:i_end, :]
+            X_sym_part = X_sym[i_start:i_end, :]
+            self.base_kernel.update_gradients_full(
+                dL_dK_part, X_part, X_sym_part)
+            gradient_part += self.base_kernel.gradient.copy()
+
+        gradient += 2 * self.symmetry_sign * gradient_part
+
+        self.base_kernel.gradient = gradient
+
+    def gradients_X(self, dL_dK, X, X2):
+        X_sym = X.dot(self.transform)
+        if X2 is None:
+            X2 = X
+            X2_sym = X.dot(self.transform)
+            dL_dK = dL_dK + dL_dK.T
+        else:
+            X2_sym = X2.dot(self.transform)
+
+        return (self.base_kernel.gradients_X(dL_dK, X, X2)
+                + self.base_kernel.gradients_X(dL_dK, X_sym, X2_sym).dot(self.transform.T)
+                + self.symmetry_sign * self.base_kernel.gradients_X(dL_dK, X, X2_sym)
+                + self.symmetry_sign * self.base_kernel.gradients_X(dL_dK, X_sym, X2).dot(self.transform.T))
--- a/GPy/likelihoods/init.py
+++ b/GPy/likelihoods/init.py
@ -9,3 +9,4 @@ from .mixed_noise import MixedNoise
 from .binomial import Binomial
 from .weibull import Weibull
 from .loglogistic import LogLogistic
+from .multioutput_likelihood import MultioutputLikelihood
--- a/GPy/likelihoods/bernoulli.py
+++ b/GPy/likelihoods/bernoulli.py
@ -30,7 +30,15 @@ class Bernoulli(Likelihood):
            self.log_concave = True

    def to_dict(self):
-        input_dict = super(Bernoulli, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(Bernoulli, self)._save_to_input_dict()
        input_dict["class"] = "GPy.likelihoods.Bernoulli"
        return input_dict

--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@ -47,7 +47,15 @@ class Gaussian(Likelihood):
            self.log_concave = True

    def to_dict(self):
-        input_dict = super(Gaussian, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(Gaussian, self)._save_to_input_dict()
        input_dict["class"] = "GPy.likelihoods.Gaussian"
        input_dict["variance"] = self.variance.values.tolist()
        return input_dict
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@ -49,7 +49,7 @@ class Likelihood(Parameterized):
    def to_dict(self):
        raise NotImplementedError

-    def _to_dict(self):
+    def _save_to_input_dict(self):
        input_dict = {}
        input_dict["name"] = self.name
        input_dict["gp_link_dict"] = self.gp_link.to_dict()
@ -57,6 +57,18 @@ class Likelihood(Parameterized):

    @staticmethod
    def from_dict(input_dict):
+        """
+        Instantiate an object of a derived class using the information
+        in input_dict (built by the to_dict method of the derived class).
+        More specifically, after reading the derived class from input_dict,
+        it calls the method _build_from_input_dict of the derived class.
+        Note: This method should not be overrided in the derived class. In case
+        it is needed, please override _build_from_input_dict instate.
+
+        :param dict input_dict: Dictionary with all the information needed to
+           instantiate the object.
+        """
+
        import copy
        input_dict = copy.deepcopy(input_dict)
        likelihood_class = input_dict.pop('class')
@ -64,10 +76,10 @@ class Likelihood(Parameterized):
        name = input_dict.pop('name')
        import GPy
        likelihood_class = eval(likelihood_class)
-        return likelihood_class._from_dict(likelihood_class, input_dict)
+        return likelihood_class._build_from_input_dict(likelihood_class, input_dict)

    @staticmethod
-    def _from_dict(likelihood_class, input_dict):
+    def _build_from_input_dict(likelihood_class, input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        gp_link_dict = input_dict.pop('gp_link_dict')
--- a/GPy/likelihoods/link_functions.py
+++ b/GPy/likelihoods/link_functions.py
@ -46,20 +46,32 @@ class GPTransformation(object):
    def to_dict(self):
        raise NotImplementedError

-    def _to_dict(self):
+    def _save_to_input_dict(self):
        return {}

    @staticmethod
    def from_dict(input_dict):
+        """
+        Instantiate an object of a derived class using the information
+        in input_dict (built by the to_dict method of the derived class).
+        More specifically, after reading the derived class from input_dict,
+        it calls the method _build_from_input_dict of the derived class.
+        Note: This method should not be overrided in the derived class. In case
+        it is needed, please override _build_from_input_dict instate.
+
+        :param dict input_dict: Dictionary with all the information needed to
+           instantiate the object.
+        """
+
        import copy
        input_dict = copy.deepcopy(input_dict)
        link_class = input_dict.pop('class')
        import GPy
        link_class = eval(link_class)
-        return link_class._from_dict(link_class, input_dict)
+        return link_class._build_from_input_dict(link_class, input_dict)

    @staticmethod
-    def _from_dict(link_class, input_dict):
+    def _build_from_input_dict(link_class, input_dict):
        return link_class(**input_dict)

 class Identity(GPTransformation):
@ -82,7 +94,15 @@ class Identity(GPTransformation):
        return np.zeros_like(f)

    def to_dict(self):
-        input_dict = super(Identity, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(Identity, self)._save_to_input_dict()
        input_dict["class"] = "GPy.likelihoods.link_functions.Identity"
        return input_dict

@ -106,7 +126,15 @@ class Probit(GPTransformation):
        return (safe_square(f)-1.)*std_norm_pdf(f)

    def to_dict(self):
-        input_dict = super(Probit, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(Probit, self)._save_to_input_dict()
        input_dict["class"] = "GPy.likelihoods.link_functions.Probit"
        return input_dict

--- a/GPy/likelihoods/multioutput_likelihood.py
+++ b/GPy/likelihoods/multioutput_likelihood.py
@ -0,0 +1,233 @@
+# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+# Multioutput likelihood structure is similar to the 
+# corresponding structure in GPstuff. If building complex
+# multioutput models on top of this class and need a reference,
+# check GPstuff project.
+
+import numpy as np
+from scipy import stats, special
+from . import link_functions
+from .likelihood import Likelihood
+from .mixed_noise import MixedNoise
+from .gaussian import Gaussian
+from ..core.parameterization import Param
+from paramz.transformations import Logexp
+from ..core.parameterization import Parameterized
+from ..kern.src.independent_outputs import index_to_slices
+import itertools
+
+class MultioutputLikelihood(MixedNoise):
+    '''
+    CombinedLikelihood is used to combine different likelihoods for 
+    multioutput models, where different outputs have different observation models.
+    
+    As input the likelihood takes a list of likelihoods used. The likelihood
+    uses "output_index" in Y_metadata to connect observations to likelihoods.
+    '''
+    def __init__(self, likelihoods_list, name='multioutput_likelihood'):
+        super(Likelihood, self).__init__(name=name)
+        
+        indices, inverse = self._unique_likelihoods(likelihoods_list)
+        self.link_parameters(*[likelihoods_list[i] for i in indices])
+        
+        self.index_map = [indices[i] for i in inverse]
+        self.inverse = inverse 
+        
+        self.gradient_sizes = [likelihoods_list[i].size for i in indices]
+        self.gradient_index = np.cumsum(self.gradient_sizes) - self.gradient_sizes[0]
+        
+        self.likelihoods_list = likelihoods_list
+        
+        self.gp_link = None
+        self.log_concave = False
+        self.not_block_really = False
+    
+    def _unique_likelihoods(self, likelihoods_list):
+        indices = []
+        inverse = []
+        for i in range(len(likelihoods_list)):
+            for j in indices:
+                if likelihoods_list[i] is likelihoods_list[j]:
+                    inverse += [j]
+                    break
+            if len(inverse) <= i:
+                indices += [i]
+                inverse += [i]
+        return indices, inverse
+    
+    def moments_match_ep(self, data_i, tau_i, v_i, Y_metadata_i):
+        return self.likelihoods_list[Y_metadata_i["output_index"][0]].moments_match_ep(data_i, tau_i, v_i, Y_metadata_i)
+
+    def exact_inference_gradients(self, dL_dKdiag, Y_metadata):
+        assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
+        ind = [self.index_map[i] for i in Y_metadata['output_index'].flatten()]
+        return np.array([dL_dKdiag[ind==i].sum() for i in np.unique(self.index_map)])
+    
+    def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag, Y_metadata=None, quad_mode='gk', boost_grad=1.):
+        ind = [self.index_map[i] for i in Y_metadata['output_index'].flatten()]
+        slices = index_to_slices(ind)
+        grads = np.zeros((self.size))
+        for i in range(len(slices)):
+            if self.likelihoods_list[i].size > 0:
+                ii = self.inverse[i] ## index in our gradient_sizes and gradient_index -lists
+                for j in range(len(slices[i])): 
+                    grads[self.gradient_index[ii]:self.gradient_index[ii]+self.gradient_sizes[ii]] += self.likelihoods_list[i].ep_gradients(Y[slices[i][j],:], cav_tau[slices[i][j]], cav_v[slices[i][j]], dL_dKdiag = dL_dKdiag[slices[i][j]], Y_metadata=Y_metadata, quad_mode=quad_mode, boost_grad=boost_grad)
+        return grads
+
+    def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        mu_new = np.zeros(mu.shape )
+        var_new = np.zeros(var.shape )
+        for j in outputs:
+            m, v = self.likelihoods_list[j].predictive_values(mu[ind==j,:], var[ind==j,:], full_cov, Y_metadata=None)
+            mu_new[ind==j,:] = m
+            var_new[ind==j,:] = v
+        return mu_new, var_new
+
+    def predictive_variance(self, mu, sigma, Y_metadata):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        var = np.zeros( (sigma.size) )
+        for j in outputs:
+            v = self.likelihoods_list[j].predictive_variance(mu[ind==j,:],
+                sigma[ind==j,:],Y_metadata=None)
+            var[ind==j,:] = np.hstack(v)
+        return [v[:,None] for v in var.T]
+
+    def pdf(self, f, y, Y_metadata=None):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        pdf = np.zeros(y.shape)
+        for j in outputs:
+            pdf[ind==j,:] = self.likelihoods_list[j].pdf(f[ind==j,:], y[ind==j,:], Y_metadata=None)
+        return pdf
+
+    def pdf_link(self, inv_link_f, y, Y_metadata=None):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        pdf_link = np.zeros(y.shape)
+        for j in outputs:
+            pdf_link[ind==j,:] = self.likelihoods_list[j].pdf_link(inv_link_f[ind==j,:], y[ind==j,:], Y_metadata=None)
+        return pdf_link
+
+    def logpdf(self, f, y, Y_metadata=None):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        if ind.shape[0]==1:
+            ind = ind[0]*np.ones(f.shape[0])
+            y = y*np.ones(f.shape)
+        lpdf = np.zeros(f.shape)
+        for j in outputs:
+            lpdf[np.where(ind==j)[0],:] = self.likelihoods_list[j].logpdf(f[np.where(ind==j)[0],:], y[np.where(ind==j)[0],:], Y_metadata=None)
+        return lpdf
+
+    def logpdf_link(self, inv_link_f, y, Y_metadata=None):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        logpdf_link = np.zeros(y.shape)
+        for j in outputs:
+            logpdf_link[ind==j,:] = self.likelihoods_list[j].logpdf_link(inv_link_f[ind==j,:], y[ind==j,:], Y_metadata=None)
+        return logpdf_link
+    
+    def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        dlogpdf_dlink = np.zeros(y.shape)
+        for j in outputs:
+            dlogpdf_dlink[ind==j,:] = self.likelihoods_list[j].dlogpdf_dlink(inv_link_f[ind==j,:], y[ind==j,:], Y_metadata=None)
+        return dlogpdf_dlink
+    
+    def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        d2logpdf_dlink2 = np.zeros(y.shape)
+        for j in outputs:
+            d2logpdf_dlink2[ind==j,:] = self.likelihoods_list[j].d2logpdf_dlink2(inv_link_f[ind==j,:], y[ind==j,:], Y_metadata=None)
+        return d2logpdf_dlink2
+     
+    def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        d3logpdf_dlink3 = np.zeros(y.shape)
+        for j in outputs:
+            d3logpdf_dlink3[ind==j,:] = self.likelihoods_list[j].d3logpdf_dlink3(inv_link_f[ind==j,:], y[ind==j,:], Y_metadata=None)
+        return d3logpdf_dlink3
+        
+    def log_predictive_density(self, y_test, mu_star, var_star, Y_metadata=None):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        log_pred = np.zeros(y_test.shape)
+        for j in outputs:
+            log_pred[ind==j,:] = self.likelihoods_list[j].log_predictive_density(y_test[ind==j,:], mu_star[ind==j,:], var_star[ind==j,:], Y_metadata=None)
+        return log_pred
+
+    def dlogpdf_dtheta(self, f, y, Y_metadata=None):
+        ind = Y_metadata['output_index'].flatten()
+        if ind.shape[0]==1:
+            ind = ind[0]*np.ones(f.shape[0])
+            y = y*np.ones(f.shape)
+        slices = index_to_slices(ind)
+        pdf = np.zeros((self.size, f.shape[0], f.shape[1]) )
+        for i in range(len(slices)):
+            if self.likelihoods_list[i].size > 0:
+                ii = self.inverse[i]
+                for j in range(len(slices[i])):
+                    pdf[self.gradient_index[ii]:self.gradient_index[ii]+self.gradient_sizes[ii], slices[i][j],:] = self.likelihoods_list[i].dlogpdf_dtheta(f[slices[i][j],:], y[slices[i][j],:], Y_metadata=None)
+        return pdf
+
+    def d2logpdf_df2(self, f, y, Y_metadata):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        Q = np.zeros(f.shape)
+        for j in outputs:
+            Q[ind==j,:] = self.likelihoods_list[j].d2logpdf_df2(f[ind==j,:],
+                y[ind==j,:],Y_metadata=None)
+        return Q
+
+    def dlogpdf_df(self, f, y, Y_metadata):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        Q = np.zeros(f.shape)
+        for j in outputs:
+            Q[ind==j,:] = self.likelihoods_list[j].dlogpdf_df(f[ind==j,:],
+                y[ind==j,:],Y_metadata=None)
+        return Q
+
+    def d3logpdf_df3(self, f, y, Y_metadata=None):
+        ind = Y_metadata['output_index'].flatten()
+        outputs = np.unique(ind)
+        Q = np.zeros(f.shape)
+        for j in outputs:
+            Q[ind==j,:] = self.likelihoods_list[j].d3logpdf_df3(f[ind==j,:],
+                y[ind==j,:],Y_metadata=None)
+        return Q
+    
+    def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
+        ind = Y_metadata['output_index'].flatten()
+        if ind.shape[0]==1:
+            ind = ind[0]*np.ones(f.shape[0])
+            y = y*np.ones(f.shape)
+        slices = index_to_slices(ind)
+        pdf = np.zeros((self.size, f.shape[0], f.shape[1]) )
+        for i in range(len(slices)):
+            if self.likelihoods_list[i].size > 0:
+                ii = self.inverse[i]
+                for j in range(len(slices[i])):
+                    pdf[self.gradient_index[ii]:self.gradient_index[ii]+self.gradient_sizes[ii], slices[i][j],:] = self.likelihoods_list[i].dlogpdf_df_dtheta(f[slices[i][j],:], y[slices[i][j],:], Y_metadata=None)
+        return pdf
+    
+    def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
+        ind = Y_metadata['output_index'].flatten()
+        if ind.shape[0]==1:
+            ind = ind[0]*np.ones(f.shape[0])
+            y = y*np.ones(f.shape)
+        slices = index_to_slices(ind)
+        pdf = np.zeros((self.size, f.shape[0], f.shape[1]) )
+        for i in range(len(slices)):
+            if self.likelihoods_list[i].size > 0:
+                ii = self.inverse[i]
+                for j in range(len(slices[i])):
+                    pdf[self.gradient_index[ii]:self.gradient_index[ii]+self.gradient_sizes[ii], slices[i][j],:] = self.likelihoods_list[i].d2logpdf_df2_dtheta(f[slices[i][j],:], y[slices[i][j],:], Y_metadata=None)
+        return pdf
--- a/GPy/mappings/constant.py
+++ b/GPy/mappings/constant.py
@ -40,7 +40,14 @@ class Constant(Mapping):
        return np.zeros_like(X)

    def to_dict(self):
-        input_dict = super(Constant, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+        input_dict = super(Constant, self)._save_to_input_dict()
        input_dict["class"] = "GPy.mappings.Constant"
        input_dict["value"] = self.C.values[0]
        return input_dict
--- a/GPy/mappings/identity.py
+++ b/GPy/mappings/identity.py
@ -20,6 +20,13 @@ class Identity(Mapping):
        return dL_dF

    def to_dict(self):
-        input_dict = super(Identity, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+        input_dict = super(Identity, self)._save_to_input_dict()
        input_dict["class"] = "GPy.mappings.Identity"
        return input_dict
--- a/GPy/mappings/linear.py
+++ b/GPy/mappings/linear.py
@ -39,13 +39,21 @@ class Linear(Mapping):
        return np.dot(dL_dF, self.A.T)

    def to_dict(self):
-        input_dict = super(Linear, self)._to_dict()
+        """
+        Convert the object into a json serializable dictionary.
+
+        Note: It uses the private method _save_to_input_dict of the parent.
+
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+
+        input_dict = super(Linear, self)._save_to_input_dict()
        input_dict["class"] = "GPy.mappings.Linear"
        input_dict["A"] = self.A.values.tolist()
        return input_dict

    @staticmethod
-    def _from_dict(mapping_class, input_dict):
+    def _build_from_input_dict(mapping_class, input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        A = np.array(input_dict.pop('A'))
--- a/GPy/models/init.py
+++ b/GPy/models/init.py
@ -29,3 +29,4 @@ from .gp_offset_regression import GPOffsetRegression
 from .gp_grid_regression import GPRegressionGrid
 from .gp_multiout_regression import GPMultioutRegression
 from .gp_multiout_regression_md import GPMultioutRegressionMD
+from .tp_regression import TPRegression
--- a/GPy/models/gp_classification.py
+++ b/GPy/models/gp_classification.py
@ -16,18 +16,27 @@ class GPClassification(GP):
    :param X: input observations
    :param Y: observed values, can be None if likelihood is not None
    :param kernel: a GPy kernel, defaults to rbf
+    :param likelihood: a GPy likelihood, defaults to Bernoulli
+    :param inference_method: Latent function inference to use, defaults to EP
+    :type inference_method: :class:`GPy.inference.latent_function_inference.LatentFunctionInference`

    .. Note:: Multiple independent outputs are allowed using columns of Y

    """

-    def __init__(self, X, Y, kernel=None,Y_metadata=None, mean_function=None):
+    def __init__(self, X, Y, kernel=None,Y_metadata=None, mean_function=None, inference_method=None,
+                 likelihood=None, normalizer=False):
        if kernel is None:
            kernel = kern.RBF(X.shape[1])

-        likelihood = likelihoods.Bernoulli()
+        if likelihood is None:
+            likelihood = likelihoods.Bernoulli()

-        GP.__init__(self, X=X, Y=Y,  kernel=kernel, likelihood=likelihood, inference_method=EP(), mean_function=mean_function, name='gp_classification')
+        if inference_method is None:
+            inference_method = EP()
+
+        GP.__init__(self, X=X, Y=Y,  kernel=kernel, likelihood=likelihood, inference_method=inference_method,
+                    mean_function=mean_function, name='gp_classification', normalizer=normalizer)

    @staticmethod
    def from_gp(gp):
@ -48,3 +57,9 @@ class GPClassification(GP):

    def save_model(self, output_filename, compress=True, save_data=True):
        self._save_model(output_filename, compress=True, save_data=True)
+
+    @staticmethod
+    def _build_from_input_dict(input_dict, data=None):
+        input_dict = GPClassification._format_input_dict(input_dict, data)
+        input_dict.pop('name', None)  # Name parameter not required by GPClassification
+        return GPClassification(**input_dict)
--- a/GPy/models/sparse_gp_classification.py
+++ b/GPy/models/sparse_gp_classification.py
@ -7,6 +7,7 @@ from ..core import SparseGP
 from .. import likelihoods
 from .. import kern
 from ..inference.latent_function_inference import EPDTC
+from copy import deepcopy

 class SparseGPClassification(SparseGP):
    """
@ -16,8 +17,10 @@ class SparseGPClassification(SparseGP):

    :param X: input observations
    :param Y: observed values
-    :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
+    :param likelihood: a GPy likelihood, defaults to Bernoulli
    :param kernel: a GPy kernel, defaults to rbf+white
+    :param inference_method: Latent function inference to use, defaults to EPDTC
+    :type inference_method: :class:`GPy.inference.latent_function_inference.LatentFunctionInference`
    :param normalize_X:  whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
    :param normalize_Y:  whether to normalize the input data before computing (predictions will be in original scales)
@ -26,11 +29,13 @@ class SparseGPClassification(SparseGP):

    """

-    def __init__(self, X, Y=None, likelihood=None, kernel=None, Z=None, num_inducing=10, Y_metadata=None):
+    def __init__(self, X, Y=None, likelihood=None, kernel=None, Z=None, num_inducing=10, Y_metadata=None,
+                 mean_function=None, inference_method=None, normalizer=False):
        if kernel is None:
            kernel = kern.RBF(X.shape[1])

-        likelihood = likelihoods.Bernoulli()
+        if likelihood is None:
+            likelihood = likelihoods.Bernoulli()

        if Z is None:
            i = np.random.permutation(X.shape[0])[:num_inducing]
@ -38,7 +43,62 @@ class SparseGPClassification(SparseGP):
        else:
            assert Z.shape[1] == X.shape[1]

-        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=EPDTC(), name='SparseGPClassification',Y_metadata=Y_metadata)
+        if inference_method is None:
+            inference_method = EPDTC()
+
+        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, mean_function=mean_function, inference_method=inference_method,
+                          normalizer=normalizer, name='SparseGPClassification', Y_metadata=Y_metadata)
+
+    @staticmethod
+    def from_sparse_gp(sparse_gp):
+        from copy import deepcopy
+        sparse_gp = deepcopy(sparse_gp)
+        SparseGPClassification(sparse_gp.X, sparse_gp.Y, sparse_gp.Z, sparse_gp.kern, sparse_gp.likelihood, sparse_gp.inference_method, sparse_gp.mean_function, name='sparse_gp_classification')
+
+    def to_dict(self, save_data=True):
+        """
+        Store the object into a json serializable dictionary
+
+        :param boolean save_data: if true, it adds the data self.X and self.Y to the dictionary
+        :return dict: json serializable dictionary containing the needed information to instantiate the object
+        """
+        model_dict = super(SparseGPClassification,self).to_dict(save_data)
+        model_dict["class"] = "GPy.models.SparseGPClassification"
+        return model_dict
+
+    @staticmethod
+    def _build_from_input_dict(input_dict, data=None):
+        input_dict = SparseGPClassification._format_input_dict(input_dict, data)
+        input_dict.pop('name', None)  # Name parameter not required by SparseGPClassification
+        return SparseGPClassification(**input_dict)
+
+    @staticmethod
+    def from_dict(input_dict, data=None):
+        """
+        Instantiate an SparseGPClassification object using the information
+        in input_dict (built by the to_dict method).
+
+        :param data: It is used to provide X and Y for the case when the model
+           was saved using save_data=False in to_dict method.
+        :type data: tuple(:class:`np.ndarray`, :class:`np.ndarray`)
+        """
+        import GPy
+        m = GPy.core.model.Model.from_dict(input_dict, data)
+        from copy import deepcopy
+        sparse_gp = deepcopy(m)
+        return SparseGPClassification(sparse_gp.X, sparse_gp.Y, sparse_gp.Z, sparse_gp.kern, sparse_gp.likelihood,  sparse_gp.inference_method, sparse_gp.mean_function, name='sparse_gp_classification')
+
+    def save_model(self, output_filename, compress=True, save_data=True):
+        """
+        Method to serialize the model.
+
+        :param string output_filename: Output file
+        :param boolean compress: If true compress the file using zip
+        :param boolean save_data: if true, it serializes the training data
+            (self.X and self.Y)
+        """
+        self._save_model(output_filename, compress=True, save_data=True)
+

 class SparseGPClassificationUncertainInput(SparseGP):
    """
@ -87,8 +147,3 @@ class SparseGPClassificationUncertainInput(SparseGP):
        self.psi2 = self.kern.psi2n(self.Z, self.X)
        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
        self._update_gradients()
-
-
-
-
-
--- a/GPy/models/state_space_cython.pyx
+++ b/GPy/models/state_space_cython.pyx
@ -432,6 +432,8 @@ cdef class AQcompute_batch_Cython(Q_handling_Cython):
                            (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
                            
            self.Q_svd_dict = {}
+            self.Q_square_root_dict = {}
+            self.Q_inverse_dict = {}
            self.last_k = 0
             # !!!Print statistics! Which object is created
            # !!!Print statistics! Print sizes of matrices
@ -477,19 +479,54 @@ cdef class AQcompute_batch_Cython(Q_handling_Cython):
            cdef np.ndarray[DTYPE_t, ndim=2] U
            cdef np.ndarray[DTYPE_t, ndim=1] S           
            cdef np.ndarray[DTYPE_t, ndim=2] Vh
+            
        
-            if matrix_index in self.Q_svd_dict:
-                square_root = self.Q_svd_dict[matrix_index]
+            if matrix_index in self.Q_square_root_dict:
+                square_root = self.Q_square_root_dict[matrix_index]
            else:
-                U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index], 
+                if matrix_index not in self.Q_svd_dict
+                    U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index], 
                                        full_matrices=False, compute_uv=True, 
-                                        overwrite_a=False, check_finite=False)                     
-                                        
+                                        overwrite_a=False, check_finite=False)
+                    self.Q_svd_dict[matrix_index] = (U,S,Vh)
+                else:
+                    U,S,Vh = self.Q_svd_dict[matrix_index]
+                       
                square_root = U * np.sqrt(S)
-                self.Q_svd_dict[matrix_index] = square_root
+                self.Q_suqare_root_dict[matrix_index] = square_root
            
            return square_root
-
+            
+            
+        cpdef Q_inverse(self, int k, float jitter=0.0):
+            """
+            Square root of the noise matrix Q
+            """
+            
+            cdef int matrix_index = <int>self.reconstruct_indices[k]
+            cdef np.ndarray[DTYPE_t, ndim=2] square_root
+            
+            cdef np.ndarray[DTYPE_t, ndim=2] U
+            cdef np.ndarray[DTYPE_t, ndim=1] S           
+            cdef np.ndarray[DTYPE_t, ndim=2] Vh
+            
+        
+            if matrix_index in self.Q_inverse_dict:
+                Q_inverse = self.Q_inverse_dict[matrix_index]
+            else:
+                if matrix_index not in self.Q_svd_dict
+                    U,S,Vh = sp.linalg.svd( self.Qs[:,:, matrix_index], 
+                                        full_matrices=False, compute_uv=True, 
+                                        overwrite_a=False, check_finite=False)
+                    self.Q_svd_dict[matrix_index] = (U,S,Vh)
+                else:
+                    U,S,Vh = self.Q_svd_dict[matrix_index]
+                       
+               Q_inverse = Q_inverse = np.dot( Vh.T * ( 1.0/(S + jitter)) , U.T )
+                self.Q_inverse_dict[matrix_index] = Q_inverse
+            
+            return Q_inverse
+            
 #        def return_last(self):
 #            """
 #            Function returns last available matrices.
--- a/GPy/models/state_space_main.py
+++ b/GPy/models/state_space_main.py
@ -12,6 +12,8 @@ import numpy as np
 import scipy as sp
 import scipy.linalg as linalg

+import warnings
+
 try:
    from . import state_space_setup
    setup_available = True
@ -41,6 +43,10 @@ if print_verbose:
    else:
        print("state_space: cython is NOT used")

+# When debugging external module can set some value to this variable (e.g.)
+# 'model' and in this module this variable can be seen.s
+tmp_buffer = None
+

 class Dynamic_Callables_Python(object):

@ -227,7 +233,7 @@ class R_handling_Python(Measurement_Callables_Class):
        self.R_square_root = {}

    def Rk(self, k):
-        return self.R[:, :, self.index[self.R_time_var_index, k]]
+        return self.R[:, :, int(self.index[self.R_time_var_index, k])]

    def dRk(self, k):
        if self.dR is None:
@ -305,7 +311,7 @@ class Std_Measurement_Callables_Python(R_handling_Class):
            P: parameter for Jacobian, usually covariance matrix.
        """

-        return self.H[:, :, self.index[self.H_time_var_index, k]]
+        return self.H[:, :, int(self.index[self.H_time_var_index, k])]

    def dHk(self, k):
        if self.dH is None:
@ -2303,6 +2309,8 @@ class ContDescrStateSpace(DescreteStateSpace):
            self.v_dQk = None

            self.square_root_computed = False
+            self.Q_inverse_computed = False
+            self.Q_svd_computed = False
            # !!!Print statistics! Which object is created

        def f_a(self, k,m,A):
@ -2337,7 +2345,10 @@ class ContDescrStateSpace(DescreteStateSpace):
                self.v_Qk = v_Qk
                self.v_dAk = v_dAk
                self.v_dQk = v_dQk
+                
                self.Q_square_root_computed = False
+                self.Q_inverse_computed = False
+                self.Q_svd_computed = False
            else:
                v_Ak = self.v_Ak
                v_Qk = self.v_Qk
@ -2359,8 +2370,11 @@ class ContDescrStateSpace(DescreteStateSpace):
            self.last_k = 0
            self.last_k_computed = False
            self.compute_derivatives = compute_derivatives
+            
            self.Q_square_root_computed = False
-
+            self.Q_inverse_computed = False
+            self.Q_svd_computed = False
+            self.Q_eigen_computed = False
            return self

        def Ak(self,k,m,P):
@ -2381,12 +2395,19 @@ class ContDescrStateSpace(DescreteStateSpace):

        def Q_srk(self,k):
            """
+            Check square root, maybe rewriting for Spectral decomposition is needed.
            Square root of the noise matrix Q
            """

            if ((self.last_k == k) and (self.last_k_computed == True)):
                if not self.Q_square_root_computed:
-                    (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
+                    if not self.Q_svd_computed:
+                        (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
+                        self.Q_svd = (U, S, Vh)
+                        self.Q_svd_computed = True
+                    else:
+                        (U, S, Vh) = self.Q_svd
+                        
                    square_root = U * np.sqrt(S)
                    self.square_root_computed = True
                    self.Q_square_root = square_root
@ -2396,7 +2417,56 @@ class ContDescrStateSpace(DescreteStateSpace):
                raise ValueError("Square root of Q can not be computed")

            return square_root
+        
+        def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):        
+            """
+            Function inverts Q matrix and regularizes the inverse.
+            Regularization is useful when original matrix is badly conditioned.
+            Function is currently used only in SparseGP code.
+            
+            Inputs:
+            ------------------------------
+            k: int
+            Iteration number.
+            
+            p_largest_cond_num: float
+            Largest condition value for the inverted matrix. If cond. number is smaller than that
+            no regularization happen.
+            
+            regularization_type: 1 or 2
+            Regularization type.
+            
+            regularization_type: int (1 or 2)
+            
+                type 1: 1/(S[k] + regularizer) regularizer is computed
+                type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
+            """
+            
+            #import pdb; pdb.set_trace()
+                    
+            if ((self.last_k == k) and (self.last_k_computed == True)):
+                if not self.Q_inverse_computed:
+                    if not self.Q_svd_computed:
+                        (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
+                        self.Q_svd = (U, S, Vh)
+                        self.Q_svd_computed = True
+                    else:
+                        (U, S, Vh) = self.Q_svd

+                    Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.v_Qk + self.v_Qk.T), U,S, p_largest_cond_num, p_regularization_type)
+                    
+                    self.Q_inverse_computed = True
+                    self.Q_inverse_r = Q_inverse_r
+                        
+                else:
+                    Q_inverse_r = self.Q_inverse_r
+            else:
+                raise ValueError("""Inverse of Q can not be computed, because Q has not been computed.
+                                     This requires some programming""")
+
+            return Q_inverse_r
+        
+        
        def return_last(self):
            """
            Function returns last computed matrices.
@ -2463,6 +2533,9 @@ class ContDescrStateSpace(DescreteStateSpace):
                            (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)

            self.Q_svd_dict = {}
+            self.Q_square_root_dict = {}
+            self.Q_inverse_dict = {}
+            
            self.last_k = None
             # !!!Print statistics! Which object is created
            # !!!Print statistics! Print sizes of matrices
@ -2503,17 +2576,66 @@ class ContDescrStateSpace(DescreteStateSpace):
            Square root of the noise matrix Q
            """
            matrix_index = self.reconstruct_indices[k]
-            if matrix_index in self.Q_svd_dict:
-                square_root = self.Q_svd_dict[matrix_index]
+            if matrix_index in self.Q_square_root_dict:
+                square_root = self.Q_square_root_dict[matrix_index]
            else:
-                (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
+                if matrix_index in self.Q_svd_dict:
+                    (U, S, Vh) = self.Q_svd_dict[matrix_index]
+                else:
+                    (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
                                        full_matrices=False, compute_uv=True,
                                        overwrite_a=False, check_finite=False)
+                    self.Q_svd_dict[matrix_index] = (U,S,Vh)
+                    
                square_root = U * np.sqrt(S)
-                self.Q_svd_dict[matrix_index] = square_root
+                self.Q_square_root_dict[matrix_index] = square_root

            return square_root
+        
+        def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):
+            """
+            Function inverts Q matrix and regularizes the inverse.
+            Regularization is useful when original matrix is badly conditioned.
+            Function is currently used only in SparseGP code.
+            
+            Inputs:
+            ------------------------------
+            k: int
+            Iteration number.
+            
+            p_largest_cond_num: float
+            Largest condition value for the inverted matrix. If cond. number is smaller than that
+            no regularization happen.
+            
+            regularization_type: 1 or 2
+            Regularization type.
+            
+            regularization_type: int (1 or 2)
+            
+                type 1: 1/(S[k] + regularizer) regularizer is computed
+                type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
+            """
+            #import pdb; pdb.set_trace()
+            
+            matrix_index = self.reconstruct_indices[k]
+            if matrix_index in self.Q_inverse_dict:
+                Q_inverse_r = self.Q_inverse_dict[matrix_index]
+            else:
+                
+                if matrix_index in self.Q_svd_dict:
+                    (U, S, Vh) = self.Q_svd_dict[matrix_index]
+                else:
+                    (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
+                                        full_matrices=False, compute_uv=True,
+                                        overwrite_a=False, check_finite=False)
+                    self.Q_svd_dict[matrix_index] = (U,S,Vh)
+                
+                Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.Qs[:,:, matrix_index] + self.Qs[:,:, matrix_index].T), U,S, p_largest_cond_num, p_regularization_type)
+                self.Q_inverse_dict[matrix_index] = Q_inverse_r

+            return Q_inverse_r
+            
+        
        def return_last(self):
            """
            Function returns last available matrices.
@ -3073,7 +3195,8 @@ class ContDescrStateSpace(DescreteStateSpace):
    @classmethod
    def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False,
                                 grad_params_no=None,
-                                 P_inf=None, dP_inf=None, dF = None, dQc=None):
+                                 P_inf=None, dP_inf=None, dF = None, dQc=None,
+                                 dt0=None):
        """
        Function return the object which is used in Kalman filter and/or
        smoother to obtain matrices A, Q and their derivatives for discrete model
@ -3110,7 +3233,14 @@ class ContDescrStateSpace(DescreteStateSpace):
        threshold_number_of_unique_time_steps = 20 # above which matrices are separately each time
        dt = np.empty((X.shape[0],))
        dt[1:] = np.diff(X[:,0],axis=0)
-        dt[0]  = 0#dt[1]
+        if dt0 is None:
+            dt[0]  = 0#dt[1]
+        else:
+            if isinstance(dt0,str):
+                dt = dt[1:]
+            else:
+                dt[0] = dt0
+            
        unique_indices = np.unique(np.round(dt, decimals=unique_round_decimals))
        number_unique_indices = len(unique_indices)

@ -3161,7 +3291,10 @@ class ContDescrStateSpace(DescreteStateSpace):

            x_{k} =  A_{k} * x_{k-1} + q_{k-1};       q_{k-1} ~ N(0, Q_{k-1})

-
+        TODO: this function can be redone to "preprocess dataset", when
+        close time points are handeled properly (with rounding parameter) and
+        values are averaged accordingly.
+        
        Input:
        --------------
        F,L: LTI SDE matrices of corresponding dimensions
@ -3222,11 +3355,9 @@ class ContDescrStateSpace(DescreteStateSpace):
        n = F.shape[0]

        if not isinstance(dt, collections.Iterable): # not iterable, scalar
-
+            #import pdb; pdb.set_trace()
            # The dynamical model
            A  = matrix_exponent(F*dt)
-            if np.any( np.isnan(A)):
-                A  = linalg.expm3(F*dt)

            # The covariance matrix Q by matrix fraction decomposition ->
            Phi = np.zeros((2*n,2*n))
@ -3265,15 +3396,17 @@ class ContDescrStateSpace(DescreteStateSpace):
                    # The discrete-time dynamical model*
                    if p==0:
                        A  = AA[:n,:n,p]
-                        Q_noise_2  = P_inf - A.dot(P_inf).dot(A.T)
-                        Q_noise = Q_noise_2
+                        Q_noise_3  = P_inf - A.dot(P_inf).dot(A.T)
+                        Q_noise = Q_noise_3
                        #PP = A.dot(P).dot(A.T) + Q_noise_2

                    # The derivatives of A and Q
                    dA[:,:,p] = AA[n:,:n,p]
-                    dQ[:,:,p] = dP_inf[:,:,p] - dA[:,:,p].dot(P_inf).dot(A.T) \
-                       - A.dot(dP_inf[:,:,p]).dot(A.T) - A.dot(P_inf).dot(dA[:,:,p].T) # Rewrite not ro multiply two times
-
+                    tmp = dA[:,:,p].dot(P_inf).dot(A.T)
+                    dQ[:,:,p] = dP_inf[:,:,p] - tmp \
+                       - A.dot(dP_inf[:,:,p]).dot(A.T) - tmp.T
+                    
+                    dQ[:,:,p] = 0.5*(dQ[:,:,p] + dQ[:,:,p].T) # Symmetrize
            else:
              dA = None
              dQ = None
@ -3282,7 +3415,7 @@ class ContDescrStateSpace(DescreteStateSpace):
 	
            #Q_noise = Q_noise_1

-            # Return
+            Q_noise = 0.5*(Q_noise + Q_noise.T) # Symmetrize
            return A, Q_noise,None, dA, dQ

        else: # iterable, array
@ -3486,4 +3619,4 @@ def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None):

    # (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)

-    return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0, T
+    return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0
--- a/GPy/models/state_space_model.py
+++ b/GPy/models/state_space_model.py
@ -1,6 +1,6 @@
 # Copyright (c) 2013, Arno Solin.
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-#
+# 
 # This implementation of converting GPs to state space models is based on the article:
 #
 #  @article{Sarkka+Solin+Hartikainen:2013,
@ -23,7 +23,16 @@ from . import state_space_main as ssm
 from . import state_space_setup as ss_setup

 class StateSpace(Model):
-    def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, name='StateSpace'):
+    def __init__(self, X, Y, kernel=None, noise_var=1.0, kalman_filter_type = 'regular', use_cython = False, balance=False, name='StateSpace'):
+        """
+        Inputs:
+        ------------------
+        
+        balance: bool
+        Whether to balance or not the model as a whole
+        
+        """
+        
        super(StateSpace, self).__init__(name=name)

        if len(X.shape) == 1:
@ -51,15 +60,16 @@ class StateSpace(Model):
        ss_setup.use_cython = use_cython

        #import pdb; pdb.set_trace()
-
+        self.balance = balance
+        
        global ssm
        #from . import state_space_main as ssm
        if (ssm.cython_code_available) and (ssm.use_cython != ss_setup.use_cython):
            reload(ssm)
        # Make sure the observations are ordered in time
        sort_index = np.argsort(X[:,0])
-        self.X = X[sort_index]
-        self.Y = Y[sort_index]
+        self.X = X[sort_index,:]
+        self.Y = Y[sort_index,:]

        # Noise variance
        self.likelihood = likelihoods.Gaussian(variance=noise_var)
@ -86,11 +96,12 @@ class StateSpace(Model):

        #np.set_printoptions(16)
        #print(self.param_array)
-        #import pdb; pdb.set_trace()
+        

        # Get the model matrices from the kernel
        (F,L,Qc,H,P_inf, P0, dFt,dQct,dP_inft, dP0t) = self.kern.sde()
-
+        
+        
        # necessary parameters
        measurement_dim = self.output_dim
        grad_params_no = dFt.shape[2]+1 # we also add measurement noise as a parameter
@ -112,8 +123,9 @@ class StateSpace(Model):
        dR[:,:,-1] = np.eye(measurement_dim)

        # Balancing
-        #(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
-
+        if self.balance:
+            (F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
+            print("SSM parameters_changed balancing!")
        # Use the Kalman filter to evaluate the likelihood
        grad_calc_params = {}
        grad_calc_params['dP_inf'] = dP_inf
@ -125,7 +137,7 @@ class StateSpace(Model):
        kalman_filter_type = self.kalman_filter_type

        # The following code is required because sometimes the shapes of self.Y
-        # becomes 3D even though is must be 2D. The reason is undescovered.
+        # becomes 3D even though is must be 2D. The reason is undiscovered.
        Y = self.Y
        if self.ts_number is None:
            Y.shape = (self.num_data,1)
@ -146,7 +158,7 @@ class StateSpace(Model):

        if np.any( np.isfinite(grad_log_likelihood) == False):
            #import pdb; pdb.set_trace()
-            print("State-Space: NaN valkues in the grad_log_likelihood")
+            print("State-Space: NaN values in the grad_log_likelihood")
        #print(grad_log_likelihood)

        grad_log_likelihood_sum = np.sum(grad_log_likelihood,axis=1)
@ -159,7 +171,7 @@ class StateSpace(Model):
    def log_likelihood(self):
        return self._log_marginal_likelihood

-    def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, **kw):
+    def _raw_predict(self, Xnew=None, Ynew=None, filteronly=False, p_balance=False, **kw):
        """
        Performs the actual prediction for new X points.
        Inner function. It is called only from inside this class.
@ -177,7 +189,10 @@ class StateSpace(Model):
        filteronly: bool
            Use only Kalman Filter for prediction. In this case the output does
            not coincide with corresponding Gaussian process.
-
+        
+        balance: bool
+            Whether to balance or not the model as a whole
+        
        Output:
        --------------------

@ -210,7 +225,12 @@ class StateSpace(Model):
        # Get the model matrices from the kernel
        (F,L,Qc,H,P_inf, P0, dF,dQc,dP_inf,dP0) = self.kern.sde()
        state_dim = F.shape[0]
-
+        
+        # Balancing
+        if (p_balance==True):
+            (F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf,dP0) = ssm.balance_ss_model(F,L,Qc,H,P_inf,P0, dF,dQc,dP_inf, dP0)
+            print("SSM _raw_predict balancing!")
+            
        #Y = self.Y[:, 0,0]
        # Run the Kalman filter
        #import pdb; pdb.set_trace()
@ -261,10 +281,23 @@ class StateSpace(Model):
        # Return the posterior of the state
        return (m, V)

-    def predict(self, Xnew=None, filteronly=False, include_likelihood=True, **kw):
-
+    def predict(self, Xnew=None, filteronly=False, include_likelihood=True, balance=None, **kw):
+        """
+        Inputs:
+        ------------------
+        
+        balance: bool
+        Whether to balance or not the model as a whole
+        
+        """
+        
+        if balance is None:
+            p_balance = self.balance
+        else:
+            p_balance = balance
+            
        # Run the Kalman filter to get the state
-        (m, V) = self._raw_predict(Xnew,filteronly=filteronly)
+        (m, V) = self._raw_predict(Xnew,filteronly=filteronly, p_balance=p_balance)

        # Add the noise variance to the state variance
        if include_likelihood:
@ -277,8 +310,22 @@ class StateSpace(Model):
        # Return mean and variance
        return m, V

-    def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), **kw):
-        mu, var = self._raw_predict(Xnew)
+    def predict_quantiles(self, Xnew=None, quantiles=(2.5, 97.5), balance=None, **kw):
+        """
+        Inputs:
+        ------------------
+        
+        balance: bool
+        Whether to balance or not the model as a whole
+        
+        """
+        if balance is None:
+            p_balance = self.balance
+        else:
+            p_balance = balance
+        
+        
+        mu, var = self._raw_predict(Xnew, p_balance=p_balance)
        #import pdb; pdb.set_trace()
        return  [stats.norm.ppf(q/100.)*np.sqrt(var + float(self.Gaussian_noise.variance)) + mu for q in quantiles]

--- a/GPy/models/tp_regression.py
+++ b/GPy/models/tp_regression.py
@ -0,0 +1,294 @@
+# Copyright (c) 2017 the GPy Austhors (see AUTHORS.txt)
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+from ..core import Model
+from ..core.parameterization import Param
+from ..core import Mapping
+from ..kern import Kern, RBF
+from ..inference.latent_function_inference import ExactStudentTInference
+from ..util.normalizer import Standardize
+
+import numpy as np
+from scipy import stats
+from paramz import ObsAr
+from paramz.transformations import Logexp
+
+import warnings
+
+
+class TPRegression(Model):
+    """
+    Student-t Process model for regression, as presented in
+
+       Shah, A., Wilson, A. and Ghahramani, Z., 2014, April. Student-t processes as alternatives to Gaussian processes.
+       In Artificial Intelligence and Statistics (pp. 877-885).
+
+    :param X: input observations
+    :param Y: observed values
+    :param kernel: a GPy kernel, defaults to rbf
+    :param deg_free: initial value for the degrees of freedom hyperparameter
+    :param Norm normalizer: [False]
+
+        Normalize Y with the norm given.
+        If normalizer is False, no normalization will be done
+        If it is None, we use GaussianNorm(alization)
+
+    .. Note:: Multiple independent outputs are allowed using columns of Y
+
+    """
+
+    def __init__(self, X, Y, kernel=None, deg_free=5., normalizer=None, mean_function=None, name='TP regression'):
+        super(TPRegression, self).__init__(name=name)
+        # X
+        assert X.ndim == 2
+        self.set_X(X)
+        self.num_data, self.input_dim = self.X.shape
+
+        # Y
+        assert Y.ndim == 2
+        if normalizer is True:
+            self.normalizer = Standardize()
+        elif normalizer is False:
+            self.normalizer = None
+        else:
+            self.normalizer = normalizer
+
+        self.set_Y(Y)
+
+        if Y.shape[0] != self.num_data:
+            # There can be cases where we want inputs than outputs, for example if we have multiple latent
+            # function values
+            warnings.warn("There are more rows in your input data X, \
+                                 than in your output data Y, be VERY sure this is what you want")
+        self.output_dim = self.Y.shape[1]
+
+        # Kernel
+        kernel = kernel or RBF(self.X.shape[1])
+        assert isinstance(kernel, Kern)
+        self.kern = kernel
+        self.link_parameter(self.kern)
+
+        if self.kern._effective_input_dim != self.X.shape[1]:
+            warnings.warn(
+                "Your kernel has a different input dimension {} then the given X dimension {}. Be very sure this is "
+                "what you want and you have not forgotten to set the right input dimenion in your kernel".format(
+                    self.kern._effective_input_dim, self.X.shape[1]))
+
+        # Mean function
+        self.mean_function = mean_function
+        if mean_function is not None:
+            assert isinstance(self.mean_function, Mapping)
+            assert mean_function.input_dim == self.input_dim
+            assert mean_function.output_dim == self.output_dim
+            self.link_parameter(mean_function)
+
+        # Degrees of freedom
+        self.nu = Param('deg_free', float(deg_free), Logexp())
+        self.link_parameter(self.nu)
+
+        # Inference
+        self.inference_method = ExactStudentTInference()
+        self.posterior = None
+        self._log_marginal_likelihood = None
+
+        # Insert property for plotting (not used)
+        self.Y_metadata = None
+
+    def _update_posterior_dof(self, dof, which):
+        if self.posterior is not None:
+            self.posterior.nu = dof
+
+    @property
+    def _predictive_variable(self):
+        return self.X
+
+    def set_XY(self, X, Y):
+        """
+        Set the input / output data of the model
+        This is useful if we wish to change our existing data but maintain the same model
+
+        :param X: input observations
+        :type X: np.ndarray
+        :param Y: output observations
+        :type Y: np.ndarray or ObsAr
+        """
+        self.update_model(False)
+        self.set_Y(Y)
+        self.set_X(X)
+        self.update_model(True)
+
+    def set_X(self, X):
+        """
+        Set the input data of the model
+
+        :param X: input observations
+        :type X: np.ndarray
+        """
+        assert isinstance(X, np.ndarray)
+        state = self.update_model()
+        self.update_model(False)
+        self.X = ObsAr(X)
+        self.update_model(state)
+
+    def set_Y(self, Y):
+        """
+        Set the output data of the model
+
+        :param Y: output observations
+        :type Y: np.ndarray or ObsArray
+        """
+        assert isinstance(Y, (np.ndarray, ObsAr))
+        state = self.update_model()
+        self.update_model(False)
+        if self.normalizer is not None:
+            self.normalizer.scale_by(Y)
+            self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
+            self.Y = Y
+        else:
+            self.Y = ObsAr(Y) if isinstance(Y, np.ndarray) else Y
+            self.Y_normalized = self.Y
+        self.update_model(state)
+
+    def parameters_changed(self):
+        """
+        Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model.
+        In particular in this class this method re-performs inference, recalculating the posterior, log marginal likelihood and gradients of the model
+
+        .. warning::
+            This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call
+            this method yourself, there may be unexpected consequences.
+        """
+        self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern,
+                                                                                                   self.X,
+                                                                                                   self.Y_normalized,
+                                                                                                   self.nu + 2 + np.finfo(
+                                                                                                       float).eps,
+                                                                                                   self.mean_function)
+        self.kern.update_gradients_full(grad_dict['dL_dK'], self.X)
+        if self.mean_function is not None:
+            self.mean_function.update_gradients(grad_dict['dL_dm'], self.X)
+        self.nu.gradient = grad_dict['dL_dnu']
+
+    def log_likelihood(self):
+        """
+        The log marginal likelihood of the model, :math:`p(\mathbf{y})`, this is the objective function of the model being optimised
+        """
+        return self._log_marginal_likelihood or self.inference()[1]
+
+    def _raw_predict(self, Xnew, full_cov=False, kern=None):
+        """
+        For making predictions, does not account for normalization or likelihood
+
+        full_cov is a boolean which defines whether the full covariance matrix
+        of the prediction is computed. If full_cov is False (default), only the
+        diagonal of the covariance is returned.
+
+        .. math::
+            p(f*|X*, X, Y) = \int^{\inf}_{\inf} p(f*|f,X*)p(f|X,Y) df
+                        = MVN\left(\nu + N,f*| K_{x*x}(K_{xx})^{-1}Y,
+                        \frac{\nu + \beta - 2}{\nu + N - 2}K_{x*x*} - K_{xx*}(K_{xx})^{-1}K_{xx*}\right)
+            \nu := \texttt{Degrees of freedom}
+        """
+        mu, var = self.posterior._raw_predict(kern=self.kern if kern is None else kern, Xnew=Xnew,
+                                              pred_var=self._predictive_variable, full_cov=full_cov)
+        if self.mean_function is not None:
+            mu += self.mean_function.f(Xnew)
+        return mu, var
+
+    def predict(self, Xnew, full_cov=False, kern=None, **kwargs):
+        """
+        Predict the function(s) at the new point(s) Xnew. For Student-t processes, this method is equivalent to
+        predict_noiseless as no likelihood is included in the model.
+        """
+        return self.predict_noiseless(Xnew, full_cov=full_cov, kern=kern)
+
+    def predict_noiseless(self, Xnew, full_cov=False, kern=None):
+        """
+        Predict the underlying function  f at the new point(s) Xnew.
+
+        :param Xnew: The points at which to make a prediction
+        :type Xnew: np.ndarray (Nnew x self.input_dim)
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+        :type full_cov: bool
+        :param kern: The kernel to use for prediction (defaults to the model kern).
+
+        :returns: (mean, var):
+            mean: posterior mean, a Numpy array, Nnew x self.input_dim
+            var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+
+           If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim.
+           If self.input_dim == 1, the return shape is Nnew x Nnew.
+           This is to allow for different normalizations of the output dimensions.
+        """
+        # Predict the latent function values
+        mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
+
+        # Un-apply normalization
+        if self.normalizer is not None:
+            mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
+
+        return mu, var
+
+    def predict_quantiles(self, X, quantiles=(2.5, 97.5), kern=None, **kwargs):
+        """
+        Get the predictive quantiles around the prediction at X
+
+        :param X: The points at which to make a prediction
+        :type X: np.ndarray (Xnew x self.input_dim)
+        :param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
+        :type quantiles: tuple
+        :param kern: optional kernel to use for prediction
+        :type predict_kw: dict
+        :returns: list of quantiles for each X and predictive quantiles for interval combination
+        :rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
+        """
+        mu, var = self._raw_predict(X, full_cov=False, kern=kern)
+        quantiles = [stats.t.ppf(q / 100., self.nu + 2 + self.num_data) * np.sqrt(var) + mu for q in quantiles]
+
+        if self.normalizer is not None:
+            quantiles = [self.normalizer.inverse_mean(q) for q in quantiles]
+
+        return quantiles
+
+    def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None, likelihood=None, **predict_kwargs):
+        """
+        Samples the posterior GP at the points X, equivalent to posterior_samples_f due to the absence of a likelihood.
+        """
+        return self.posterior_samples_f(X, size, full_cov=full_cov, **predict_kwargs)
+
+    def posterior_samples_f(self, X, size=10, full_cov=True, **predict_kwargs):
+        """
+        Samples the posterior TP at the points X.
+
+        :param X: The points at which to take the samples.
+        :type X: np.ndarray (Nnew x self.input_dim)
+        :param size: the number of a posteriori samples.
+        :type size: int.
+        :param full_cov: whether to return the full covariance matrix, or just the diagonal.
+        :type full_cov: bool.
+        :returns: fsim: set of simulations
+        :rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
+        """
+        mu, var = self._raw_predict(X, full_cov=full_cov, **predict_kwargs)
+        if self.normalizer is not None:
+            mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
+
+        def sim_one_dim(m, v):
+            nu = self.nu + 2 + self.num_data
+            v = np.diag(v.flatten()) if not full_cov else v
+            Z = np.random.multivariate_normal(np.zeros(X.shape[0]), v, size).T
+            g = np.tile(np.random.gamma(nu / 2., 2. / nu, size), (X.shape[0], 1))
+            return m + Z / np.sqrt(g)
+
+        if self.output_dim == 1:
+            return sim_one_dim(mu, var)
+        else:
+            fsim = np.empty((self.output_dim, self.num_data, size))
+            for d in range(self.output_dim):
+                if full_cov and var.ndim == 3:
+                    fsim[d] = sim_one_dim(mu[:, d], var[:, :, d])
+                elif (not full_cov) and var.ndim == 2:
+                    fsim[d] = sim_one_dim(mu[:, d], var[:, d])
+                else:
+                    fsim[d] = sim_one_dim(mu[:, d], var)
+        return fsim
--- a/GPy/plotting/init.py
+++ b/GPy/plotting/init.py
@ -92,6 +92,19 @@ def inject_plotting():
        SSGPLVM.plot_inducing = gpy_plot.latent_plots.plot_latent_inducing
        SSGPLVM.plot_steepest_gradient_map = gpy_plot.latent_plots.plot_steepest_gradient_map

+        from ..models import TPRegression
+        TPRegression.plot_data = gpy_plot.data_plots.plot_data
+        TPRegression.plot = gpy_plot.gp_plots.plot
+        TPRegression.plot_data_error = gpy_plot.data_plots.plot_data_error
+        TPRegression.plot_errorbars_trainset = gpy_plot.data_plots.plot_errorbars_trainset
+        TPRegression.plot_mean = gpy_plot.gp_plots.plot_mean
+        TPRegression.plot_confidence = gpy_plot.gp_plots.plot_confidence
+        TPRegression.plot_density = gpy_plot.gp_plots.plot_density
+        TPRegression.plot_samples = gpy_plot.gp_plots.plot_samples
+        TPRegression.plot_f = gpy_plot.gp_plots.plot_f
+        TPRegression.plot_latent = gpy_plot.gp_plots.plot_f
+        TPRegression.plot_noiseless = gpy_plot.gp_plots.plot_f
+
        from ..kern import Kern
        Kern.plot_covariance = gpy_plot.kernel_plots.plot_covariance
        def deprecate_plot(self, *args, **kwargs):
--- a/GPy/plotting/gpy_plot/gp_plots.py
+++ b/GPy/plotting/gpy_plot/gp_plots.py
@ -208,10 +208,10 @@ def _plot_samples(self, canvas, helper_data, helper_prediction, projection,
        if len(free_dims)==1:
            # 1D plotting:
            update_not_existing_kwargs(kwargs, pl().defaults.samples_1d)  # @UndefinedVariable
-            plots = [pl().plot(canvas, Xgrid[:, free_dims], samples[:, s], label=label if s==0 else None, **kwargs) for s in range(samples.shape[-1])]
+            plots = [pl().plot(canvas, Xgrid[:, free_dims], samples[:, :, s], label=label if s==0 else None, **kwargs) for s in range(samples.shape[-1])]
        elif len(free_dims)==2 and projection=='3d':
            update_not_existing_kwargs(kwargs, pl().defaults.samples_3d)  # @UndefinedVariable
-            plots = [pl().surface(canvas, x, y, samples[:, s].reshape(resolution, resolution), **kwargs) for s in range(samples.shape[-1])]
+            plots = [pl().surface(canvas, x, y, samples[:, :, s].reshape(resolution, resolution), **kwargs) for s in range(samples.shape[-1])]
        else:
            pass # Nothing to plot!
        return dict(gpmean=plots)
--- a/GPy/plotting/gpy_plot/plot_util.py
+++ b/GPy/plotting/gpy_plot/plot_util.py
@ -81,8 +81,8 @@ def helper_predict_with_model(self, Xgrid, plot_raw, apply_link, percentiles, wh
    else: percentiles = []

    if samples > 0:
-        fsamples = self.posterior_samples(Xgrid, full_cov=True, size=samples, **predict_kw)
-        fsamples = fsamples[which_data_ycols] if fsamples.ndim == 3 else fsamples
+        fsamples = self.posterior_samples(Xgrid, size=samples, **predict_kw)
+        fsamples = fsamples[:, which_data_ycols, :]
    else:
        fsamples = None

@ -95,12 +95,9 @@ def helper_predict_with_model(self, Xgrid, plot_raw, apply_link, percentiles, wh
            retmu[:, [i]] = self.likelihood.gp_link.transf(mu[:, [i]])
            for perc in percs:
                perc[:, [i]] = self.likelihood.gp_link.transf(perc[:, [i]])
-            if fsamples is not None and fsamples.ndim == 3:
+            if fsamples is not None:
                for s in range(fsamples.shape[-1]):
-                    fsamples[i, :, s] = self.likelihood.gp_link.transf(fsamples[i, :, s])
-            elif fsamples is not None:
-                for s in range(fsamples.shape[-1]):
-                    fsamples[:, s] = self.likelihood.gp_link.transf(fsamples[:, s])
+                    fsamples[:, i, s] = self.likelihood.gp_link.transf(fsamples[:, i, s])
    return retmu, percs, fsamples

 def helper_for_plot_data(self, X, plot_limits, visible_dims, fixed_inputs, resolution):
--- a/GPy/testing/baseline/bayesian_gplvm_gradient.npz
+++ b/GPy/testing/baseline/bayesian_gplvm_gradient.npz
--- a/GPy/testing/baseline/bayesian_gplvm_inducing.npz
+++ b/GPy/testing/baseline/bayesian_gplvm_inducing.npz
--- a/GPy/testing/baseline/bayesian_gplvm_inducing_3d.npz
+++ b/GPy/testing/baseline/bayesian_gplvm_inducing_3d.npz
--- a/GPy/testing/baseline/bayesian_gplvm_latent.npz
+++ b/GPy/testing/baseline/bayesian_gplvm_latent.npz
--- a/GPy/testing/baseline/bayesian_gplvm_latent_3d.npz
+++ b/GPy/testing/baseline/bayesian_gplvm_latent_3d.npz
--- a/GPy/testing/baseline/bayesian_gplvm_magnification.npz
+++ b/GPy/testing/baseline/bayesian_gplvm_magnification.npz
--- a/GPy/testing/baseline/coverage_3d_plot.npz
+++ b/GPy/testing/baseline/coverage_3d_plot.npz
--- a/GPy/testing/baseline/coverage_annotation_interact.npz
+++ b/GPy/testing/baseline/coverage_annotation_interact.npz
--- a/GPy/testing/baseline/coverage_gradient.npz
+++ b/GPy/testing/baseline/coverage_gradient.npz
--- a/GPy/testing/baseline/coverage_imshow_interact.npz
+++ b/GPy/testing/baseline/coverage_imshow_interact.npz
--- a/GPy/testing/baseline/gp_2d_data.npz
+++ b/GPy/testing/baseline/gp_2d_data.npz
--- a/GPy/testing/baseline/gp_2d_in_error.npz
+++ b/GPy/testing/baseline/gp_2d_in_error.npz
--- a/GPy/testing/baseline/gp_2d_inducing.npz
+++ b/GPy/testing/baseline/gp_2d_inducing.npz
--- a/GPy/testing/baseline/gp_2d_mean.npz
+++ b/GPy/testing/baseline/gp_2d_mean.npz
--- a/GPy/testing/baseline/gp_3d_data.npz
+++ b/GPy/testing/baseline/gp_3d_data.npz
--- a/GPy/testing/baseline/gp_3d_inducing.npz
+++ b/GPy/testing/baseline/gp_3d_inducing.npz
--- a/GPy/testing/baseline/gp_3d_mean.npz
+++ b/GPy/testing/baseline/gp_3d_mean.npz
--- a/GPy/testing/baseline/gp_class_likelihood.npz
+++ b/GPy/testing/baseline/gp_class_likelihood.npz
--- a/GPy/testing/baseline/gp_class_raw.npz
+++ b/GPy/testing/baseline/gp_class_raw.npz
--- a/GPy/testing/baseline/gp_class_raw_link.npz
+++ b/GPy/testing/baseline/gp_class_raw_link.npz
--- a/GPy/testing/baseline/gp_conf.npz
+++ b/GPy/testing/baseline/gp_conf.npz
--- a/GPy/testing/baseline/gp_data.npz
+++ b/GPy/testing/baseline/gp_data.npz
--- a/GPy/testing/baseline/gp_density.npz
+++ b/GPy/testing/baseline/gp_density.npz
--- a/GPy/testing/baseline/gp_in_error.npz
+++ b/GPy/testing/baseline/gp_in_error.npz
--- a/GPy/testing/baseline/gp_mean.npz
+++ b/GPy/testing/baseline/gp_mean.npz
--- a/GPy/testing/baseline/gp_out_error.npz
+++ b/GPy/testing/baseline/gp_out_error.npz
--- a/GPy/testing/baseline/gp_samples.npz
+++ b/GPy/testing/baseline/gp_samples.npz
--- a/GPy/testing/baseline/gplvm_gradient.npz
+++ b/GPy/testing/baseline/gplvm_gradient.npz
--- a/GPy/testing/baseline/gplvm_latent.npz
+++ b/GPy/testing/baseline/gplvm_latent.npz
--- a/GPy/testing/baseline/gplvm_latent_3d.npz
+++ b/GPy/testing/baseline/gplvm_latent_3d.npz
--- a/GPy/testing/baseline/gplvm_magnification.npz
+++ b/GPy/testing/baseline/gplvm_magnification.npz
--- a/GPy/testing/baseline/kern_ARD.npz
+++ b/GPy/testing/baseline/kern_ARD.npz
--- a/GPy/testing/baseline/kern_cov_1d.npz
+++ b/GPy/testing/baseline/kern_cov_1d.npz
--- a/GPy/testing/baseline/kern_cov_2d.npz
+++ b/GPy/testing/baseline/kern_cov_2d.npz
--- a/GPy/testing/baseline/kern_cov_3d.npz
+++ b/GPy/testing/baseline/kern_cov_3d.npz
--- a/GPy/testing/baseline/kern_cov_no_lim.npz
+++ b/GPy/testing/baseline/kern_cov_no_lim.npz
--- a/GPy/testing/baseline/sparse_gp_class_likelihood.npz
+++ b/GPy/testing/baseline/sparse_gp_class_likelihood.npz
--- a/GPy/testing/baseline/sparse_gp_class_raw.npz
+++ b/GPy/testing/baseline/sparse_gp_class_raw.npz
--- a/GPy/testing/baseline/sparse_gp_class_raw_link.npz
+++ b/GPy/testing/baseline/sparse_gp_class_raw_link.npz
--- a/GPy/testing/baseline/sparse_gp_data_error.npz
+++ b/GPy/testing/baseline/sparse_gp_data_error.npz
--- a/GPy/testing/cython_tests.py
+++ b/GPy/testing/cython_tests.py
@ -2,21 +2,27 @@ import numpy as np
 import scipy as sp
 from GPy.util import choleskies
 import GPy
-from ..util.config import config
 import unittest

+from ..util.config import config
+
 try:
-    from ..util import linalg_cython
    from ..util import choleskies_cython
-    config.set('cython', 'working', 'True')
+    choleskies_cython_working = config.getboolean('cython', 'working')
 except ImportError:
-    config.set('cython', 'working', 'False')
+    choleskies_cython_working = False
+
+try:
+    from ..kern.src import stationary_cython
+    stationary_cython_working = config.getboolean('cython', 'working')
+except ImportError:
+    stationary_cython_working = False

 """
 These tests make sure that the pure python and cython codes work the same
 """

-@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
+@unittest.skipIf(not choleskies_cython_working,"Cython cholesky module has not been built on this machine")
 class CythonTestChols(np.testing.TestCase):
    def setUp(self):
        self.flat = np.random.randn(45,5)
@ -30,7 +36,7 @@ class CythonTestChols(np.testing.TestCase):
        A2 = choleskies._triang_to_flat_cython(self.triang)
        np.testing.assert_allclose(A1, A2)

-@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
+@unittest.skipIf(not stationary_cython_working,"Cython stationary module has not been built on this machine")
 class test_stationary(np.testing.TestCase):
    def setUp(self):
        self.k = GPy.kern.RBF(10)
@ -60,7 +66,7 @@ class test_stationary(np.testing.TestCase):
        g2 = self.k._lengthscale_grads_cython(self.dKxz, self.X, self.Z)
        np.testing.assert_allclose(g1, g2)

-@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
+@unittest.skipIf(not choleskies_cython_working,"Cython cholesky module has not been built on this machine")
 class test_choleskies_backprop(np.testing.TestCase):
    def setUp(self):
        a =np.random.randn(10,12)
--- a/GPy/testing/gpy_kernels_state_space_tests.py
+++ b/GPy/testing/gpy_kernels_state_space_tests.py
@ -91,12 +91,14 @@ class StateSpaceKernelsTests(np.testing.TestCase):
                           mean_compare_decimal=5, var_compare_decimal=5)

    def test_RBF_kernel(self,):
+        #import pdb;pdb.set_trace()
+        
        np.random.seed(234) # seed the random number generator
        (X,Y) = generate_sine_data(x_points=None, sin_period=5.0, sin_ampl=10.0, noise_var=2.0,
                        plot = False, points_num=50, x_interval = (0, 20), random=True)
        X.shape = (X.shape[0],1); Y.shape = (Y.shape[0],1)

-        ss_kernel = GPy.kern.sde_RBF(1, 110., 1.5, active_dims=[0,])
+        ss_kernel = GPy.kern.sde_RBF(1, 110., 1.5, active_dims=[0,], balance=True, approx_order=10)
        gp_kernel = GPy.kern.RBF(1, 110., 1.5, active_dims=[0,])

        self.run_for_model(X, Y, ss_kernel, check_gradients=True,
@ -267,7 +269,7 @@ class StateSpaceKernelsTests(np.testing.TestCase):
                               gp_kernel=gp_kernel,
                               mean_compare_decimal=2, var_compare_decimal=2)
        except AssertionError:
-            raise SkipTest("Skipping Regular kalman filter for kernel addition, as it seems to be bugged for some python versions")
+            raise SkipTest("Skipping Regular kalman filter for kernel addition, because it is not stable (normal situation) for this data.")


    def test_kernel_multiplication(self,):
@ -436,7 +438,7 @@ if __name__ == "__main__":
    print("Running state-space inference tests...")
    unittest.main()

-    #tt = StateSpaceKernelsTests('test_periodic_kernel')
+    #tt = StateSpaceKernelsTests('test_RBF_kernel')
    #import pdb; pdb.set_trace()
    #tt.test_Matern32_kernel()
    #tt.test_Matern52_kernel()
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@ -14,10 +14,10 @@ from ..util.config import config
 verbose = 0

 try:
-    from ..util import linalg_cython
-    config.set('cython', 'working', 'True')
+    from ..kern.src import coregionalize_cython
+    cython_coregionalize_working = config.getboolean('cython', 'working')
 except ImportError:
-    config.set('cython', 'working', 'False')
+    cython_coregionalize_working = False


 class Kern_check_model(GPy.core.Model):
@ -482,7 +482,19 @@ class KernelGradientTestsContinuous(unittest.TestCase):
        k = GPy.kern.StdPeriodic(self.D)
        k.randomize()
        self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
-    
+
+    def test_symmetric_even(self):
+        k_base = GPy.kern.Linear(1) + GPy.kern.RBF(1)
+        transform = -np.array([[1.0]])
+        k = GPy.kern.Symmetric(k_base, transform, 'even')
+        self.assertTrue(check_kernel_gradient_functions(k))
+
+    def test_symmetric_odd(self):
+        k_base = GPy.kern.Linear(1) + GPy.kern.RBF(1)
+        transform = -np.array([[1.0]])
+        k = GPy.kern.Symmetric(k_base, transform, 'odd')
+        self.assertTrue(check_kernel_gradient_functions(k))
+
    def test_MultioutputKern(self):
        k1 = GPy.kern.RBF(self.D, ARD=True)
        k1.randomize()
@ -629,7 +641,7 @@ class KernelTestsNonContinuous(unittest.TestCase):
        kern = GPy.kern.Coregionalize(1, output_dim=3, active_dims=[-1])
        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))

-@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
+@unittest.skipIf(not cython_coregionalize_working,"Cython coregionalize module has not been built on this machine")
 class Coregionalize_cython_test(unittest.TestCase):
    """
    Make sure that the coregionalize kernel work with and without cython enabled
@ -642,42 +654,44 @@ class Coregionalize_cython_test(unittest.TestCase):

    def test_sym(self):
        dL_dK = np.random.randn(self.N1, self.N1)
-        GPy.util.config.config.set('cython', 'working', 'True')
-        K_cython = self.k.K(self.X)
+        K_cython = self.k._K_cython(self.X)
        self.k.update_gradients_full(dL_dK, self.X)
        grads_cython = self.k.gradient.copy()

-        GPy.util.config.config.set('cython', 'working', 'False')
-        K_numpy = self.k.K(self.X)
+        K_numpy = self.k._K_numpy(self.X)
+        # Nasty hack to ensure the numpy version is used for update_gradients
+        # If this test is running, cython is working, so override the cython
+        # function with the numpy function
+        _gradient_reduce_cython = self.k._gradient_reduce_cython
+        self.k._gradient_reduce_cython = self.k._gradient_reduce_numpy
        self.k.update_gradients_full(dL_dK, self.X)
+        # Undo hack
+        self.k._gradient_reduce_cython = _gradient_reduce_cython
        grads_numpy = self.k.gradient.copy()

        self.assertTrue(np.allclose(K_numpy, K_cython))
        self.assertTrue(np.allclose(grads_numpy, grads_cython))

-        #reset the cython state for any other tests
-        GPy.util.config.config.set('cython', 'working', 'true')
-
    def test_nonsym(self):
        dL_dK = np.random.randn(self.N1, self.N2)
-        GPy.util.config.config.set('cython', 'working', 'True')
-        K_cython = self.k.K(self.X, self.X2)
+        K_cython = self.k._K_cython(self.X, self.X2)
        self.k.gradient = 0.
        self.k.update_gradients_full(dL_dK, self.X, self.X2)
        grads_cython = self.k.gradient.copy()

-        GPy.util.config.config.set('cython', 'working', 'False')
-        K_numpy = self.k.K(self.X, self.X2)
+        K_numpy = self.k._K_numpy(self.X, self.X2)
        self.k.gradient = 0.
+        # Same hack as in test_sym (Line 639)
+        _gradient_reduce_cython = self.k._gradient_reduce_cython
+        self.k._gradient_reduce_cython = self.k._gradient_reduce_numpy
        self.k.update_gradients_full(dL_dK, self.X, self.X2)
+        # Undo hack
+        self.k._gradient_reduce_cython = _gradient_reduce_cython
        grads_numpy = self.k.gradient.copy()

        self.assertTrue(np.allclose(K_numpy, K_cython))
        self.assertTrue(np.allclose(grads_numpy, grads_cython))

-        #reset the cython state for any other tests
-        GPy.util.config.config.set('cython', 'working', 'true')
-


 class KernelTestsProductWithZeroValues(unittest.TestCase):
--- a/GPy/testing/likelihood_tests.py
+++ b/GPy/testing/likelihood_tests.py
@ -128,7 +128,16 @@ class TestNoiseModels(object):
        censored[random_inds] = 1
        self.Y_metadata = dict()
        self.Y_metadata['censored'] = censored
-
+        self.Y_metadata['output_index'] = np.zeros((self.N,1), dtype=int)
+        self.Y_metadata2 = dict()
+        self.Y_metadata2['censored'] = censored
+        inds = np.zeros((self.N,1), dtype=int)
+        inds[5:10] = 1
+        inds[10:] = 2
+        self.Y_metadata2['output_index'] = inds
+        self.combY = self.Y
+        self.combY[10:] = np.where(self.binary_Y[10:] >0, self.binary_Y[10:], 0)
+        print(self.combY)
        #Make a bigger step as lower bound can be quite curved
        self.step = 1e-4

@ -292,6 +301,15 @@ class TestNoiseModels(object):
                "Y": self.positive_Y,
                "Y_metadata": self.Y_metadata,
                "laplace": True
+            },
+            "multioutput_default": {
+                "model": GPy.likelihoods.MultioutputLikelihood([GPy.likelihoods.Gaussian(), GPy.likelihoods.Poisson(), GPy.likelihoods.Bernoulli()]),
+                "link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)],
+                "laplace": True,
+                "Y": self.combY,
+                "Y_metadata": self.Y_metadata2,
+                "ep": True,
+                "variational_expectations": True,
            }
            #,
            #GAMMA needs some work!"Gamma_default": {
@ -618,7 +636,7 @@ class TestNoiseModels(object):
        # Y = Y/Y.max()
        white_var = 1e-4
        kernel = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
-        ep_inf = GPy.inference.latent_function_inference.EP()
+        ep_inf = GPy.inference.latent_function_inference.EP(always_reset=True)

        m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, Y_metadata=Y_metadata, inference_method=ep_inf)
        m['.*white'].constrain_fixed(white_var)
--- a/GPy/testing/model_tests.py
+++ b/GPy/testing/model_tests.py
@ -812,6 +812,45 @@ class GradientTests(np.testing.TestCase):
        rbflin = GPy.kern.RBF(1) + GPy.kern.White(1)
        self.check_model(rbflin, model_type='SparseGPRegression', dimension=1, uncertain_inputs=1)

+    def test_TPRegression_matern52_1D(self):
+        ''' Testing the TP regression with matern52 kernel on 1d data '''
+        matern52 = GPy.kern.Matern52(1) + GPy.kern.White(1)
+        self.check_model(matern52, model_type='TPRegression', dimension=1)
+
+    def test_TPRegression_rbf_2D(self):
+        ''' Testing the TP regression with rbf kernel on 2d data '''
+        rbf = GPy.kern.RBF(2)
+        self.check_model(rbf, model_type='TPRegression', dimension=2)
+
+    def test_TPRegression_rbf_ARD_2D(self):
+        ''' Testing the GP regression with rbf kernel on 2d data '''
+        k = GPy.kern.RBF(2, ARD=True)
+        self.check_model(k, model_type='TPRegression', dimension=2)
+
+    def test_TPRegression_matern52_2D(self):
+        ''' Testing the TP regression with matern52 kernel on 2d data '''
+        matern52 = GPy.kern.Matern52(2)
+        self.check_model(matern52, model_type='TPRegression', dimension=2)
+
+    def test_TPRegression_matern52_ARD_2D(self):
+        ''' Testing the TP regression with matern52 kernel on 2d data '''
+        matern52 = GPy.kern.Matern52(2, ARD=True)
+        self.check_model(matern52, model_type='TPRegression', dimension=2)
+
+    def test_TPRegression_matern32_1D(self):
+        ''' Testing the TP regression with matern32 kernel on 1d data '''
+        matern32 = GPy.kern.Matern32(1)
+        self.check_model(matern32, model_type='TPRegression', dimension=1)
+
+    def test_TPRegression_matern32_2D(self):
+        ''' Testing the TP regression with matern32 kernel on 2d data '''
+        matern32 = GPy.kern.Matern32(2)
+        self.check_model(matern32, model_type='TPRegression', dimension=2)
+
+    def test_TPRegression_matern32_ARD_2D(self):
+        ''' Testing the TP regression with matern32 kernel on 2d data '''
+        matern32 = GPy.kern.Matern32(2, ARD=True)
+        self.check_model(matern32, model_type='TPRegression', dimension=2)

    def test_GPLVM_rbf_bias_white_kern_2D(self):
        """ Testing GPLVM with rbf + bias kernel """
--- a/GPy/testing/plotting_tests.py
+++ b/GPy/testing/plotting_tests.py
@ -38,7 +38,7 @@ from nose import SkipTest

 try:
    import matplotlib
-    matplotlib.use('agg')
+    # matplotlib.use('agg')
 except ImportError:
    # matplotlib not installed
    from nose import SkipTest
@ -367,13 +367,13 @@ def test_classification():
    m = GPy.models.GPClassification(X, Y>Y.mean())
    #m.optimize()
    _, ax = plt.subplots()
-    m.plot(plot_raw=False, apply_link=False, ax=ax)
+    m.plot(plot_raw=False, apply_link=False, ax=ax, samples=3)
    m.plot_errorbars_trainset(plot_raw=False, apply_link=False, ax=ax)
    _, ax = plt.subplots()
-    m.plot(plot_raw=True, apply_link=False, ax=ax)
+    m.plot(plot_raw=True, apply_link=False, ax=ax, samples=3)
    m.plot_errorbars_trainset(plot_raw=True, apply_link=False, ax=ax)
    _, ax = plt.subplots()
-    m.plot(plot_raw=True, apply_link=True, ax=ax)
+    m.plot(plot_raw=True, apply_link=True, ax=ax, samples=3)
    m.plot_errorbars_trainset(plot_raw=True, apply_link=True, ax=ax)
    for do_test in _image_comparison(baseline_images=['gp_class_{}'.format(sub) for sub in ["likelihood", "raw", 'raw_link']], extensions=extensions):
        yield (do_test, )
--- a/GPy/testing/serialization_tests.py
+++ b/GPy/testing/serialization_tests.py
@ -11,6 +11,7 @@ import tempfile
 import GPy
 from nose import SkipTest
 import numpy as np
+import os
 fixed_seed = 11


@ -116,12 +117,38 @@ class Test(unittest.TestCase):
        np.testing.assert_array_equal(e1._ep_approximation[2].v[:], e1_r._ep_approximation[2].v[:])
        np.testing.assert_array_equal(e1._ep_approximation[3][:], e1_r._ep_approximation[3][:])

-        e2 = GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference()
+        e2 = GPy.inference.latent_function_inference.expectation_propagation.EPDTC(ep_mode="nested")
+        e2.ga_approx_old = GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10),np.random.rand(10))
+        e2._ep_approximation = []
+        e2._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.posteriorParamsDTC(np.random.rand(10),np.random.rand(10)))
+        e2._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10),np.random.rand(10)))
+        e2._ep_approximation.append(100.0)
        e2_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e2.to_dict())

        assert type(e2) == type(e2_r)
+        assert e2.epsilon==e2_r.epsilon
+        assert e2.eta==e2_r.eta
+        assert e2.delta==e2_r.delta
+        assert e2.always_reset==e2_r.always_reset
+        assert e2.max_iters==e2_r.max_iters
+        assert e2.ep_mode==e2_r.ep_mode
+        assert e2.parallel_updates==e2_r.parallel_updates

-    def test_serialize_deserialize_model(self):
+        np.testing.assert_array_equal(e2.ga_approx_old.tau[:], e2_r.ga_approx_old.tau[:])
+        np.testing.assert_array_equal(e2.ga_approx_old.v[:], e2_r.ga_approx_old.v[:])
+        np.testing.assert_array_equal(e2._ep_approximation[0].mu[:], e2_r._ep_approximation[0].mu[:])
+        np.testing.assert_array_equal(e2._ep_approximation[0].Sigma_diag[:], e2_r._ep_approximation[0].Sigma_diag[:])
+        np.testing.assert_array_equal(e2._ep_approximation[1].tau[:], e2_r._ep_approximation[1].tau[:])
+        np.testing.assert_array_equal(e2._ep_approximation[1].v[:], e2_r._ep_approximation[1].v[:])
+        assert(e2._ep_approximation[2] == e2_r._ep_approximation[2])
+
+        e3 = GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference()
+        e3_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e3.to_dict())
+
+        assert type(e3) == type(e3_r)
+
+
+    def test_serialize_deserialize_GP(self):
        np.random.seed(fixed_seed)
        N = 20
        Nhalf = int(N/2)
@ -131,13 +158,13 @@ class Test(unittest.TestCase):
        likelihood = GPy.likelihoods.Bernoulli()
        inference_method=GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode="nested")
        mean_function=None
+
        m = GPy.core.GP(X=X, Y=Y,  kernel=kernel, likelihood=likelihood, inference_method=inference_method, mean_function=mean_function, normalizer=True, name='gp_classification')
        m.optimize()
        m.save_model("temp_test_gp_with_data.json", compress=True, save_data=True)
        m.save_model("temp_test_gp_without_data.json", compress=True, save_data=False)
        m1_r = GPy.core.GP.load_model("temp_test_gp_with_data.json.zip")
        m2_r = GPy.core.GP.load_model("temp_test_gp_without_data.json.zip", (X,Y))
-        import os
        os.remove("temp_test_gp_with_data.json.zip")
        os.remove("temp_test_gp_without_data.json.zip")
        var = m.predict(X)[0]
@ -146,7 +173,32 @@ class Test(unittest.TestCase):
        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten())
        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var2_r).flatten())

-    def test_serialize_deserialize_inference_GPRegressor(self):
+    def test_serialize_deserialize_SparseGP(self):
+        np.random.seed(fixed_seed)
+        N = 20
+        Nhalf = int(N/2)
+        X = np.hstack([np.random.normal(5, 2, Nhalf), np.random.normal(10, 2, Nhalf)])[:, None]
+        Y = np.hstack([np.ones(Nhalf), np.zeros(Nhalf)])[:, None]
+        kernel = GPy.kern.RBF(1)
+        likelihood = GPy.likelihoods.Bernoulli()
+        inference_method=GPy.inference.latent_function_inference.expectation_propagation.EPDTC(ep_mode="nested")
+        mean_function=None
+
+        sm = GPy.core.SparseGP(X=X, Y=Y, Z=X[0:20,:], kernel=kernel, likelihood=likelihood, inference_method=inference_method, mean_function=mean_function, normalizer=True, name='sparse_gp_classification')
+        sm.optimize()
+        sm.save_model("temp_test_gp_with_data.json", compress=True, save_data=True)
+        sm.save_model("temp_test_gp_without_data.json", compress=True, save_data=False)
+        sm1_r = GPy.core.GP.load_model("temp_test_gp_with_data.json.zip")
+        sm2_r = GPy.core.GP.load_model("temp_test_gp_without_data.json.zip", (X,Y))
+        os.remove("temp_test_gp_with_data.json.zip")
+        os.remove("temp_test_gp_without_data.json.zip")
+        var = sm.predict(X)[0]
+        var1_r = sm1_r.predict(X)[0]
+        var2_r = sm2_r.predict(X)[0]
+        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten())
+        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var2_r).flatten())
+
+    def test_serialize_deserialize_GPRegressor(self):
        np.random.seed(fixed_seed)
        N = 50
        N_new = 50
@ -161,7 +213,6 @@ class Test(unittest.TestCase):
        m.save_model("temp_test_gp_regressor_without_data.json", compress=True, save_data=False)
        m1_r = GPy.models.GPRegression.load_model("temp_test_gp_regressor_with_data.json.zip")
        m2_r = GPy.models.GPRegression.load_model("temp_test_gp_regressor_without_data.json.zip", (X,Y))
-        import os
        os.remove("temp_test_gp_regressor_with_data.json.zip")
        os.remove("temp_test_gp_regressor_without_data.json.zip")

@ -174,7 +225,7 @@ class Test(unittest.TestCase):
        np.testing.assert_array_equal(var.flatten(), var1_r.flatten())
        np.testing.assert_array_equal(var.flatten(), var2_r.flatten())

-    def test_serialize_deserialize_inference_GPClassifier(self):
+    def test_serialize_deserialize_GPClassification(self):
        np.random.seed(fixed_seed)
        N = 50
        Nhalf = int(N/2)
@ -186,8 +237,9 @@ class Test(unittest.TestCase):
        m.save_model("temp_test_gp_classifier_with_data.json", compress=True, save_data=True)
        m.save_model("temp_test_gp_classifier_without_data.json", compress=True, save_data=False)
        m1_r = GPy.models.GPClassification.load_model("temp_test_gp_classifier_with_data.json.zip")
+        self.assertTrue(type(m) == type(m1_r), "Incorrect model type. Expected: {} Actual: {}".format(type(m), type(m1_r)))
        m2_r = GPy.models.GPClassification.load_model("temp_test_gp_classifier_without_data.json.zip", (X,Y))
-        import os
+        self.assertTrue(type(m) == type(m2_r), "Incorrect model type. Expected: {} Actual: {}".format(type(m), type(m2_r)))
        os.remove("temp_test_gp_classifier_with_data.json.zip")
        os.remove("temp_test_gp_classifier_without_data.json.zip")

@ -197,6 +249,30 @@ class Test(unittest.TestCase):
        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten())
        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten())

+    def test_serialize_deserialize_SparseGPClassification(self):
+        np.random.seed(fixed_seed)
+        N = 50
+        Nhalf = int(N/2)
+        X = np.hstack([np.random.normal(5, 2, Nhalf), np.random.normal(10, 2, Nhalf)])[:, None]
+        Y = np.hstack([np.ones(Nhalf), np.zeros(Nhalf)])[:, None]
+        kernel = GPy.kern.RBF(1)
+        m = GPy.models.SparseGPClassification(X, Y, num_inducing=3, kernel=kernel)
+        m.optimize()
+        m.save_model("temp_test_sparse_gp_classifier_with_data.json", compress=True, save_data=True)
+        m.save_model("temp_test_sparse_gp_classifier_without_data.json", compress=True, save_data=False)
+        m1_r = GPy.models.SparseGPClassification.load_model("temp_test_sparse_gp_classifier_with_data.json.zip")
+        self.assertTrue(type(m) == type(m1_r), "Incorrect model type. Expected: {} Actual: {}".format(type(m), type(m1_r)))
+        m2_r = GPy.models.SparseGPClassification.load_model("temp_test_sparse_gp_classifier_without_data.json.zip", (X,Y))
+        self.assertTrue(type(m) == type(m2_r), "Incorrect model type. Expected: {} Actual: {}".format(type(m), type(m2_r)))
+        os.remove("temp_test_sparse_gp_classifier_with_data.json.zip")
+        os.remove("temp_test_sparse_gp_classifier_without_data.json.zip")
+
+        var = m.predict(X)[0]
+        var1_r = m1_r.predict(X)[0]
+        var2_r = m2_r.predict(X)[0]
+        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten())
+        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten())
+
 if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.test_parameter_index_operations']
    unittest.main()
--- a/GPy/testing/tp_tests.py
+++ b/GPy/testing/tp_tests.py
@ -0,0 +1,145 @@
+'''
+Created on 14 Jul 2017, based on gp_tests
+
+@author: javdrher
+'''
+import unittest
+import numpy as np, GPy
+
+
+class Test(unittest.TestCase):
+    def setUp(self):
+        np.random.seed(12345)
+        self.N = 20
+        self.N_new = 50
+        self.D = 1
+        self.X = np.random.uniform(-3., 3., (self.N, 1))
+        self.Y = np.sin(self.X) + np.random.randn(self.N, self.D) * 0.05
+        self.X_new = np.random.uniform(-3., 3., (self.N_new, 1))
+
+    def test_setxy_gp(self):
+        k = GPy.kern.RBF(1) + GPy.kern.White(1)
+        m = GPy.models.TPRegression(self.X, self.Y, kernel=k)
+        mu, var = m.predict(m.X)
+        X = m.X.copy()
+        m.set_XY(m.X[:10], m.Y[:10])
+        assert (m.checkgrad(tolerance=1e-2))
+        m.set_XY(X, self.Y)
+        mu2, var2 = m.predict(m.X)
+        np.testing.assert_allclose(mu, mu2)
+        np.testing.assert_allclose(var, var2)
+
+    def test_mean_function(self):
+        from GPy.core.parameterization.param import Param
+        from GPy.core.mapping import Mapping
+
+        class Parabola(Mapping):
+            def __init__(self, variance, degree=2, name='parabola'):
+                super(Parabola, self).__init__(1, 1, name)
+                self.variance = Param('variance', np.ones(degree + 1) * variance)
+                self.degree = degree
+                self.link_parameter(self.variance)
+
+            def f(self, X):
+                p = self.variance[0] * np.ones(X.shape)
+                for i in range(1, self.degree + 1):
+                    p += self.variance[i] * X ** (i)
+                return p
+
+            def gradients_X(self, dL_dF, X):
+                grad = np.zeros(X.shape)
+                for i in range(1, self.degree + 1):
+                    grad += (i) * self.variance[i] * X ** (i - 1)
+                return grad
+
+            def update_gradients(self, dL_dF, X):
+                for i in range(self.degree + 1):
+                    self.variance.gradient[i] = (dL_dF * X ** (i)).sum(0)
+
+        X = np.linspace(-2, 2, 100)[:, None]
+        k = GPy.kern.RBF(1) + GPy.kern.White(1)
+        k.randomize()
+        p = Parabola(.3)
+        p.randomize()
+        Y = p.f(X) + np.random.multivariate_normal(np.zeros(X.shape[0]), k.K(X) + np.eye(X.shape[0]) * 1e-8)[:,
+                     None] + np.random.normal(0, .1, (X.shape[0], 1))
+        m = GPy.models.TPRegression(X, Y, kernel=k, mean_function=p)
+        assert (m.checkgrad(tolerance=2e-1))
+        _ = m.predict(m.X)
+
+    def test_normalizer(self):
+        k = GPy.kern.RBF(1) + GPy.kern.White(1)
+        Y = self.Y
+        mu, std = Y.mean(0), Y.std(0)
+        m = GPy.models.TPRegression(self.X, Y, kernel=k, normalizer=True)
+        m.optimize()
+        assert (m.checkgrad())
+        k = GPy.kern.RBF(1) + GPy.kern.White(1)
+        m2 = GPy.models.TPRegression(self.X, (Y - mu) / std, kernel=k, normalizer=False)
+        m2[:] = m[:]
+
+        mu1, var1 = m.predict(m.X, full_cov=True)
+        mu2, var2 = m2.predict(m2.X, full_cov=True)
+        np.testing.assert_allclose(mu1, (mu2 * std) + mu)
+        np.testing.assert_allclose(var1, var2 * std ** 2)
+
+        mu1, var1 = m.predict(m.X, full_cov=False)
+        mu2, var2 = m2.predict(m2.X, full_cov=False)
+
+        np.testing.assert_allclose(mu1, (mu2 * std) + mu)
+        np.testing.assert_allclose(var1, var2 * std ** 2)
+
+        q50n = m.predict_quantiles(m.X, (50,))
+        q50 = m2.predict_quantiles(m2.X, (50,))
+
+        np.testing.assert_allclose(q50n[0], (q50[0] * std) + mu)
+
+        # Test variance component:
+        qs = np.array([2.5, 97.5])
+        # The quantiles get computed before unormalization
+        # And transformed using the mean transformation:
+        c = np.random.choice(self.X.shape[0])
+        q95 = m2.predict_quantiles(self.X[[c]], qs)
+        mu, var = m2.predict(self.X[[c]])
+        from scipy.stats import t
+        np.testing.assert_allclose((mu + (t.ppf(qs / 100., m2.nu + m2.num_data) * np.sqrt(var))).flatten(),
+                                   np.array(q95).flatten())
+
+    def test_predict_equivalence(self):
+        k = GPy.kern.RBF(1) + GPy.kern.White(1)
+        m = GPy.models.TPRegression(self.X, self.Y, kernel=k)
+        m.optimize()
+        mu1, var1 = m.predict(m.X)
+        mu2, var2 = m.predict_noiseless(m.X)
+        mu3, var3 = m._raw_predict(m.X)
+        np.testing.assert_allclose(mu1, mu2)
+        np.testing.assert_allclose(var1, var2)
+        np.testing.assert_allclose(mu1, mu3)
+        np.testing.assert_allclose(var1, var3)
+
+        m2 = GPy.models.TPRegression(self.X, self.Y, kernel=k, normalizer=True)
+        m2.optimize()
+        mu1, var1 = m2.predict(m.X)
+        mu2, var2 = m2.predict_noiseless(m.X)
+        mu3, var3 = m2._raw_predict(m.X)
+        np.testing.assert_allclose(mu1, mu2)
+        np.testing.assert_allclose(var1, var2)
+        self.assertFalse(np.allclose(mu1, mu3))
+        self.assertFalse(np.allclose(var1, var3))
+
+    def test_gp_equivalence(self):
+        k = GPy.kern.RBF(1)
+        m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
+        m.optimize()
+        mu1, var1 = m.predict(self.X)
+        k1 = GPy.kern.RBF(1)
+        k1[:] = k[:]
+        k2 = GPy.kern.White(1, variance=m.likelihood.variance)
+        m2 = GPy.models.TPRegression(self.X, self.Y, kernel=k1 + k2, deg_free=1e6)
+        mu2, var2 = m2.predict(self.X)
+        np.testing.assert_allclose(mu1, mu2)
+        np.testing.assert_allclose(var1, var2)
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/GPy/util/choleskies.py
+++ b/GPy/util/choleskies.py
@ -4,11 +4,14 @@
 import numpy as np
 from . import linalg
 from .config import config
+
 try:
    from . import choleskies_cython
-    config.set('cython', 'working', 'True')
+    use_choleskies_cython = config.getboolean('cython', 'working')
 except ImportError:
-    config.set('cython', 'working', 'False')
+    print('warning in choleskies: failed to import cython module: falling back to numpy')
+    use_choleskies_cython = False
+

 def safe_root(N):
    i = np.sqrt(N)
@ -100,7 +103,7 @@ def indexes_to_fix_for_low_rank(rank, size):
    return np.setdiff1d(np.arange((size**2+size)/2), keep)


-if config.getboolean('cython', 'working'):
+if use_choleskies_cython:
    triang_to_flat = _triang_to_flat_cython
    flat_to_triang = _flat_to_triang_cython
    backprop_gradient = choleskies_cython.backprop_gradient_par_c
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@ -11,8 +11,11 @@ from scipy.linalg import lapack, blas
 from .config import config
 import logging

-if config.getboolean('cython', 'working'):
+try:
    from . import linalg_cython
+    use_linalg_cython = config.getboolean('cython', 'working')
+except ImportError:
+    use_linalg_cython = False

 def force_F_ordered_symmetric(A):
    """
@ -359,7 +362,7 @@ def symmetrify(A, upper=False):

    note: tries to use cython, falls back to a slower numpy version
    """
-    if config.getboolean('cython', 'working'):
+    if use_linalg_cython:
        _symmetrify_cython(A, upper)
    else:
        _symmetrify_numpy(A, upper)
--- a/Show more
+++ b/Show more