mirror of https://github.com/SheffieldML/GPy.git
Basic framework for serializing GPy models
parent d529da3e6c, commit e572bfb746
26 changed files with 828 additions and 64 deletions
@@ -109,17 +109,79 @@ class GP(Model):
        self.link_parameter(self.likelihood)
        self.posterior = None

    def to_dict(self, save_data=True):
        input_dict = super(GP, self)._to_dict()
        input_dict["class"] = "GPy.core.GP"
        if not save_data:
            input_dict["X"] = None
            input_dict["Y"] = None
        else:
            try:
                input_dict["X"] = self.X.values.tolist()
            except AttributeError:
                input_dict["X"] = self.X.tolist()
            try:
                input_dict["Y"] = self.Y.values.tolist()
            except AttributeError:
                input_dict["Y"] = self.Y.tolist()
        input_dict["kernel"] = self.kern.to_dict()
        input_dict["likelihood"] = self.likelihood.to_dict()
        if self.mean_function is not None:
            input_dict["mean_function"] = self.mean_function.to_dict()
        input_dict["inference_method"] = self.inference_method.to_dict()
        # FIXME: Assumes the Y_metadata is serializable. We should create a Metadata class.
        if self.Y_metadata is not None:
            input_dict["Y_metadata"] = self.Y_metadata
        if self.normalizer is not None:
            input_dict["normalizer"] = self.normalizer.to_dict()
        return input_dict

    @staticmethod
    def _from_dict(input_dict, data=None):
        import GPy
        import numpy as np
        if (input_dict['X'] is None) or (input_dict['Y'] is None):
            assert data is not None
            input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])
        elif data is not None:
            print("WARNING: The model has been saved with X, Y! The original values are being overridden!")
            input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])
        else:
            input_dict["X"], input_dict["Y"] = np.array(input_dict['X']), np.array(input_dict['Y'])
        input_dict["kernel"] = GPy.kern.Kern.from_dict(input_dict["kernel"])
        input_dict["likelihood"] = GPy.likelihoods.likelihood.Likelihood.from_dict(input_dict["likelihood"])
        mean_function = input_dict.get("mean_function")
        if mean_function is not None:
            input_dict["mean_function"] = GPy.core.mapping.Mapping.from_dict(mean_function)
        else:
            input_dict["mean_function"] = mean_function
        input_dict["inference_method"] = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(input_dict["inference_method"])

        # FIXME: Assumes the Y_metadata is serializable. We should create a Metadata class.
        Y_metadata = input_dict.get("Y_metadata")
        input_dict["Y_metadata"] = Y_metadata

        normalizer = input_dict.get("normalizer")
        if normalizer is not None:
            input_dict["normalizer"] = GPy.util.normalizer._Norm.from_dict(normalizer)
        else:
            input_dict["normalizer"] = normalizer
        return GP(**input_dict)

    def save_model(self, output_filename, compress=True, save_data=True):
        self._save_model(output_filename, compress=compress, save_data=save_data)

    # The predictive variable to be used to predict using the posterior object's
    # woodbury_vector and woodbury_inv is defined as predictive_variable
    # as long as the posterior has the right woodbury entries.
    # It is the input variable used for the covariance between
    # X_star and the posterior of the GP.
    # This is usually just a link to self.X (full GP) or self.Z (sparse GP).
    # Make sure to name this variable and the predict functions will "just work".
    # In maths the predictive variable is:
    #   K_{xx} - K_{xp} W_{pp}^{-1} K_{px}
    #   W_{pp} := \texttt{Woodbury inv}
    #   p := _predictive_variable

    @property
    def _predictive_variable(self):
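Taken together, to_dict/save_model here and load_model on Model give a JSON round trip. A minimal sketch (illustrative, not part of this commit; with compress=True, save_model appends ".zip" to the given name, as the new tests below exercise):

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))
m.save_model("my_gp", compress=True, save_data=True)   # writes my_gp.zip
m_r = GPy.models.GPRegression.load_model("my_gp.zip")  # rebuilds kernel, likelihood, inference method
np.testing.assert_allclose(m.predict(X)[0], m_r.predict(X)[0])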
@@ -305,9 +367,9 @@ class GP(Model):
        m, v = self._raw_predict(X, full_cov=False, kern=kern)
        if likelihood is None:
            likelihood = self.likelihood

        quantiles = likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)

        if self.normalizer is not None:
            quantiles = [self.normalizer.inverse_mean(q) for q in quantiles]
        return quantiles

@@ -616,4 +678,3 @@ class GP(Model):
        """
        mu_star, var_star = self._raw_predict(x_test)
        return self.likelihood.log_predictive_density_sampling(y_test, mu_star, var_star, Y_metadata=Y_metadata, num_samples=num_samples)
@@ -25,6 +25,30 @@ class Mapping(Parameterized):
    def update_gradients(self, dL_dF, X):
        raise NotImplementedError

    def to_dict(self):
        raise NotImplementedError

    def _to_dict(self):
        input_dict = {}
        input_dict["input_dim"] = self.input_dim
        input_dict["output_dim"] = self.output_dim
        input_dict["name"] = self.name
        return input_dict

    @staticmethod
    def from_dict(input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        mapping_class = input_dict.pop('class')
        input_dict["name"] = str(input_dict["name"])
        import GPy
        mapping_class = eval(mapping_class)
        return mapping_class._from_dict(mapping_class, input_dict)

    @staticmethod
    def _from_dict(mapping_class, input_dict):
        return mapping_class(**input_dict)


class Bijective_mapping(Mapping):
    """

@@ -37,5 +61,3 @@ class Bijective_mapping(Mapping):
    def g(self, f):
        """Inverse mapping from output domain of the function to the inputs."""
        raise NotImplementedError
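A sketch of the dispatch this enables, in the spirit of the new serialization tests below (GPy.mappings.Linear is one concrete Mapping):

m = GPy.mappings.Linear(3, 2)
m_r = GPy.core.mapping.Mapping.from_dict(m.to_dict())  # the 'class' entry selects the subclass
assert np.all(m.A == m_r.A)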
@@ -8,6 +8,61 @@ class Model(ParamzModel, Priorizable):
    def __init__(self, name):
        super(Model, self).__init__(name)  # Parameterized.__init__(self)

    def _to_dict(self):
        input_dict = {}
        input_dict["name"] = self.name
        return input_dict

    def to_dict(self):
        raise NotImplementedError

    @staticmethod
    def from_dict(input_dict, data=None):
        import copy
        input_dict = copy.deepcopy(input_dict)
        model_class = input_dict.pop('class')
        input_dict["name"] = str(input_dict["name"])
        import GPy
        model_class = eval(model_class)
        return model_class._from_dict(input_dict, data)

    @staticmethod
    def _from_dict(model_class, input_dict, data=None):
        return model_class(**input_dict)

    def save_model(self, output_filename, compress=True, save_data=True):
        raise NotImplementedError

    def _save_model(self, output_filename, compress=True, save_data=True):
        import json
        output_dict = self.to_dict(save_data)
        if compress:
            import gzip
            with gzip.GzipFile(output_filename + ".zip", 'w') as outfile:
                json_str = json.dumps(output_dict)
                json_bytes = json_str.encode('utf-8')
                outfile.write(json_bytes)
        else:
            with open(output_filename + ".json", 'w') as outfile:
                json.dump(output_dict, outfile)

    @staticmethod
    def load_model(output_filename, data=None):
        compress = output_filename.split(".")[-1] == "zip"
        import json
        if compress:
            import gzip
            with gzip.GzipFile(output_filename, 'r') as json_data:
                json_bytes = json_data.read()
                json_str = json_bytes.decode('utf-8')
                output_dict = json.loads(json_str)
        else:
            with open(output_filename) as json_data:
                output_dict = json.load(json_data)
        import GPy
        return GPy.core.model.Model.from_dict(output_dict, data)

    def log_likelihood(self):
        raise NotImplementedError("this needs to be implemented to use the model class")
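Because _save_model writes plain (optionally gzipped) JSON, the artifact can be inspected without GPy at all. A sketch, assuming a model was saved as my_gp.zip per the earlier example:

import gzip, json
with gzip.GzipFile("my_gp.zip", 'r') as f:
    d = json.loads(f.read().decode('utf-8'))
print(d["class"])        # e.g. "GPy.models.GPRegression"
print(sorted(d.keys()))  # X, Y, kernel, likelihood, inference_method, ...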
@@ -41,6 +41,26 @@ class LatentFunctionInference(object):
        """
        pass

    def _to_dict(self):
        input_dict = {}
        return input_dict

    def to_dict(self):
        raise NotImplementedError

    @staticmethod
    def from_dict(input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        inference_class = input_dict.pop('class')
        import GPy
        inference_class = eval(inference_class)
        return inference_class._from_dict(inference_class, input_dict)

    @staticmethod
    def _from_dict(inference_class, input_dict):
        return inference_class(**input_dict)

class InferenceMethodList(LatentFunctionInference, list):

    def on_optimization_start(self):
@@ -21,6 +21,11 @@ class ExactGaussianInference(LatentFunctionInference):
    def __init__(self):
        pass  # self._YYTfactor_cache = caching.cache()

    def to_dict(self):
        input_dict = super(ExactGaussianInference, self)._to_dict()
        input_dict["class"] = "GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference"
        return input_dict

    def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, K=None, variance=None, Z_tilde=None):
        """
        Returns a Posterior class containing essential quantities of the posterior
@@ -7,6 +7,7 @@ from . import ExactGaussianInference, VarDTC
from ...util import diag
from .posterior import PosteriorEP as Posterior
from ...likelihoods import Gaussian
from . import LatentFunctionInference

log_2_pi = np.log(2*np.pi)

@@ -26,6 +27,14 @@ class cavityParams(object):
    def _update_i(self, eta, ga_approx, post_params, i):
        self.tau[i] = 1./post_params.Sigma_diag[i] - eta*ga_approx.tau[i]
        self.v[i] = post_params.mu[i]/post_params.Sigma_diag[i] - eta*ga_approx.v[i]

    def to_dict(self):
        return {"tau": self.tau.tolist(), "v": self.v.tolist()}

    @staticmethod
    def from_dict(input_dict):
        c = cavityParams(len(input_dict["tau"]))
        c.tau = np.array(input_dict["tau"])
        c.v = np.array(input_dict["v"])
        return c


class gaussianApproximation(object):

@@ -48,6 +57,11 @@ class gaussianApproximation(object):
        self.v[i] += delta_v

        return (delta_tau, delta_v)

    def to_dict(self):
        return {"tau": self.tau.tolist(), "v": self.v.tolist()}

    @staticmethod
    def from_dict(input_dict):
        return gaussianApproximation(np.array(input_dict["v"]), np.array(input_dict["tau"]))


class posteriorParamsBase(object):

@@ -71,6 +85,20 @@ class posteriorParams(posteriorParamsBase):
        ci = delta_tau/(1. + delta_tau*self.Sigma_diag[i])
        DSYR(self.Sigma, self.Sigma[:,i].copy(), -ci)
        self.mu = np.dot(self.Sigma, ga_approx.v)

    def to_dict(self):
        # TODO: Implement a more memory-efficient variant
        if self.L is None:
            return {"mu": self.mu.tolist(), "Sigma": self.Sigma.tolist()}
        else:
            return {"mu": self.mu.tolist(), "Sigma": self.Sigma.tolist(), "L": self.L.tolist()}

    @staticmethod
    def from_dict(input_dict):
        if "L" in input_dict:
            return posteriorParams(np.array(input_dict["mu"]), np.array(input_dict["Sigma"]), np.array(input_dict["L"]))
        else:
            return posteriorParams(np.array(input_dict["mu"]), np.array(input_dict["Sigma"]))

    @staticmethod
    def _recompute(K, ga_approx):

@@ -112,7 +140,7 @@ class posteriorParamsDTC(posteriorParamsBase):
        return posteriorParamsDTC(mu, Sigma_diag), LLT

class EPBase(object):
    def __init__(self, epsilon=1e-6, eta=1., delta=1., always_reset=False, max_iters=np.inf, ep_mode="alternated", parallel_updates=False, loading=False):
        """
        The expectation-propagation algorithm.
        For nomenclature see Rasmussen & Williams 2006.

@@ -128,6 +156,7 @@ class EPBase(object):
        :max_iters: int
        :ep_mode: string. It can be "nested" (EP is run every time the hyperparameters change) or "alternated" (EP runs at the beginning and then the hyperparameters are optimized).
        :parallel_updates: boolean. If True, the parameters of the sites are updated in parallel.
        :loading: boolean. If True, prevents the EP parameters from changing. Hack used when loading a serialized model.
        """
        super(EPBase, self).__init__()

@@ -135,6 +164,8 @@ class EPBase(object):
        self.epsilon, self.eta, self.delta, self.max_iters = epsilon, eta, delta, max_iters
        self.ep_mode = ep_mode
        self.parallel_updates = parallel_updates
        # FIXME: Hack for serialization. If True, prevents the EP parameters from changing when loading a serialized model.
        self.loading = loading
        self.reset()

    def reset(self):

@@ -161,9 +192,21 @@ class EPBase(object):
    def __getstate__(self):
        return [super(EPBase, self).__getstate__(), [self.epsilon, self.eta, self.delta]]

    def _to_dict(self):
        input_dict = super(EPBase, self)._to_dict()
        input_dict["epsilon"] = self.epsilon
        input_dict["eta"] = self.eta
        input_dict["delta"] = self.delta
        input_dict["always_reset"] = self.always_reset
        input_dict["max_iters"] = self.max_iters
        input_dict["ep_mode"] = self.ep_mode
        input_dict["parallel_updates"] = self.parallel_updates
        input_dict["loading"] = True
        return input_dict

class EP(EPBase, ExactGaussianInference):
    def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, precision=None, K=None):
        if self.always_reset and not self.loading:
            self.reset()

        num_data, output_dim = Y.shape

@@ -172,11 +215,11 @@ class EP(EPBase, ExactGaussianInference):
        if K is None:
            K = kern.K(X)

        if self.ep_mode=="nested" and not self.loading:
            # Force EP at each step of the optimization
            self._ep_approximation = None
            post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
        elif self.ep_mode=="alternated" or self.loading:
            if getattr(self, '_ep_approximation', None) is None:
                # If we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
                post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)

@@ -186,6 +229,7 @@ class EP(EPBase, ExactGaussianInference):
        else:
            raise ValueError("ep_mode value not valid")

        self.loading = False
        return self._inference(Y, K, ga_approx, cav_params, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)

    def expectation_propagation(self, K, Y, likelihood, Y_metadata):

@@ -297,6 +341,36 @@ class EP(EPBase, ExactGaussianInference):
        dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh')
        return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK': dL_dK, 'dL_dthetaL': dL_dthetaL, 'dL_dm': alpha}

    def to_dict(self):
        input_dict = super(EP, self)._to_dict()
        input_dict["class"] = "GPy.inference.latent_function_inference.expectation_propagation.EP"
        if self.ga_approx_old is not None:
            input_dict["ga_approx_old"] = self.ga_approx_old.to_dict()
        if self._ep_approximation is not None:
            input_dict["_ep_approximation"] = {}
            input_dict["_ep_approximation"]["post_params"] = self._ep_approximation[0].to_dict()
            input_dict["_ep_approximation"]["ga_approx"] = self._ep_approximation[1].to_dict()
            input_dict["_ep_approximation"]["cav_params"] = self._ep_approximation[2].to_dict()
            input_dict["_ep_approximation"]["log_Z_tilde"] = self._ep_approximation[3].tolist()

        return input_dict

    @staticmethod
    def _from_dict(inference_class, input_dict):
        ga_approx_old = input_dict.pop('ga_approx_old', None)
        if ga_approx_old is not None:
            ga_approx_old = gaussianApproximation.from_dict(ga_approx_old)
        _ep_approximation_dict = input_dict.pop('_ep_approximation', None)
        _ep_approximation = []
        if _ep_approximation_dict is not None:
            _ep_approximation.append(posteriorParams.from_dict(_ep_approximation_dict["post_params"]))
            _ep_approximation.append(gaussianApproximation.from_dict(_ep_approximation_dict["ga_approx"]))
            _ep_approximation.append(cavityParams.from_dict(_ep_approximation_dict["cav_params"]))
            _ep_approximation.append(np.array(_ep_approximation_dict["log_Z_tilde"]))
        ee = EP(**input_dict)
        ee.ga_approx_old = ga_approx_old
        ee._ep_approximation = _ep_approximation
        return ee

class EPDTC(EPBase, VarDTC):
    def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
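The EP state (cavity, site, and posterior parameters) thus survives the round trip, and the loading flag stops inference from re-running EP once after a reload. A minimal sketch, with the base class as the from_dict entry point, as in the tests below:

e = GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode="nested")
e_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e.to_dict())
assert type(e) == type(e_r) and e.epsilon == e_r.epsilon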
@@ -13,9 +13,9 @@ class Add(CombinationKernel):
    propagates gradients through.

    This kernel will take over the active dims of its subkernels passed in.

    NOTE: The subkernels will be copies of the original kernels, to prevent
    unexpected behavior.
    """
    def __init__(self, subkerns, name='sum'):
        _newkerns = []

@@ -26,7 +26,7 @@ class Add(CombinationKernel):
                _newkerns.append(part.copy())
            else:
                _newkerns.append(kern.copy())

        super(Add, self).__init__(_newkerns, name)
        self._exact_psicomp = self._check_exact_psicomp()

@@ -43,6 +43,11 @@ class Add(CombinationKernel):
        else:
            return False

    def to_dict(self):
        input_dict = super(Add, self)._to_dict()
        input_dict["class"] = str("GPy.kern.Add")
        return input_dict

    @Cache_this(limit=3, force_kwargs=['which_parts'])
    def K(self, X, X2=None, which_parts=None):
        """
@@ -60,6 +60,35 @@ class Kern(Parameterized):
            from .psi_comp import PSICOMP_GH
            self.psicomp = PSICOMP_GH()

    def _to_dict(self):
        input_dict = {}
        input_dict["input_dim"] = self.input_dim
        if isinstance(self.active_dims, np.ndarray):
            input_dict["active_dims"] = self.active_dims.tolist()
        else:
            input_dict["active_dims"] = self.active_dims
        input_dict["name"] = self.name
        input_dict["useGPU"] = self.useGPU
        return input_dict

    def to_dict(self):
        raise NotImplementedError

    @staticmethod
    def from_dict(input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        kernel_class = input_dict.pop('class')
        input_dict["name"] = str(input_dict["name"])
        import GPy
        kernel_class = eval(kernel_class)
        return kernel_class._from_dict(kernel_class, input_dict)

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        return kernel_class(**input_dict)

    def __setstate__(self, state):
        self._all_dims_active = np.arange(0, max(state['active_dims']) + 1)
        super(Kern, self).__setstate__(state)

@@ -342,6 +371,21 @@ class CombinationKernel(Kern):
        self.extra_dims = extra_dims
        self.link_parameters(*kernels)

    def _to_dict(self):
        input_dict = super(CombinationKernel, self)._to_dict()
        input_dict["parts"] = {}
        for ii in range(len(self.parts)):
            input_dict["parts"][ii] = self.parts[ii].to_dict()
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        parts = input_dict.pop('parts', None)
        subkerns = []
        for pp in parts:
            subkerns.append(Kern.from_dict(parts[pp]))
        return kernel_class(subkerns)

    @property
    def parts(self):
        return self.parameters
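Because CombinationKernel serializes its parts recursively and Kern.from_dict dispatches on the 'class' entry, composite kernels rebuild themselves. A sketch mirroring the tests below:

k = GPy.kern.RBF(2, ARD=True) + GPy.kern.Bias(2)
k_r = GPy.kern.Kern.from_dict(k.to_dict())  # CombinationKernel._from_dict rebuilds each part via Kern.from_dict
np.testing.assert_array_equal(k[:], k_r[:])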
@@ -51,6 +51,18 @@ class Linear(Kern):
            self.link_parameter(self.variances)
        self.psicomp = PSICOMP_Linear()

    def to_dict(self):
        input_dict = super(Linear, self)._to_dict()
        input_dict["class"] = "GPy.kern.Linear"
        input_dict["variances"] = self.variances.values.tolist()
        input_dict["ARD"] = self.ARD
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Linear(**input_dict)

    @Cache_this(limit=3)
    def K(self, X, X2=None):
        if self.ARD:

@@ -211,5 +223,3 @@ class LinearFull(Kern):
    def gradients_X_diag(self, dL_dKdiag, X):
        P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
        return 2.*np.einsum('jk,i,ij->ik', P, dL_dKdiag, X)
@@ -400,4 +400,3 @@ class PeriodicMatern52(Periodic):
        self.variance.gradient = np.sum(dK_dvar*dL_dK)
        self.lengthscale.gradient = np.sum(dK_dlen*dL_dK)
        self.period.gradient = np.sum(dK_dper*dL_dK)
@@ -39,6 +39,11 @@ class Prod(CombinationKernel):
        kernels.insert(i, part)
        super(Prod, self).__init__(kernels, name)

    def to_dict(self):
        input_dict = super(Prod, self)._to_dict()
        input_dict["class"] = str("GPy.kern.Prod")
        return input_dict

    @Cache_this(limit=3, force_kwargs=['which_parts'])
    def K(self, X, X2=None, which_parts=None):
        if which_parts is None:

@@ -117,7 +122,7 @@ class Prod(CombinationKernel):
        part_param_num = len(p.param_array)  # number of parameters in the part
        p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
        part_start_param_index += part_param_num

    def sde(self):
        """
        """

@@ -131,88 +136,88 @@ class Prod(CombinationKernel):
        dQc = None
        dPinf = None
        dP0 = None

        # Assign models
        for p in self.parts:
            (Ft, Lt, Qct, Ht, P_inft, P0t, dFt, dQct, dP_inft, dP0t) = p.sde()

            # check derivative dimensions ->
            number_of_parameters = len(p.param_array)
            assert dFt.shape[2] == number_of_parameters, "Dynamic matrix derivative shape is wrong"
            assert dQct.shape[2] == number_of_parameters, "Diffusion matrix derivative shape is wrong"
            assert dP_inft.shape[2] == number_of_parameters, "Infinite covariance matrix derivative shape is wrong"
            # check derivative dimensions <-

            # exception for periodic kernel
            if (p.name == 'std_periodic'):
                Qct = P_inft
                dQct = dP_inft

            dF = dkron(F, dF, Ft, dFt, 'sum')
            dQc = dkron(Qc, dQc, Qct, dQct, 'prod')
            dPinf = dkron(Pinf, dPinf, P_inft, dP_inft, 'prod')
            dP0 = dkron(P0, dP0, P0t, dP0t, 'prod')

            F = np.kron(F, np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]), Ft)
            L = np.kron(L, Lt)
            Qc = np.kron(Qc, Qct)
            Pinf = np.kron(Pinf, P_inft)
            P0 = np.kron(P0, P0t)
            H = np.kron(H, Ht)

        return (F, L, Qc, H, Pinf, P0, dF, dQc, dPinf, dP0)

def dkron(A, dA, B, dB, operation='prod'):
    """
    Function computes the derivative of the Kronecker product A*B
    (or Kronecker sum A+B).

    Input:
    -----------------------

    A: 2D matrix
        Some matrix
    dA: 3D (or 2D) matrix
        Derivatives of A
    B: 2D matrix
        Some matrix
    dB: 3D (or 2D) matrix
        Derivatives of B

    operation: str 'prod' or 'sum'
        Which operation is considered. If the operation is 'sum' it is assumed
        that A and B are square matrices.

    Output:
    -----------------------
    dC: 3D matrix
        Derivative of the Kronecker product A*B (or Kronecker sum A+B)
    """

    if dA is None:
        dA_param_num = 0
        dA = np.zeros((A.shape[0], A.shape[1], 1))
    else:
        dA_param_num = dA.shape[2]

    if dB is None:
        dB_param_num = 0
        dB = np.zeros((B.shape[0], B.shape[1], 1))
    else:
        dB_param_num = dB.shape[2]

    # Space allocation for the derivative matrix
    dC = np.zeros((A.shape[0]*B.shape[0], A.shape[1]*B.shape[1], dA_param_num + dB_param_num))

    for k in range(dA_param_num):
        if operation == 'prod':
            dC[:,:,k] = np.kron(dA[:,:,k], B)
        else:
            dC[:,:,k] = np.kron(dA[:,:,k], np.eye(B.shape[0]))

    for k in range(dB_param_num):
        if operation == 'prod':
            dC[:,:,dA_param_num+k] = np.kron(A, dB[:,:,k])
        else:
            dC[:,:,dA_param_num+k] = np.kron(np.eye(A.shape[0]), dB[:,:,k])

    return dC
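A quick numerical sanity check of dkron (a sketch): for 'prod' with a single parameter in A and a constant B, the only derivative slice should equal kron(dA, B):

A = np.array([[1., 2.], [3., 4.]])
dA = np.ones((2, 2, 1))  # one parameter
B = np.eye(3)
dC = dkron(A, dA, B, None, operation='prod')
assert np.allclose(dC[:, :, 0], np.kron(dA[:, :, 0], B))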
@@ -31,6 +31,14 @@ class RBF(Stationary):
            self.inv_l = Param('inv_lengthscale', 1./self.lengthscale**2, Logexp())
            self.link_parameter(self.inv_l)

    def to_dict(self):
        input_dict = super(RBF, self)._to_dict()
        input_dict["class"] = "GPy.kern.RBF"
        input_dict["inv_l"] = self.use_invLengthscale
        if input_dict["inv_l"]:
            # inv_l is 1/lengthscale**2, so recover the lengthscale for the constructor
            input_dict["lengthscale"] = np.sqrt(1 / float(self.inv_l))
        return input_dict

    def K_of_r(self, r):
        return self.variance * np.exp(-0.5 * r**2)

@@ -42,7 +50,7 @@ class RBF(Stationary):

    def dK2_drdr_diag(self):
        return -self.variance  # as the diagonal of r is always filled with zeros

    def __getstate__(self):
        dc = super(RBF, self).__getstate__()
        if self.useGPU:
@@ -93,6 +93,17 @@ class StdPeriodic(Kern):

        self.link_parameters(self.variance, self.period, self.lengthscale)

    def to_dict(self):
        input_dict = super(StdPeriodic, self)._to_dict()
        input_dict["class"] = "GPy.kern.StdPeriodic"
        input_dict["variance"] = self.variance.values.tolist()
        input_dict["period"] = self.period.values.tolist()
        input_dict["lengthscale"] = self.lengthscale.values.tolist()
        input_dict["ARD1"] = self.ARD1
        input_dict["ARD2"] = self.ARD2
        return input_dict

    def parameters_changed(self):
        """
        This function acts as a callback for each optimization iteration.
@@ -14,6 +14,11 @@ class Static(Kern):
        self.variance = Param('variance', variance, Logexp())
        self.link_parameters(self.variance)

    def _to_dict(self):
        input_dict = super(Static, self)._to_dict()
        input_dict["variance"] = self.variance.values.tolist()
        return input_dict

    def Kdiag(self, X):
        ret = np.empty((X.shape[0],), dtype=np.float64)
        ret[:] = self.variance

@@ -133,6 +138,16 @@ class Bias(Static):
    def __init__(self, input_dim, variance=1., active_dims=None, name='bias'):
        super(Bias, self).__init__(input_dim, variance, active_dims, name)

    def to_dict(self):
        input_dict = super(Bias, self)._to_dict()
        input_dict["class"] = "GPy.kern.Bias"
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Bias(**input_dict)

    def K(self, X, X2=None):
        shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
        return np.full(shape, self.variance, dtype=np.float64)

@@ -250,4 +265,3 @@ class Precomputed(Fixed):

    def update_gradients_diag(self, dL_dKdiag, X):
        self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self._index(X, None))
@@ -79,6 +79,13 @@ class Stationary(Kern):
        assert self.variance.size == 1
        self.link_parameters(self.variance, self.lengthscale)

    def _to_dict(self):
        input_dict = super(Stationary, self)._to_dict()
        input_dict["variance"] = self.variance.values.tolist()
        input_dict["lengthscale"] = self.lengthscale.values.tolist()
        input_dict["ARD"] = self.ARD
        return input_dict

    def K_of_r(self, r):
        raise NotImplementedError("implement the covariance function as a fn of r to use this class")

@@ -351,6 +358,16 @@ class Exponential(Stationary):
    def dK_dr(self, r):
        return -self.K_of_r(r)

    def to_dict(self):
        input_dict = super(Exponential, self)._to_dict()
        input_dict["class"] = "GPy.kern.Exponential"
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Exponential(**input_dict)

    # def sde(self):
    #     """
    #     Return the state space representation of the covariance.

@@ -399,6 +416,16 @@ class Matern32(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat32'):
        super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def to_dict(self):
        input_dict = super(Matern32, self)._to_dict()
        input_dict["class"] = "GPy.kern.Matern32"
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Matern32(**input_dict)

    def K_of_r(self, r):
        return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)

@@ -478,6 +505,16 @@ class Matern52(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
        super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def to_dict(self):
        input_dict = super(Matern52, self)._to_dict()
        input_dict["class"] = "GPy.kern.Matern52"
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Matern52(**input_dict)

    def K_of_r(self, r):
        return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r)

@@ -533,6 +570,16 @@ class ExpQuad(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='ExpQuad'):
        super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def to_dict(self):
        input_dict = super(ExpQuad, self)._to_dict()
        input_dict["class"] = "GPy.kern.ExpQuad"
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return ExpQuad(**input_dict)

    def K_of_r(self, r):
        return self.variance * np.exp(-0.5 * r**2)

@@ -566,6 +613,17 @@ class RatQuad(Stationary):
        self.power = Param('power', power, Logexp())
        self.link_parameters(self.power)

    def to_dict(self):
        input_dict = super(RatQuad, self)._to_dict()
        input_dict["class"] = "GPy.kern.RatQuad"
        input_dict["power"] = self.power.values.tolist()
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return RatQuad(**input_dict)

    def K_of_r(self, r):
        r2 = np.square(r)
        # return self.variance*np.power(1. + r2/2., -self.power)

@@ -588,5 +646,3 @@ class RatQuad(Stationary):
    def update_gradients_diag(self, dL_dKdiag, X):
        super(RatQuad, self).update_gradients_diag(dL_dKdiag, X)
        self.power.gradient = 0.
@@ -29,6 +29,11 @@ class Bernoulli(Likelihood):
        if isinstance(gp_link, (link_functions.Heaviside, link_functions.Probit)):
            self.log_concave = True

    def to_dict(self):
        input_dict = super(Bernoulli, self)._to_dict()
        input_dict["class"] = "GPy.likelihoods.Bernoulli"
        return input_dict

    def _preprocess_values(self, Y):
        """
        Check if the values of the observations correspond to the values
@@ -46,6 +46,13 @@ class Gaussian(Likelihood):
        if isinstance(gp_link, link_functions.Identity):
            self.log_concave = True

    def to_dict(self):
        input_dict = super(Gaussian, self)._to_dict()
        input_dict["class"] = "GPy.likelihoods.Gaussian"
        input_dict["variance"] = self.variance.values.tolist()
        return input_dict

    def betaY(self, Y, Y_metadata=None):
        # TODO: ~Ricardo this does not live here
        raise RuntimeError("Please notify the GPy developers, this should not happen")
@@ -44,6 +44,37 @@ class Likelihood(Parameterized):
        self.gp_link = gp_link
        self.log_concave = False
        self.not_block_really = False
        self.name = name

    def to_dict(self):
        raise NotImplementedError

    def _to_dict(self):
        input_dict = {}
        input_dict["name"] = self.name
        input_dict["gp_link_dict"] = self.gp_link.to_dict()
        return input_dict

    @staticmethod
    def from_dict(input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        likelihood_class = input_dict.pop('class')
        input_dict["name"] = str(input_dict["name"])
        name = input_dict.pop('name')
        import GPy
        likelihood_class = eval(likelihood_class)
        return likelihood_class._from_dict(likelihood_class, input_dict)

    @staticmethod
    def _from_dict(likelihood_class, input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        gp_link_dict = input_dict.pop('gp_link_dict')
        import GPy
        gp_link = GPy.likelihoods.link_functions.GPTransformation.from_dict(gp_link_dict)
        input_dict["gp_link"] = gp_link
        return likelihood_class(**input_dict)

    def request_num_latent_functions(self, Y):
        """
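Round trip in the style of the tests below; note that _from_dict rebuilds the link function first and hands it to the likelihood constructor:

l = GPy.likelihoods.Gaussian(GPy.likelihoods.link_functions.Identity(), variance=3.0)
l_r = GPy.likelihoods.likelihood.Likelihood.from_dict(l.to_dict())
assert np.all(l.variance == l_r.variance)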
@@ -43,6 +43,25 @@ class GPTransformation(object):
        """
        raise NotImplementedError

    def to_dict(self):
        raise NotImplementedError

    def _to_dict(self):
        return {}

    @staticmethod
    def from_dict(input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        link_class = input_dict.pop('class')
        import GPy
        link_class = eval(link_class)
        return link_class._from_dict(link_class, input_dict)

    @staticmethod
    def _from_dict(link_class, input_dict):
        return link_class(**input_dict)

class Identity(GPTransformation):
    """
    .. math::

@@ -62,6 +81,10 @@ class Identity(GPTransformation):
    def d3transf_df3(self, f):
        return np.zeros_like(f)

    def to_dict(self):
        input_dict = super(Identity, self)._to_dict()
        input_dict["class"] = "GPy.likelihoods.link_functions.Identity"
        return input_dict

class Probit(GPTransformation):
    """

@@ -82,6 +105,11 @@ class Probit(GPTransformation):
    def d3transf_df3(self, f):
        return (safe_square(f)-1.)*std_norm_pdf(f)

    def to_dict(self):
        input_dict = super(Probit, self)._to_dict()
        input_dict["class"] = "GPy.likelihoods.link_functions.Probit"
        return input_dict


class Cloglog(GPTransformation):
    """
@@ -38,3 +38,9 @@ class Constant(Mapping):

    def gradients_X(self, dL_dF, X):
        return np.zeros_like(X)

    def to_dict(self):
        input_dict = super(Constant, self)._to_dict()
        input_dict["class"] = "GPy.mappings.Constant"
        input_dict["value"] = self.C.values[0]
        return input_dict

@@ -19,8 +19,7 @@ class Identity(Mapping):
    def gradients_X(self, dL_dF, X):
        return dL_dF

    def to_dict(self):
        input_dict = super(Identity, self)._to_dict()
        input_dict["class"] = "GPy.mappings.Identity"
        return input_dict
@@ -37,3 +37,21 @@ class Linear(Mapping):

    def gradients_X(self, dL_dF, X):
        return np.dot(dL_dF, self.A.T)

    def to_dict(self):
        input_dict = super(Linear, self)._to_dict()
        input_dict["class"] = "GPy.mappings.Linear"
        input_dict["A"] = self.A.values.tolist()
        return input_dict

    @staticmethod
    def _from_dict(mapping_class, input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        A = np.array(input_dict.pop('A'))
        l = Linear(**input_dict)
        l.unlink_parameter(l.A)
        l.update_model(False)
        l.A = Param('A', A)
        l.link_parameter(l.A)
        return l
@@ -4,6 +4,7 @@
from ..core import GP
from .. import likelihoods
from .. import kern
import numpy as np
from ..inference.latent_function_inference.expectation_propagation import EP

class GPClassification(GP):

@@ -27,3 +28,23 @@ class GPClassification(GP):
            likelihood = likelihoods.Bernoulli()

        GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=EP(), mean_function=mean_function, name='gp_classification')

    @staticmethod
    def from_gp(gp):
        from copy import deepcopy
        gp = deepcopy(gp)
        return GPClassification(gp.X, gp.Y, gp.kern, gp.likelihood, gp.inference_method, gp.mean_function, name='gp_classification')

    def to_dict(self, save_data=True):
        model_dict = super(GPClassification, self).to_dict(save_data)
        model_dict["class"] = "GPy.models.GPClassification"
        return model_dict

    @staticmethod
    def from_dict(input_dict, data=None):
        import GPy
        m = GPy.core.model.Model.from_dict(input_dict, data)
        return GPClassification.from_gp(m)

    def save_model(self, output_filename, compress=True, save_data=True):
        self._save_model(output_filename, compress=compress, save_data=save_data)
@@ -35,3 +35,23 @@ class GPRegression(GP):

        super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer, mean_function=mean_function)

    @staticmethod
    def from_gp(gp):
        from copy import deepcopy
        gp = deepcopy(gp)
        return GPRegression(gp.X, gp.Y, gp.kern, gp.Y_metadata, gp.normalizer, gp.likelihood.variance.values, gp.mean_function)

    def to_dict(self, save_data=True):
        model_dict = super(GPRegression, self).to_dict(save_data)
        model_dict["class"] = "GPy.models.GPRegression"
        return model_dict

    @staticmethod
    def _from_dict(input_dict, data=None):
        import GPy
        input_dict["class"] = "GPy.core.GP"
        m = GPy.core.GP.from_dict(input_dict, data)
        return GPRegression.from_gp(m)

    def save_model(self, output_filename, compress=True, save_data=True):
        self._save_model(output_filename, compress=compress, save_data=save_data)

GPy/testing/serialization_tests.py (new file, 202 lines)
@@ -0,0 +1,202 @@
'''
Created on 20 April 2017

@author: pgmoren
'''
import unittest, itertools
#import cPickle as pickle
import pickle
import numpy as np
import tempfile
import GPy
from nose import SkipTest

fixed_seed = 11


class Test(unittest.TestCase):
    def test_serialize_deserialize_kernels(self):
        k1 = GPy.kern.RBF(2, variance=1.0, lengthscale=[1.0, 1.0], ARD=True)
        k2 = GPy.kern.RatQuad(2, variance=2.0, lengthscale=1.0, power=2.0, active_dims=[0, 1])
        k3 = GPy.kern.Bias(2, variance=2.0, active_dims=[1, 0])
        k4 = GPy.kern.StdPeriodic(2, variance=2.0, lengthscale=1.0, period=1.0, active_dims=[1, 1])
        k5 = GPy.kern.Linear(2, variances=[2.0, 1.0], ARD=True, active_dims=[1, 1])
        k6 = GPy.kern.Exponential(2, variance=1., lengthscale=2)
        k7 = GPy.kern.Matern32(2, variance=1.0, lengthscale=[1.0, 3.0], ARD=True, active_dims=[1, 1])
        k8 = GPy.kern.Matern52(2, variance=2.0, lengthscale=[2.0, 1.0], ARD=True, active_dims=[1, 0])
        k9 = GPy.kern.ExpQuad(2, variance=3.0, lengthscale=[1.0, 2.0], ARD=True, active_dims=[0, 1])
        k10 = k1 + k1.copy() + k2 + k3 + k4 + k5 + k6
        k11 = k1 * k2 * k2.copy() * k3 * k4 * k5
        k12 = (k1 + k2) * (k3 + k4 + k5)
        k13 = ((k1 + k2) * k3) + k4 + k5 * k7
        k14 = ((k1 + k2) * k3) + k4 * k5 + k8
        k15 = ((k1 * k2) * k3) + k4 * k5 + k8 + k9

        k_list = [k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13, k14, k15]

        for kk in k_list:
            kk_dict = kk.to_dict()
            kk_r = GPy.kern.Kern.from_dict(kk_dict)
            assert type(kk) == type(kk_r)
            np.testing.assert_array_equal(kk[:], kk_r[:])
            np.testing.assert_array_equal(np.array(kk.active_dims), np.array(kk_r.active_dims))

    def test_serialize_deserialize_mappings(self):
        m1 = GPy.mappings.Identity(3, 2)
        m2 = GPy.mappings.Constant(3, 2, 1)
        m2_r = GPy.core.mapping.Mapping.from_dict(m2.to_dict())
        np.testing.assert_array_equal(m2.C.values[:], m2_r.C.values[:])
        m3 = GPy.mappings.Linear(3, 2)
        m3_r = GPy.core.mapping.Mapping.from_dict(m3.to_dict())
        assert np.all(m3.A == m3_r.A)

        m_list = [m1, m2, m3]
        for mm in m_list:
            mm_dict = mm.to_dict()
            mm_r = GPy.core.mapping.Mapping.from_dict(mm_dict)
            assert type(mm) == type(mm_r)
            assert type(mm.input_dim) == type(mm_r.input_dim)
            assert type(mm.output_dim) == type(mm_r.output_dim)

    def test_serialize_deserialize_likelihoods(self):
        l1 = GPy.likelihoods.Gaussian(GPy.likelihoods.link_functions.Identity(), variance=3.0)
        l1_r = GPy.likelihoods.likelihood.Likelihood.from_dict(l1.to_dict())
        l2 = GPy.likelihoods.Bernoulli(GPy.likelihoods.link_functions.Probit())
        l2_r = GPy.likelihoods.likelihood.Likelihood.from_dict(l2.to_dict())
        assert type(l1) == type(l1_r)
        assert np.all(l1.variance == l1_r.variance)
        assert type(l2) == type(l2_r)

    def test_serialize_deserialize_normalizers(self):
        n1 = GPy.util.normalizer.Standardize()
        n1.scale_by(np.random.rand(10))
        n1_r = GPy.util.normalizer._Norm.from_dict(n1.to_dict())
        assert type(n1) == type(n1_r)
        assert np.all(n1.mean == n1_r.mean)
        assert np.all(n1.std == n1_r.std)

    def test_serialize_deserialize_link_functions(self):
        l1 = GPy.likelihoods.link_functions.Identity()
        l2 = GPy.likelihoods.link_functions.Probit()
        l_list = [l1, l2]
        for ll in l_list:
            ll_dict = ll.to_dict()
            ll_r = GPy.likelihoods.link_functions.GPTransformation.from_dict(ll_dict)
            assert type(ll) == type(ll_r)

    def test_serialize_deserialize_inference_methods(self):
        e1 = GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode="nested")
        e1.ga_approx_old = GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10), np.random.rand(10))
        e1._ep_approximation = []
        e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.posteriorParams(np.random.rand(10), np.random.rand(100).reshape((10, 10))))
        e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10), np.random.rand(10)))
        e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.cavityParams(10))
        e1._ep_approximation[-1].v = np.random.rand(10)
        e1._ep_approximation[-1].tau = np.random.rand(10)
        e1._ep_approximation.append(np.random.rand(10))
        e1_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e1.to_dict())

        assert type(e1) == type(e1_r)
        assert e1.epsilon == e1_r.epsilon
        assert e1.eta == e1_r.eta
        assert e1.delta == e1_r.delta
        assert e1.always_reset == e1_r.always_reset
        assert e1.max_iters == e1_r.max_iters
        assert e1.ep_mode == e1_r.ep_mode
        assert e1.parallel_updates == e1_r.parallel_updates

        np.testing.assert_array_equal(e1.ga_approx_old.tau[:], e1_r.ga_approx_old.tau[:])
        np.testing.assert_array_equal(e1.ga_approx_old.v[:], e1_r.ga_approx_old.v[:])
        np.testing.assert_array_equal(e1._ep_approximation[0].mu[:], e1_r._ep_approximation[0].mu[:])
        np.testing.assert_array_equal(e1._ep_approximation[0].Sigma[:], e1_r._ep_approximation[0].Sigma[:])
        np.testing.assert_array_equal(e1._ep_approximation[1].tau[:], e1_r._ep_approximation[1].tau[:])
        np.testing.assert_array_equal(e1._ep_approximation[1].v[:], e1_r._ep_approximation[1].v[:])
        np.testing.assert_array_equal(e1._ep_approximation[2].tau[:], e1_r._ep_approximation[2].tau[:])
        np.testing.assert_array_equal(e1._ep_approximation[2].v[:], e1_r._ep_approximation[2].v[:])
        np.testing.assert_array_equal(e1._ep_approximation[3][:], e1_r._ep_approximation[3][:])

        e2 = GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference()
        e2_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e2.to_dict())

        assert type(e2) == type(e2_r)

    def test_serialize_deserialize_model(self):
        np.random.seed(fixed_seed)
        N = 20
        Nhalf = int(N/2)
        X = np.hstack([np.random.normal(5, 2, Nhalf), np.random.normal(10, 2, Nhalf)])[:, None]
        Y = np.hstack([np.ones(Nhalf), np.zeros(Nhalf)])[:, None]
        kernel = GPy.kern.RBF(1)
        likelihood = GPy.likelihoods.Bernoulli()
        inference_method = GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode="nested")
        mean_function = None
        m = GPy.core.GP(X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=inference_method, mean_function=mean_function, normalizer=True, name='gp_classification')
        m.optimize()
        m.save_model("temp_test_gp_with_data.json", compress=True, save_data=True)
        m.save_model("temp_test_gp_without_data.json", compress=True, save_data=False)
        m1_r = GPy.core.GP.load_model("temp_test_gp_with_data.json.zip")
        m2_r = GPy.core.GP.load_model("temp_test_gp_without_data.json.zip", (X, Y))
        import os
        os.remove("temp_test_gp_with_data.json.zip")
        os.remove("temp_test_gp_without_data.json.zip")
        var = m.predict(X)[0]
        var1_r = m1_r.predict(X)[0]
        var2_r = m2_r.predict(X)[0]
        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten())
        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var2_r).flatten())

    def test_serialize_deserialize_inference_GPRegressor(self):
        np.random.seed(fixed_seed)
        N = 50
        N_new = 50
        D = 1
        X = np.random.uniform(-3., 3., (N, 1))
        Y = np.sin(X) + np.random.randn(N, D) * 0.05
        X_new = np.random.uniform(-3., 3., (N_new, 1))
        k = GPy.kern.RBF(input_dim=1, lengthscale=10)
        m = GPy.models.GPRegression(X, Y, k)
        m.optimize()
        m.save_model("temp_test_gp_regressor_with_data.json", compress=True, save_data=True)
        m.save_model("temp_test_gp_regressor_without_data.json", compress=True, save_data=False)
        m1_r = GPy.models.GPRegression.load_model("temp_test_gp_regressor_with_data.json.zip")
        m2_r = GPy.models.GPRegression.load_model("temp_test_gp_regressor_without_data.json.zip", (X, Y))
        import os
        os.remove("temp_test_gp_regressor_with_data.json.zip")
        os.remove("temp_test_gp_regressor_without_data.json.zip")

        Xp = np.random.uniform(size=(int(1e5), 1))
        Xp[:, 0] = Xp[:, 0]*15 - 5

        _, var = m.predict(Xp)
        _, var1_r = m1_r.predict(Xp)
        _, var2_r = m2_r.predict(Xp)
        np.testing.assert_array_equal(var.flatten(), var1_r.flatten())
        np.testing.assert_array_equal(var.flatten(), var2_r.flatten())

    def test_serialize_deserialize_inference_GPClassifier(self):
        np.random.seed(fixed_seed)
        N = 50
        Nhalf = int(N/2)
        X = np.hstack([np.random.normal(5, 2, Nhalf), np.random.normal(10, 2, Nhalf)])[:, None]
        Y = np.hstack([np.ones(Nhalf), np.zeros(Nhalf)])[:, None]
        kernel = GPy.kern.RBF(1)
        m = GPy.models.GPClassification(X, Y, kernel=kernel)
        m.optimize()
        m.save_model("temp_test_gp_classifier_with_data.json", compress=True, save_data=True)
        m.save_model("temp_test_gp_classifier_without_data.json", compress=True, save_data=False)
        m1_r = GPy.models.GPClassification.load_model("temp_test_gp_classifier_with_data.json.zip")
        m2_r = GPy.models.GPClassification.load_model("temp_test_gp_classifier_without_data.json.zip", (X, Y))
        import os
        os.remove("temp_test_gp_classifier_with_data.json.zip")
        os.remove("temp_test_gp_classifier_without_data.json.zip")

        var = m.predict(X)[0]
        var1_r = m1_r.predict(X)[0]
        var2_r = m2_r.predict(X)[0]
        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten())
        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var2_r).flatten())

if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.test_parameter_index_operations']
    unittest.main()
@@ -33,6 +33,27 @@ class _Norm(object):
        """
        raise NotImplementedError

    def to_dict(self):
        raise NotImplementedError

    def _to_dict(self):
        input_dict = {}
        return input_dict

    @staticmethod
    def from_dict(input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        normalizer_class = input_dict.pop('class')
        import GPy
        normalizer_class = eval(normalizer_class)
        return normalizer_class._from_dict(normalizer_class, input_dict)

    @staticmethod
    def _from_dict(normalizer_class, input_dict):
        return normalizer_class(**input_dict)


class Standardize(_Norm):
    def __init__(self):
        self.mean = None

@@ -50,9 +71,26 @@ class Standardize(_Norm):
    def scaled(self):
        return self.mean is not None

    def to_dict(self):
        input_dict = super(Standardize, self)._to_dict()
        input_dict["class"] = "GPy.util.normalizer.Standardize"
        if self.mean is not None:
            input_dict["mean"] = self.mean.tolist()
            input_dict["std"] = self.std.tolist()
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        s = Standardize()
        if "mean" in input_dict:
            s.mean = np.array(input_dict["mean"])
        if "std" in input_dict:
            s.std = np.array(input_dict["std"])
        return s

    # Inverse variance to be implemented, disabling for now.
    # If someone in the future wants to implement this,
    # we need to implement the inverse variance for
    # normalization. This means we need to know the factor
    # for the variance to be multiplied to the variance in
    # normalized space. This is easy to compute for standardization

@@ -71,7 +109,7 @@ class Standardize(_Norm):
    # def inverse_mean(self, X):
    #     return (X + .5) * (self.ymax - self.ymin) + self.ymin
    # def inverse_variance(self, var):
    #     return (var*(self.std**2))
    # def scaled(self):
    #     return (self.ymin is not None) and (self.ymax is not None)
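Round trip for the normalizer, following the tests above:

n = GPy.util.normalizer.Standardize()
n.scale_by(np.random.rand(10))
n_r = GPy.util.normalizer._Norm.from_dict(n.to_dict())
assert np.all(n.mean == n_r.mean) and np.all(n.std == n_r.std)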