From e572bfb74632e3cb22a5d178b5d5886475cd6f0b Mon Sep 17 00:00:00 2001
From: Moreno
Date: Tue, 11 Apr 2017 11:42:58 +0100
Subject: [PATCH] Basic framework for serializing GPy models

---
 GPy/core/gp.py                                     |  89 ++++++--
 GPy/core/mapping.py                                |  26 ++-
 GPy/core/model.py                                  |  55 +++++
 .../latent_function_inference/__init__.py          |  20 ++
 .../exact_gaussian_inference.py                    |   5 +
 .../expectation_propagation.py                     |  82 ++++++-
 GPy/kern/src/add.py                                |  13 +-
 GPy/kern/src/kern.py                               |  44 ++++
 GPy/kern/src/linear.py                             |  14 +-
 GPy/kern/src/periodic.py                           |   1 -
 GPy/kern/src/prod.py                               |  55 ++---
 GPy/kern/src/rbf.py                                |  10 +-
 GPy/kern/src/standard_periodic.py                  |  11 +
 GPy/kern/src/static.py                             |  16 +-
 GPy/kern/src/stationary.py                         |  60 +++++-
 GPy/likelihoods/bernoulli.py                       |   5 +
 GPy/likelihoods/gaussian.py                        |   7 +
 GPy/likelihoods/likelihood.py                      |  31 +++
 GPy/likelihoods/link_functions.py                  |  28 +++
 GPy/mappings/constant.py                           |   6 +
 GPy/mappings/identity.py                           |   9 +-
 GPy/mappings/linear.py                             |  18 ++
 GPy/models/gp_classification.py                    |  21 ++
 GPy/models/gp_regression.py                        |  20 ++
 GPy/testing/serialization_tests.py                 | 202 ++++++++++++++++++
 GPy/util/normalizer.py                             |  44 +++-
 26 files changed, 828 insertions(+), 64 deletions(-)
 create mode 100644 GPy/testing/serialization_tests.py

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 7f23e5af..5546eac6 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -109,17 +109,79 @@ class GP(Model):
         self.link_parameter(self.likelihood)
         self.posterior = None

-        # The predictive variable to be used to predict using the posterior object's
-        # woodbury_vector and woodbury_inv is defined as predictive_variable
-        # as long as the posterior has the right woodbury entries.
-        # It is the input variable used for the covariance between
-        # X_star and the posterior of the GP.
-        # This is usually just a link to self.X (full GP) or self.Z (sparse GP).
-        # Make sure to name this variable and the predict functions will "just work"
-        # In maths the predictive variable is:
-        # K_{xx} - K_{xp}W_{pp}^{-1}K_{px}
-        # W_{pp} := \texttt{Woodbury inv}
-        # p := _predictive_variable
+    def to_dict(self, save_data=True):
+        input_dict = super(GP, self)._to_dict()
+        input_dict["class"] = "GPy.core.GP"
+        if not save_data:
+            input_dict["X"] = None
+            input_dict["Y"] = None
+        else:
+            try:
+                input_dict["X"] = self.X.values.tolist()
+            except AttributeError:
+                input_dict["X"] = self.X.tolist()
+            try:
+                input_dict["Y"] = self.Y.values.tolist()
+            except AttributeError:
+                input_dict["Y"] = self.Y.tolist()
+        input_dict["kernel"] = self.kern.to_dict()
+        input_dict["likelihood"] = self.likelihood.to_dict()
+        if self.mean_function is not None:
+            input_dict["mean_function"] = self.mean_function.to_dict()
+        input_dict["inference_method"] = self.inference_method.to_dict()
+        #FIXME: Assumes the Y_metadata is serializable. We should create a Metadata class
+        if self.Y_metadata is not None:
+            input_dict["Y_metadata"] = self.Y_metadata
+        if self.normalizer is not None:
+            input_dict["normalizer"] = self.normalizer.to_dict()
+        return input_dict
+
+    @staticmethod
+    def _from_dict(input_dict, data=None):
+        import GPy
+        import numpy as np
+        if (input_dict['X'] is None) or (input_dict['Y'] is None):
+            assert(data is not None)
+            input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])
+        elif data is not None:
+            print("WARNING: The model has been saved with X,Y! The original values are being overridden!")
+            input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])
+        else:
+            input_dict["X"], input_dict["Y"] = np.array(input_dict['X']), np.array(input_dict['Y'])
+        input_dict["kernel"] = GPy.kern.Kern.from_dict(input_dict["kernel"])
+        input_dict["likelihood"] = GPy.likelihoods.likelihood.Likelihood.from_dict(input_dict["likelihood"])
+        mean_function = input_dict.get("mean_function")
+        if mean_function is not None:
+            input_dict["mean_function"] = GPy.core.mapping.Mapping.from_dict(mean_function)
+        else:
+            input_dict["mean_function"] = mean_function
+        input_dict["inference_method"] = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(input_dict["inference_method"])
+
+        #FIXME: Assumes the Y_metadata is serializable. We should create a Metadata class
+        Y_metadata = input_dict.get("Y_metadata")
+        input_dict["Y_metadata"] = Y_metadata
+
+        normalizer = input_dict.get("normalizer")
+        if normalizer is not None:
+            input_dict["normalizer"] = GPy.util.normalizer._Norm.from_dict(normalizer)
+        else:
+            input_dict["normalizer"] = normalizer
+        return GP(**input_dict)
+
+    def save_model(self, output_filename, compress=True, save_data=True):
+        self._save_model(output_filename, compress=compress, save_data=save_data)
+
+    # The predictive variable to be used to predict using the posterior object's
+    # woodbury_vector and woodbury_inv is defined as predictive_variable
+    # as long as the posterior has the right woodbury entries.
+    # It is the input variable used for the covariance between
+    # X_star and the posterior of the GP.
+    # This is usually just a link to self.X (full GP) or self.Z (sparse GP).
+    # Make sure to name this variable and the predict functions will "just work"
+    # In maths the predictive variable is:
+    # K_{xx} - K_{xp}W_{pp}^{-1}K_{px}
+    # W_{pp} := \texttt{Woodbury inv}
+    # p := _predictive_variable

     @property
     def _predictive_variable(self):
@@ -305,9 +367,9 @@ class GP(Model):
             m, v = self._raw_predict(X, full_cov=False, kern=kern)
         if likelihood is None:
             likelihood = self.likelihood
-
+
         quantiles = likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
-
+
         if self.normalizer is not None:
             quantiles = [self.normalizer.inverse_mean(q) for q in quantiles]
         return quantiles
@@ -616,4 +678,3 @@ class GP(Model):
         """
         mu_star, var_star = self._raw_predict(x_test)
         return self.likelihood.log_predictive_density_sampling(y_test, mu_star, var_star, Y_metadata=Y_metadata, num_samples=num_samples)
-
diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py
index 9853ea8a..d8032c54 100644
--- a/GPy/core/mapping.py
+++ b/GPy/core/mapping.py
@@ -25,6 +25,30 @@ class Mapping(Parameterized):
     def update_gradients(self, dL_dF, X):
         raise NotImplementedError

+    def to_dict(self):
+        raise NotImplementedError
+
+    def _to_dict(self):
+        input_dict = {}
+        input_dict["input_dim"] = self.input_dim
+        input_dict["output_dim"] = self.output_dim
+        input_dict["name"] = self.name
+        return input_dict
+
+    @staticmethod
+    def from_dict(input_dict):
+        import copy
+        input_dict = copy.deepcopy(input_dict)
+        mapping_class = input_dict.pop('class')
+        input_dict["name"] = str(input_dict["name"])
+        import GPy
+        mapping_class = eval(mapping_class)
+        return mapping_class._from_dict(mapping_class, input_dict)
+
+    @staticmethod
+    def _from_dict(mapping_class, input_dict):
+        return mapping_class(**input_dict)
+

 class Bijective_mapping(Mapping):
     """
@@ -37,5 +61,3 @@ class Bijective_mapping(Mapping):

     def g(self, f):
         """Inverse mapping from output domain of the
function to the inputs.""" raise NotImplementedError - - diff --git a/GPy/core/model.py b/GPy/core/model.py index 7da6552a..799d42bd 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -8,6 +8,61 @@ class Model(ParamzModel, Priorizable): def __init__(self, name): super(Model, self).__init__(name) # Parameterized.__init__(self) + def _to_dict(self): + input_dict = {} + input_dict["name"] = self.name + return input_dict + + def to_dict(self): + raise NotImplementedError + + @staticmethod + def from_dict(input_dict, data=None): + import copy + input_dict = copy.deepcopy(input_dict) + model_class = input_dict.pop('class') + input_dict["name"] = str(input_dict["name"]) + import GPy + model_class = eval(model_class) + return model_class._from_dict(input_dict, data) + + @staticmethod + def _from_dict(model_class, input_dict, data=None): + return model_class(**input_dict) + + def save_model(self, output_filename, compress=True, save_data=True): + raise NotImplementedError + + def _save_model(self, output_filename, compress=True, save_data=True): + import json + output_dict = self.to_dict(save_data) + if compress: + import gzip + with gzip.GzipFile(output_filename + ".zip", 'w') as outfile: + json_str = json.dumps(output_dict) + json_bytes = json_str.encode('utf-8') + outfile.write(json_bytes) + else: + with open(output_filename + ".json", 'w') as outfile: + json.dump(output_dict, outfile) + + @staticmethod + def load_model(output_filename, data=None): + compress = output_filename.split(".")[-1] == "zip" + import json + if compress: + import gzip + with gzip.GzipFile(output_filename, 'r') as json_data: + json_bytes = json_data.read() + json_str = json_bytes.decode('utf-8') + output_dict = json.loads(json_str) + else: + with open(output_filename) as json_data: + output_dict = json.load(json_data) + import GPy + return GPy.core.model.Model.from_dict(output_dict, data) + + def log_likelihood(self): raise NotImplementedError("this needs to be implemented to use the model class") diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index 3938a6a4..3f6af799 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -41,6 +41,26 @@ class LatentFunctionInference(object): """ pass + def _to_dict(self): + input_dict = {} + return input_dict + + def to_dict(self): + raise NotImplementedError + + @staticmethod + def from_dict(input_dict): + import copy + input_dict = copy.deepcopy(input_dict) + inference_class = input_dict.pop('class') + import GPy + inference_class = eval(inference_class) + return inference_class._from_dict(inference_class, input_dict) + + @staticmethod + def _from_dict(inference_class, input_dict): + return inference_class(**input_dict) + class InferenceMethodList(LatentFunctionInference, list): def on_optimization_start(self): diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index a5519774..a0a35fa5 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -21,6 +21,11 @@ class ExactGaussianInference(LatentFunctionInference): def __init__(self): pass#self._YYTfactor_cache = caching.cache() + def to_dict(self): + input_dict = super(ExactGaussianInference, self)._to_dict() + input_dict["class"] = 
"GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference" + return input_dict + def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, K=None, variance=None, Z_tilde=None): """ Returns a Posterior class containing essential quantities of the posterior diff --git a/GPy/inference/latent_function_inference/expectation_propagation.py b/GPy/inference/latent_function_inference/expectation_propagation.py index 81c020df..c6abcbf1 100644 --- a/GPy/inference/latent_function_inference/expectation_propagation.py +++ b/GPy/inference/latent_function_inference/expectation_propagation.py @@ -7,6 +7,7 @@ from . import ExactGaussianInference, VarDTC from ...util import diag from .posterior import PosteriorEP as Posterior from ...likelihoods import Gaussian +from . import LatentFunctionInference log_2_pi = np.log(2*np.pi) @@ -26,6 +27,14 @@ class cavityParams(object): def _update_i(self, eta, ga_approx, post_params, i): self.tau[i] = 1./post_params.Sigma_diag[i] - eta*ga_approx.tau[i] self.v[i] = post_params.mu[i]/post_params.Sigma_diag[i] - eta*ga_approx.v[i] + def to_dict(self): + return {"tau": self.tau.tolist(), "v": self.v.tolist()} + @staticmethod + def from_dict(input_dict): + c = cavityParams(len(input_dict["tau"])) + c.tau = np.array(input_dict["tau"]) + c.v = np.array(input_dict["v"]) + return c class gaussianApproximation(object): @@ -48,6 +57,11 @@ class gaussianApproximation(object): self.v[i] += delta_v return (delta_tau, delta_v) + def to_dict(self): + return {"tau": self.tau.tolist(), "v": self.v.tolist()} + @staticmethod + def from_dict(input_dict): + return gaussianApproximation(np.array(input_dict["v"]), np.array(input_dict["tau"])) class posteriorParamsBase(object): @@ -71,6 +85,20 @@ class posteriorParams(posteriorParamsBase): ci = delta_tau/(1.+ delta_tau*self.Sigma_diag[i]) DSYR(self.Sigma, self.Sigma[:,i].copy(), -ci) self.mu = np.dot(self.Sigma, ga_approx.v) + def to_dict(self): + #TODO: Implement a more memory efficient variant + if self.L is None: + return { "mu": self.mu.tolist(), "Sigma": self.Sigma.tolist()} + else: + return { "mu": self.mu.tolist(), "Sigma": self.Sigma.tolist(), "L": self.L.tolist()} + @staticmethod + def from_dict(input_dict): + if "L" in input_dict: + return posteriorParams(np.array(input_dict["mu"]), np.array(input_dict["Sigma"]), np.array(input_dict["L"])) + else: + return posteriorParams(np.array(input_dict["mu"]), np.array(input_dict["Sigma"])) + + @staticmethod def _recompute(K, ga_approx): @@ -112,7 +140,7 @@ class posteriorParamsDTC(posteriorParamsBase): return posteriorParamsDTC(mu, Sigma_diag), LLT class EPBase(object): - def __init__(self, epsilon=1e-6, eta=1., delta=1., always_reset=False, max_iters=np.inf, ep_mode="alternated", parallel_updates=False): + def __init__(self, epsilon=1e-6, eta=1., delta=1., always_reset=False, max_iters=np.inf, ep_mode="alternated", parallel_updates=False, loading=False): """ The expectation-propagation algorithm. For nomenclature see Rasmussen & Williams 2006. @@ -128,6 +156,7 @@ class EPBase(object): :max_iters: int :ep_mode: string. It can be "nested" (EP is run every time the Hyperparameters change) or "alternated" (It runs EP at the beginning and then optimize the Hyperparameters). :parallel_updates: boolean. If true, updates of the parameters of the sites in parallel + :loading: boolean. If True, prevents the EP parameters to change. 
Hack used when loading a serialized model """ super(EPBase, self).__init__() @@ -135,6 +164,8 @@ class EPBase(object): self.epsilon, self.eta, self.delta, self.max_iters = epsilon, eta, delta, max_iters self.ep_mode = ep_mode self.parallel_updates = parallel_updates + #FIXME: Hack for serialiation. If True, prevents the EP parameters to change when loading a serialized model + self.loading = loading self.reset() def reset(self): @@ -161,9 +192,21 @@ class EPBase(object): def __getstate__(self): return [super(EPBase, self).__getstate__() , [self.epsilon, self.eta, self.delta]] + def _to_dict(self): + input_dict = super(EPBase, self)._to_dict() + input_dict["epsilon"]=self.epsilon + input_dict["eta"]=self.eta + input_dict["delta"]=self.delta + input_dict["always_reset"]=self.always_reset + input_dict["max_iters"]=self.max_iters + input_dict["ep_mode"]=self.ep_mode + input_dict["parallel_updates"]=self.parallel_updates + input_dict["loading"]=True + return input_dict + class EP(EPBase, ExactGaussianInference): def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, precision=None, K=None): - if self.always_reset: + if self.always_reset and not self.loading: self.reset() num_data, output_dim = Y.shape @@ -172,11 +215,11 @@ class EP(EPBase, ExactGaussianInference): if K is None: K = kern.K(X) - if self.ep_mode=="nested": + if self.ep_mode=="nested" and not self.loading: #Force EP at each step of the optimization self._ep_approximation = None post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata) - elif self.ep_mode=="alternated": + elif self.ep_mode=="alternated" or self.loading: if getattr(self, '_ep_approximation', None) is None: #if we don't yet have the results of runnign EP, run EP and store the computed factors in self._ep_approximation post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata) @@ -186,6 +229,7 @@ class EP(EPBase, ExactGaussianInference): else: raise ValueError("ep_mode value not valid") + self.loading = False return self._inference(Y, K, ga_approx, cav_params, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde) def expectation_propagation(self, K, Y, likelihood, Y_metadata): @@ -297,6 +341,36 @@ class EP(EPBase, ExactGaussianInference): dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh') return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha} + def to_dict(self): + input_dict = super(EP, self)._to_dict() + input_dict["class"] = "GPy.inference.latent_function_inference.expectation_propagation.EP" + if self.ga_approx_old is not None: + input_dict["ga_approx_old"] = self.ga_approx_old.to_dict() + if self._ep_approximation is not None: + input_dict["_ep_approximation"] = {} + input_dict["_ep_approximation"]["post_params"] = self._ep_approximation[0].to_dict() + input_dict["_ep_approximation"]["ga_approx"] = self._ep_approximation[1].to_dict() + input_dict["_ep_approximation"]["cav_params"] = self._ep_approximation[2].to_dict() + input_dict["_ep_approximation"]["log_Z_tilde"] = self._ep_approximation[3].tolist() + + return input_dict + + @staticmethod + def _from_dict(inference_class, input_dict): + ga_approx_old = input_dict.pop('ga_approx_old', None) + if ga_approx_old is not None: + ga_approx_old = gaussianApproximation.from_dict(ga_approx_old) + 
_ep_approximation_dict = input_dict.pop('_ep_approximation', None)
+        _ep_approximation = None
+        if _ep_approximation_dict is not None:
+            _ep_approximation = []
+            _ep_approximation.append(posteriorParams.from_dict(_ep_approximation_dict["post_params"]))
+            _ep_approximation.append(gaussianApproximation.from_dict(_ep_approximation_dict["ga_approx"]))
+            _ep_approximation.append(cavityParams.from_dict(_ep_approximation_dict["cav_params"]))
+            _ep_approximation.append(np.array(_ep_approximation_dict["log_Z_tilde"]))
+        ee = EP(**input_dict)
+        ee.ga_approx_old = ga_approx_old
+        ee._ep_approximation = _ep_approximation
+        return ee

 class EPDTC(EPBase, VarDTC):
     def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
diff --git a/GPy/kern/src/add.py b/GPy/kern/src/add.py
index ca20e5a9..c1834f76 100644
--- a/GPy/kern/src/add.py
+++ b/GPy/kern/src/add.py
@@ -13,9 +13,9 @@ class Add(CombinationKernel):
     propagates gradients through.

     This kernel will take over the active dims of it's subkernels passed in.
-
-    NOTE: The subkernels will be copies of the original kernels, to prevent
-    unexpected behavior.
+
+    NOTE: The subkernels will be copies of the original kernels, to prevent
+    unexpected behavior.
     """
     def __init__(self, subkerns, name='sum'):
         _newkerns = []
         for kern in subkerns:
             if isinstance(kern, Add):
                 for part in kern.parts:
                     _newkerns.append(part.copy())
             else:
                 _newkerns.append(kern.copy())
-
+
         super(Add, self).__init__(_newkerns, name)
         self._exact_psicomp = self._check_exact_psicomp()
@@ -43,6 +43,11 @@ class Add(CombinationKernel):
         else:
             return False

+    def to_dict(self):
+        input_dict = super(Add, self)._to_dict()
+        input_dict["class"] = str("GPy.kern.Add")
+        return input_dict
+
     @Cache_this(limit=3, force_kwargs=['which_parts'])
     def K(self, X, X2=None, which_parts=None):
         """
diff --git a/GPy/kern/src/kern.py b/GPy/kern/src/kern.py
index 198ebd13..b9971b8c 100644
--- a/GPy/kern/src/kern.py
+++ b/GPy/kern/src/kern.py
@@ -60,6 +60,35 @@ class Kern(Parameterized):
             from .psi_comp import PSICOMP_GH
             self.psicomp = PSICOMP_GH()

+    def _to_dict(self):
+        input_dict = {}
+        input_dict["input_dim"] = self.input_dim
+        if isinstance(self.active_dims, np.ndarray):
+            input_dict["active_dims"] = self.active_dims.tolist()
+        else:
+            input_dict["active_dims"] = self.active_dims
+        input_dict["name"] = self.name
+        input_dict["useGPU"] = self.useGPU
+        return input_dict
+
+    def to_dict(self):
+        raise NotImplementedError
+
+    @staticmethod
+    def from_dict(input_dict):
+        import copy
+        input_dict = copy.deepcopy(input_dict)
+        kernel_class = input_dict.pop('class')
+        input_dict["name"] = str(input_dict["name"])
+        import GPy
+        kernel_class = eval(kernel_class)
+        return kernel_class._from_dict(kernel_class, input_dict)
+
+    @staticmethod
+    def _from_dict(kernel_class, input_dict):
+        return kernel_class(**input_dict)
+
+
     def __setstate__(self, state):
         self._all_dims_active = np.arange(0, max(state['active_dims']) + 1)
         super(Kern, self).__setstate__(state)
@@ -342,6 +371,21 @@ class CombinationKernel(Kern):
         self.extra_dims = extra_dims
         self.link_parameters(*kernels)

+    def _to_dict(self):
+        input_dict = super(CombinationKernel, self)._to_dict()
+        input_dict["parts"] = {}
+        for ii in range(len(self.parts)):
+            input_dict["parts"][ii] = self.parts[ii].to_dict()
+        return input_dict
+
+    @staticmethod
+    def _from_dict(kernel_class, input_dict):
+        parts = input_dict.pop('parts', None)
+        subkerns = []
+        for pp in parts:
+            subkerns.append(Kern.from_dict(parts[pp]))
+        return kernel_class(subkerns)
+
     @property
     def
parts(self): return self.parameters diff --git a/GPy/kern/src/linear.py b/GPy/kern/src/linear.py index e7089fe1..10edb4c2 100644 --- a/GPy/kern/src/linear.py +++ b/GPy/kern/src/linear.py @@ -51,6 +51,18 @@ class Linear(Kern): self.link_parameter(self.variances) self.psicomp = PSICOMP_Linear() + def to_dict(self): + input_dict = super(Linear, self)._to_dict() + input_dict["class"] = "GPy.kern.Linear" + input_dict["variances"] = self.variances.values.tolist() + input_dict["ARD"] = self.ARD + return input_dict + + @staticmethod + def _from_dict(kernel_class, input_dict): + useGPU = input_dict.pop('useGPU', None) + return Linear(**input_dict) + @Cache_this(limit=3) def K(self, X, X2=None): if self.ARD: @@ -211,5 +223,3 @@ class LinearFull(Kern): def gradients_X_diag(self, dL_dKdiag, X): P = np.dot(self.W, self.W.T) + np.diag(self.kappa) return 2.*np.einsum('jk,i,ij->ik', P, dL_dKdiag, X) - - diff --git a/GPy/kern/src/periodic.py b/GPy/kern/src/periodic.py index fe0c6670..3a7a515e 100644 --- a/GPy/kern/src/periodic.py +++ b/GPy/kern/src/periodic.py @@ -400,4 +400,3 @@ class PeriodicMatern52(Periodic): self.variance.gradient = np.sum(dK_dvar*dL_dK) self.lengthscale.gradient = np.sum(dK_dlen*dL_dK) self.period.gradient = np.sum(dK_dper*dL_dK) - diff --git a/GPy/kern/src/prod.py b/GPy/kern/src/prod.py index ab3df431..43314e7a 100644 --- a/GPy/kern/src/prod.py +++ b/GPy/kern/src/prod.py @@ -39,6 +39,11 @@ class Prod(CombinationKernel): kernels.insert(i, part) super(Prod, self).__init__(kernels, name) + def to_dict(self): + input_dict = super(Prod, self)._to_dict() + input_dict["class"] = str("GPy.kern.Prod") + return input_dict + @Cache_this(limit=3, force_kwargs=['which_parts']) def K(self, X, X2=None, which_parts=None): if which_parts is None: @@ -117,7 +122,7 @@ class Prod(CombinationKernel): part_param_num = len(p.param_array) # number of parameters in the part p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)]) part_start_param_index += part_param_num - + def sde(self): """ """ @@ -131,88 +136,88 @@ class Prod(CombinationKernel): dQc = None dPinf = None dP0 = None - + # Assign models for p in self.parts: (Ft,Lt,Qct,Ht,P_inft, P0t, dFt,dQct,dP_inft,dP0t) = p.sde() - + # check derivative dimensions -> - number_of_parameters = len(p.param_array) + number_of_parameters = len(p.param_array) assert dFt.shape[2] == number_of_parameters, "Dynamic matrix derivative shape is wrong" assert dQct.shape[2] == number_of_parameters, "Diffusion matrix derivative shape is wrong" assert dP_inft.shape[2] == number_of_parameters, "Infinite covariance matrix derivative shape is wrong" # check derivative dimensions <- - + # exception for periodic kernel if (p.name == 'std_periodic'): - Qct = P_inft - dQct = dP_inft - + Qct = P_inft + dQct = dP_inft + dF = dkron(F,dF,Ft,dFt,'sum') dQc = dkron(Qc,dQc,Qct,dQct,'prod') dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod') dP0 = dkron(P0,dP0,P0t,dP0t,'prod') - + F = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft) L = np.kron(L,Lt) Qc = np.kron(Qc,Qct) Pinf = np.kron(Pinf,P_inft) P0 = np.kron(P0,P_inft) H = np.kron(H,Ht) - + return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0) def dkron(A,dA,B,dB, operation='prod'): """ - Function computes the derivative of Kronecker product A*B + Function computes the derivative of Kronecker product A*B (or Kronecker sum A+B). 
- + Input: ----------------------- - + A: 2D matrix - Some matrix + Some matrix dA: 3D (or 2D matrix) Derivarives of A B: 2D matrix - Some matrix + Some matrix dB: 3D (or 2D matrix) - Derivarives of B - + Derivarives of B + operation: str 'prod' or 'sum' Which operation is considered. If the operation is 'sum' it is assumed that A and are square matrices.s - + Output: dC: 3D matrix Derivative of Kronecker product A*B (or Kronecker sum A+B) """ - + if dA is None: dA_param_num = 0 dA = np.zeros((A.shape[0], A.shape[1],1)) else: dA_param_num = dA.shape[2] - + if dB is None: dB_param_num = 0 - dB = np.zeros((B.shape[0], B.shape[1],1)) + dB = np.zeros((B.shape[0], B.shape[1],1)) else: dB_param_num = dB.shape[2] # Space allocation for derivative matrix - dC = np.zeros((A.shape[0]*B.shape[0], A.shape[1]*B.shape[1], dA_param_num + dB_param_num)) - + dC = np.zeros((A.shape[0]*B.shape[0], A.shape[1]*B.shape[1], dA_param_num + dB_param_num)) + for k in range(dA_param_num): if operation == 'prod': dC[:,:,k] = np.kron(dA[:,:,k],B); else: dC[:,:,k] = np.kron(dA[:,:,k],np.eye( B.shape[0] )) - + for k in range(dB_param_num): if operation == 'prod': dC[:,:,dA_param_num+k] = np.kron(A,dB[:,:,k]) else: dC[:,:,dA_param_num+k] = np.kron(np.eye( A.shape[0] ),dB[:,:,k]) - + return dC diff --git a/GPy/kern/src/rbf.py b/GPy/kern/src/rbf.py index 9059ccc0..0b6730d8 100644 --- a/GPy/kern/src/rbf.py +++ b/GPy/kern/src/rbf.py @@ -31,6 +31,14 @@ class RBF(Stationary): self.inv_l = Param('inv_lengthscale',1./self.lengthscale**2, Logexp()) self.link_parameter(self.inv_l) + def to_dict(self): + input_dict = super(RBF, self)._to_dict() + input_dict["class"] = "GPy.kern.RBF" + input_dict["inv_l"] = self.use_invLengthscale + if input_dict["inv_l"] == True: + input_dict["lengthscale"] = np.sqrt(1 / float(self.inv_l)) + return input_dict + def K_of_r(self, r): return self.variance * np.exp(-0.5 * r**2) @@ -42,7 +50,7 @@ class RBF(Stationary): def dK2_drdr_diag(self): return -self.variance # as the diagonal of r is always filled with zeros - + def __getstate__(self): dc = super(RBF, self).__getstate__() if self.useGPU: diff --git a/GPy/kern/src/standard_periodic.py b/GPy/kern/src/standard_periodic.py index 974bce6a..201f8d19 100644 --- a/GPy/kern/src/standard_periodic.py +++ b/GPy/kern/src/standard_periodic.py @@ -93,6 +93,17 @@ class StdPeriodic(Kern): self.link_parameters(self.variance, self.period, self.lengthscale) + def to_dict(self): + input_dict = super(StdPeriodic, self)._to_dict() + input_dict["class"] = "GPy.kern.StdPeriodic" + input_dict["variance"] = self.variance.values.tolist() + input_dict["period"] = self.period.values.tolist() + input_dict["lengthscale"] = self.lengthscale.values.tolist() + input_dict["ARD1"] = self.ARD1 + input_dict["ARD2"] = self.ARD2 + return input_dict + + def parameters_changed(self): """ This functions deals as a callback for each optimization iteration. 
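[Aside: the kernel serialization pattern introduced above -- a _to_dict/to_dict pair per class, with Kern.from_dict dispatching on the stored "class" string -- composes through CombinationKernel, so sums and products of kernels round-trip as well. A minimal round-trip sketch, assuming a GPy build with this patch applied; the kernel choices below are illustrative only:

    import numpy as np
    import GPy

    # Build a composite kernel; Add/Prod serialize their parts recursively.
    k = GPy.kern.RBF(2, variance=1.0, lengthscale=[1.0, 2.0], ARD=True) + GPy.kern.Bias(2)
    k_dict = k.to_dict()                          # plain, JSON-serializable dict
    k_restored = GPy.kern.Kern.from_dict(k_dict)  # dispatches on k_dict["class"]

    X = np.random.randn(5, 2)
    assert np.allclose(k.K(X), k_restored.K(X))   # covariance survives the round trip

The same equality check is what serialization_tests.py (added later in this patch) asserts for a range of kernel combinations.]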
diff --git a/GPy/kern/src/static.py b/GPy/kern/src/static.py index 0ffd8112..f7042cc1 100644 --- a/GPy/kern/src/static.py +++ b/GPy/kern/src/static.py @@ -14,6 +14,11 @@ class Static(Kern): self.variance = Param('variance', variance, Logexp()) self.link_parameters(self.variance) + def _to_dict(self): + input_dict = super(Static, self)._to_dict() + input_dict["variance"] = self.variance.values.tolist() + return input_dict + def Kdiag(self, X): ret = np.empty((X.shape[0],), dtype=np.float64) ret[:] = self.variance @@ -133,6 +138,16 @@ class Bias(Static): def __init__(self, input_dim, variance=1., active_dims=None, name='bias'): super(Bias, self).__init__(input_dim, variance, active_dims, name) + def to_dict(self): + input_dict = super(Bias, self)._to_dict() + input_dict["class"] = "GPy.kern.Bias" + return input_dict + + @staticmethod + def _from_dict(kernel_class, input_dict): + useGPU = input_dict.pop('useGPU', None) + return Bias(**input_dict) + def K(self, X, X2=None): shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0]) return np.full(shape, self.variance, dtype=np.float64) @@ -250,4 +265,3 @@ class Precomputed(Fixed): def update_gradients_diag(self, dL_dKdiag, X): self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self._index(X, None)) - diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py index 79d4b954..4e8ddb77 100644 --- a/GPy/kern/src/stationary.py +++ b/GPy/kern/src/stationary.py @@ -79,6 +79,13 @@ class Stationary(Kern): assert self.variance.size==1 self.link_parameters(self.variance, self.lengthscale) + def _to_dict(self): + input_dict = super(Stationary, self)._to_dict() + input_dict["variance"] = self.variance.values.tolist() + input_dict["lengthscale"] = self.lengthscale.values.tolist() + input_dict["ARD"] = self.ARD + return input_dict + def K_of_r(self, r): raise NotImplementedError("implement the covariance function as a fn of r to use this class") @@ -351,6 +358,16 @@ class Exponential(Stationary): def dK_dr(self, r): return -self.K_of_r(r) + def to_dict(self): + input_dict = super(Exponential, self)._to_dict() + input_dict["class"] = "GPy.kern.Exponential" + return input_dict + + @staticmethod + def _from_dict(kernel_class, input_dict): + useGPU = input_dict.pop('useGPU', None) + return Exponential(**input_dict) + # def sde(self): # """ # Return the state space representation of the covariance. @@ -399,6 +416,16 @@ class Matern32(Stationary): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat32'): super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) + def to_dict(self): + input_dict = super(Matern32, self)._to_dict() + input_dict["class"] = "GPy.kern.Matern32" + return input_dict + + @staticmethod + def _from_dict(kernel_class, input_dict): + useGPU = input_dict.pop('useGPU', None) + return Matern32(**input_dict) + def K_of_r(self, r): return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) 
* r) @@ -478,6 +505,16 @@ class Matern52(Stationary): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'): super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) + def to_dict(self): + input_dict = super(Matern52, self)._to_dict() + input_dict["class"] = "GPy.kern.Matern52" + return input_dict + + @staticmethod + def _from_dict(kernel_class, input_dict): + useGPU = input_dict.pop('useGPU', None) + return Matern52(**input_dict) + def K_of_r(self, r): return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r) @@ -533,6 +570,16 @@ class ExpQuad(Stationary): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='ExpQuad'): super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) + def to_dict(self): + input_dict = super(ExpQuad, self)._to_dict() + input_dict["class"] = "GPy.kern.ExpQuad" + return input_dict + + @staticmethod + def _from_dict(kernel_class, input_dict): + useGPU = input_dict.pop('useGPU', None) + return ExpQuad(**input_dict) + def K_of_r(self, r): return self.variance * np.exp(-0.5 * r**2) @@ -566,6 +613,17 @@ class RatQuad(Stationary): self.power = Param('power', power, Logexp()) self.link_parameters(self.power) + def to_dict(self): + input_dict = super(RatQuad, self)._to_dict() + input_dict["class"] = "GPy.kern.RatQuad" + input_dict["power"] = self.power.values.tolist() + return input_dict + + @staticmethod + def _from_dict(kernel_class, input_dict): + useGPU = input_dict.pop('useGPU', None) + return RatQuad(**input_dict) + def K_of_r(self, r): r2 = np.square(r) # return self.variance*np.power(1. + r2/2., -self.power) @@ -588,5 +646,3 @@ class RatQuad(Stationary): def update_gradients_diag(self, dL_dKdiag, X): super(RatQuad, self).update_gradients_diag(dL_dKdiag, X) self.power.gradient = 0. 
- - diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py index cfa16ad3..a00798f9 100644 --- a/GPy/likelihoods/bernoulli.py +++ b/GPy/likelihoods/bernoulli.py @@ -29,6 +29,11 @@ class Bernoulli(Likelihood): if isinstance(gp_link , (link_functions.Heaviside, link_functions.Probit)): self.log_concave = True + def to_dict(self): + input_dict = super(Bernoulli, self)._to_dict() + input_dict["class"] = "GPy.likelihoods.Bernoulli" + return input_dict + def _preprocess_values(self, Y): """ Check if the values of the observations correspond to the values diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py index 04fd6a33..412fe404 100644 --- a/GPy/likelihoods/gaussian.py +++ b/GPy/likelihoods/gaussian.py @@ -46,6 +46,13 @@ class Gaussian(Likelihood): if isinstance(gp_link, link_functions.Identity): self.log_concave = True + def to_dict(self): + input_dict = super(Gaussian, self)._to_dict() + input_dict["class"] = "GPy.likelihoods.Gaussian" + input_dict["variance"] = self.variance.values.tolist() + return input_dict + + def betaY(self,Y,Y_metadata=None): #TODO: ~Ricardo this does not live here raise RuntimeError("Please notify the GPy developers, this should not happen") diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index 308c6a76..b28fdef5 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -44,6 +44,37 @@ class Likelihood(Parameterized): self.gp_link = gp_link self.log_concave = False self.not_block_really = False + self.name = name + + def to_dict(self): + raise NotImplementedError + + def _to_dict(self): + input_dict = {} + input_dict["name"] = self.name + input_dict["gp_link_dict"] = self.gp_link.to_dict() + return input_dict + + @staticmethod + def from_dict(input_dict): + import copy + input_dict = copy.deepcopy(input_dict) + likelihood_class = input_dict.pop('class') + input_dict["name"] = str(input_dict["name"]) + name = input_dict.pop('name') + import GPy + likelihood_class = eval(likelihood_class) + return likelihood_class._from_dict(likelihood_class, input_dict) + + @staticmethod + def _from_dict(likelihood_class, input_dict): + import copy + input_dict = copy.deepcopy(input_dict) + gp_link_dict = input_dict.pop('gp_link_dict') + import GPy + gp_link = GPy.likelihoods.link_functions.GPTransformation.from_dict(gp_link_dict) + input_dict["gp_link"] = gp_link + return likelihood_class(**input_dict) def request_num_latent_functions(self, Y): """ diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py index 4947fdb8..d5fc785f 100644 --- a/GPy/likelihoods/link_functions.py +++ b/GPy/likelihoods/link_functions.py @@ -43,6 +43,25 @@ class GPTransformation(object): """ raise NotImplementedError + def to_dict(self): + raise NotImplementedError + + def _to_dict(self): + return {} + + @staticmethod + def from_dict(input_dict): + import copy + input_dict = copy.deepcopy(input_dict) + link_class = input_dict.pop('class') + import GPy + link_class = eval(link_class) + return link_class._from_dict(link_class, input_dict) + + @staticmethod + def _from_dict(link_class, input_dict): + return link_class(**input_dict) + class Identity(GPTransformation): """ .. 
math:: @@ -62,6 +81,10 @@ class Identity(GPTransformation): def d3transf_df3(self,f): return np.zeros_like(f) + def to_dict(self): + input_dict = super(Identity, self)._to_dict() + input_dict["class"] = "GPy.likelihoods.link_functions.Identity" + return input_dict class Probit(GPTransformation): """ @@ -82,6 +105,11 @@ class Probit(GPTransformation): def d3transf_df3(self,f): return (safe_square(f)-1.)*std_norm_pdf(f) + def to_dict(self): + input_dict = super(Probit, self)._to_dict() + input_dict["class"] = "GPy.likelihoods.link_functions.Probit" + return input_dict + class Cloglog(GPTransformation): """ diff --git a/GPy/mappings/constant.py b/GPy/mappings/constant.py index 958be943..24c632c9 100644 --- a/GPy/mappings/constant.py +++ b/GPy/mappings/constant.py @@ -38,3 +38,9 @@ class Constant(Mapping): def gradients_X(self, dL_dF, X): return np.zeros_like(X) + + def to_dict(self): + input_dict = super(Constant, self)._to_dict() + input_dict["class"] = "GPy.mappings.Constant" + input_dict["value"] = self.C.values[0] + return input_dict diff --git a/GPy/mappings/identity.py b/GPy/mappings/identity.py index b15e476c..261d918f 100644 --- a/GPy/mappings/identity.py +++ b/GPy/mappings/identity.py @@ -19,8 +19,7 @@ class Identity(Mapping): def gradients_X(self, dL_dF, X): return dL_dF - - - - - + def to_dict(self): + input_dict = super(Identity, self)._to_dict() + input_dict["class"] = "GPy.mappings.Identity" + return input_dict diff --git a/GPy/mappings/linear.py b/GPy/mappings/linear.py index 26504830..e348d458 100644 --- a/GPy/mappings/linear.py +++ b/GPy/mappings/linear.py @@ -37,3 +37,21 @@ class Linear(Mapping): def gradients_X(self, dL_dF, X): return np.dot(dL_dF, self.A.T) + + def to_dict(self): + input_dict = super(Linear, self)._to_dict() + input_dict["class"] = "GPy.mappings.Linear" + input_dict["A"] = self.A.values.tolist() + return input_dict + + @staticmethod + def _from_dict(mapping_class, input_dict): + import copy + input_dict = copy.deepcopy(input_dict) + A = np.array(input_dict.pop('A')) + l = Linear(**input_dict) + l.unlink_parameter(l.A) + l.update_model(False) + l.A = Param('A', A) + l.link_parameter(l.A) + return l diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index 2c4f7be4..702d2105 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -4,6 +4,7 @@ from ..core import GP from .. import likelihoods from .. 
import kern
+import numpy as np
 from ..inference.latent_function_inference.expectation_propagation import EP

 class GPClassification(GP):
@@ -27,3 +28,23 @@
         likelihood = likelihoods.Bernoulli()

         GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=EP(), mean_function=mean_function, name='gp_classification')
+
+    @staticmethod
+    def from_gp(gp):
+        from copy import deepcopy
+        gp = deepcopy(gp)
+        return GPClassification(gp.X, gp.Y, gp.kern, gp.likelihood, gp.inference_method, gp.mean_function, name='gp_classification')
+
+    def to_dict(self, save_data=True):
+        model_dict = super(GPClassification,self).to_dict(save_data)
+        model_dict["class"] = "GPy.models.GPClassification"
+        return model_dict
+
+    @staticmethod
+    def from_dict(input_dict, data=None):
+        import GPy
+        m = GPy.core.model.Model.from_dict(input_dict, data)
+        return GPClassification.from_gp(m)
+
+    def save_model(self, output_filename, compress=True, save_data=True):
+        self._save_model(output_filename, compress=compress, save_data=save_data)
diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py
index 157c0dc8..10b3ba61 100644
--- a/GPy/models/gp_regression.py
+++ b/GPy/models/gp_regression.py
@@ -35,3 +35,23 @@
         super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer, mean_function=mean_function)

+    @staticmethod
+    def from_gp(gp):
+        from copy import deepcopy
+        gp = deepcopy(gp)
+        return GPRegression(gp.X, gp.Y, gp.kern, gp.Y_metadata, gp.normalizer, gp.likelihood.variance.values, gp.mean_function)
+
+    def to_dict(self, save_data=True):
+        model_dict = super(GPRegression,self).to_dict(save_data)
+        model_dict["class"] = "GPy.models.GPRegression"
+        return model_dict
+
+    @staticmethod
+    def _from_dict(input_dict, data=None):
+        import GPy
+        input_dict["class"] = "GPy.core.GP"
+        m = GPy.core.GP.from_dict(input_dict, data)
+        return GPRegression.from_gp(m)
+
+    def save_model(self, output_filename, compress=True, save_data=True):
+        self._save_model(output_filename, compress=compress, save_data=save_data)
diff --git a/GPy/testing/serialization_tests.py b/GPy/testing/serialization_tests.py
new file mode 100644
index 00000000..80dfd219
--- /dev/null
+++ b/GPy/testing/serialization_tests.py
@@ -0,0 +1,202 @@
+'''
+Created on 20 April 2017
+
+@author: pgmoren
+'''
+import unittest, itertools
+#import cPickle as pickle
+import pickle
+import numpy as np
+import tempfile
+import GPy
+from nose import SkipTest
+import numpy as np
+fixed_seed = 11
+
+
+class Test(unittest.TestCase):
+    def test_serialize_deserialize_kernels(self):
+        k1 = GPy.kern.RBF(2, variance=1.0, lengthscale=[1.0,1.0], ARD=True)
+        k2 = GPy.kern.RatQuad(2, variance=2.0, lengthscale=1.0, power=2.0, active_dims = [0,1])
+        k3 = GPy.kern.Bias(2, variance=2.0, active_dims = [1,0])
+        k4 = GPy.kern.StdPeriodic(2, variance=2.0, lengthscale=1.0, period=1.0, active_dims = [1,1])
+        k5 = GPy.kern.Linear(2, variances=[2.0, 1.0], ARD=True, active_dims = [1,1])
+        k6 = GPy.kern.Exponential(2, variance=1., lengthscale=2)
+        k7 = GPy.kern.Matern32(2, variance=1.0, lengthscale=[1.0,3.0], ARD=True, active_dims = [1,1])
+        k8 = GPy.kern.Matern52(2, variance=2.0, lengthscale=[2.0,1.0], ARD=True, active_dims = [1,0])
+        k9 = GPy.kern.ExpQuad(2, variance=3.0, lengthscale=[1.0,2.0], ARD=True, active_dims = [0,1])
+        k10 = k1 + k1.copy() + k2 + k3 + k4 + k5 + k6
+        k11 = k1 * k2 * k2.copy() * k3 * k4 * k5
+        k12 = (k1 + k2) * (k3 + k4 + k5)
+        k13 = ((k1 + k2) * k3) + k4 + k5 *
k7 + k14 = ((k1 + k2) * k3) + k4 * k5 + k8 + k15 = ((k1 * k2) * k3) + k4 * k5 + k8 + k9 + + k_list = [k1,k2,k3,k4,k5,k6,k7,k8,k9,k10,k11,k12,k13,k14,k15] + + for kk in k_list: + kk_dict = kk.to_dict() + kk_r = GPy.kern.Kern.from_dict(kk_dict) + assert type(kk) == type(kk_r) + np.testing.assert_array_equal(kk[:], kk_r[:]) + np.testing.assert_array_equal(np.array(kk.active_dims), np.array(kk_r.active_dims)) + + def test_serialize_deserialize_mappings(self): + m1 = GPy.mappings.Identity(3,2) + m2 = GPy.mappings.Constant(3,2,1) + m2_r = GPy.core.mapping.Mapping.from_dict(m2.to_dict()) + np.testing.assert_array_equal(m2.C.values[:], m2_r.C.values[:]) + m3 = GPy.mappings.Linear(3,2) + m3_r = GPy.core.mapping.Mapping.from_dict(m3.to_dict()) + assert np.all(m3.A == m3_r.A) + + m_list = [m1, m2, m3] + for mm in m_list: + mm_dict = mm.to_dict() + mm_r = GPy.core.mapping.Mapping.from_dict(mm_dict) + assert type(mm) == type(mm_r) + assert type(mm.input_dim) == type(mm_r.input_dim) + assert type(mm.output_dim) == type(mm_r.output_dim) + + def test_serialize_deserialize_likelihoods(self): + l1 = GPy.likelihoods.Gaussian(GPy.likelihoods.link_functions.Identity(),variance=3.0) + l1_r = GPy.likelihoods.likelihood.Likelihood.from_dict(l1.to_dict()) + l2 = GPy.likelihoods.Bernoulli(GPy.likelihoods.link_functions.Probit()) + l2_r = GPy.likelihoods.likelihood.Likelihood.from_dict(l2.to_dict()) + assert type(l1) == type(l1_r) + assert np.all(l1.variance == l1_r.variance) + assert type(l2) == type(l2_r) + + def test_serialize_deserialize_normalizers(self): + n1 = GPy.util.normalizer.Standardize() + n1.scale_by(np.random.rand(10)) + n1_r = GPy.util.normalizer._Norm.from_dict((n1.to_dict())) + assert type(n1) == type(n1_r) + assert np.all(n1.mean == n1_r.mean) + assert np.all(n1.std == n1_r.std) + + def test_serialize_deserialize_link_functions(self): + l1 = GPy.likelihoods.link_functions.Identity() + l2 = GPy.likelihoods.link_functions.Probit() + l_list = [l1, l2] + for ll in l_list: + ll_dict = ll.to_dict() + ll_r = GPy.likelihoods.link_functions.GPTransformation.from_dict(ll_dict) + assert type(ll) == type(ll_r) + + def test_serialize_deserialize_inference_methods(self): + + e1 = GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode="nested") + e1.ga_approx_old = GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10),np.random.rand(10)) + e1._ep_approximation = [] + e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.posteriorParams(np.random.rand(10),np.random.rand(100).reshape((10,10)))) + e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10),np.random.rand(10))) + e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.cavityParams(10)) + e1._ep_approximation[-1].v = np.random.rand(10) + e1._ep_approximation[-1].tau = np.random.rand(10) + e1._ep_approximation.append(np.random.rand(10)) + e1_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e1.to_dict()) + + assert type(e1) == type(e1_r) + assert e1.epsilon==e1_r.epsilon + assert e1.eta==e1_r.eta + assert e1.delta==e1_r.delta + assert e1.always_reset==e1_r.always_reset + assert e1.max_iters==e1_r.max_iters + assert e1.ep_mode==e1_r.ep_mode + assert e1.parallel_updates==e1_r.parallel_updates + + np.testing.assert_array_equal(e1.ga_approx_old.tau[:], e1_r.ga_approx_old.tau[:]) + 
np.testing.assert_array_equal(e1.ga_approx_old.v[:], e1_r.ga_approx_old.v[:]) + np.testing.assert_array_equal(e1._ep_approximation[0].mu[:], e1_r._ep_approximation[0].mu[:]) + np.testing.assert_array_equal(e1._ep_approximation[0].Sigma[:], e1_r._ep_approximation[0].Sigma[:]) + np.testing.assert_array_equal(e1._ep_approximation[1].tau[:], e1_r._ep_approximation[1].tau[:]) + np.testing.assert_array_equal(e1._ep_approximation[1].v[:], e1_r._ep_approximation[1].v[:]) + np.testing.assert_array_equal(e1._ep_approximation[2].tau[:], e1_r._ep_approximation[2].tau[:]) + np.testing.assert_array_equal(e1._ep_approximation[2].v[:], e1_r._ep_approximation[2].v[:]) + np.testing.assert_array_equal(e1._ep_approximation[3][:], e1_r._ep_approximation[3][:]) + + e2 = GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference() + e2_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e2.to_dict()) + + assert type(e2) == type(e2_r) + + def test_serialize_deserialize_model(self): + np.random.seed(fixed_seed) + N = 20 + Nhalf = int(N/2) + X = np.hstack([np.random.normal(5, 2, Nhalf), np.random.normal(10, 2, Nhalf)])[:, None] + Y = np.hstack([np.ones(Nhalf), np.zeros(Nhalf)])[:, None] + kernel = GPy.kern.RBF(1) + likelihood = GPy.likelihoods.Bernoulli() + inference_method=GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode="nested") + mean_function=None + m = GPy.core.GP(X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=inference_method, mean_function=mean_function, normalizer=True, name='gp_classification') + m.optimize() + m.save_model("temp_test_gp_with_data.json", compress=True, save_data=True) + m.save_model("temp_test_gp_without_data.json", compress=True, save_data=False) + m1_r = GPy.core.GP.load_model("temp_test_gp_with_data.json.zip") + m2_r = GPy.core.GP.load_model("temp_test_gp_without_data.json.zip", (X,Y)) + import os + os.remove("temp_test_gp_with_data.json.zip") + os.remove("temp_test_gp_without_data.json.zip") + var = m.predict(X)[0] + var1_r = m1_r.predict(X)[0] + var2_r = m2_r.predict(X)[0] + np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten()) + np.testing.assert_array_equal(np.array(var).flatten(), np.array(var2_r).flatten()) + + def test_serialize_deserialize_inference_GPRegressor(self): + np.random.seed(fixed_seed) + N = 50 + N_new = 50 + D = 1 + X = np.random.uniform(-3., 3., (N, 1)) + Y = np.sin(X) + np.random.randn(N, D) * 0.05 + X_new = np.random.uniform(-3., 3., (N_new, 1)) + k = GPy.kern.RBF(input_dim=1, lengthscale=10) + m = GPy.models.GPRegression(X,Y,k) + m.optimize() + m.save_model("temp_test_gp_regressor_with_data.json", compress=True, save_data=True) + m.save_model("temp_test_gp_regressor_without_data.json", compress=True, save_data=False) + m1_r = GPy.models.GPRegression.load_model("temp_test_gp_regressor_with_data.json.zip") + m2_r = GPy.models.GPRegression.load_model("temp_test_gp_regressor_without_data.json.zip", (X,Y)) + import os + os.remove("temp_test_gp_regressor_with_data.json.zip") + os.remove("temp_test_gp_regressor_without_data.json.zip") + + Xp = np.random.uniform(size=(int(1e5),1)) + Xp[:,0] = Xp[:,0]*15-5 + + _, var = m.predict(Xp) + _, var1_r = m1_r.predict(Xp) + _, var2_r = m2_r.predict(Xp) + np.testing.assert_array_equal(var.flatten(), var1_r.flatten()) + np.testing.assert_array_equal(var.flatten(), var2_r.flatten()) + + def test_serialize_deserialize_inference_GPClassifier(self): + np.random.seed(fixed_seed) + N = 50 + Nhalf = int(N/2) + X 
= np.hstack([np.random.normal(5, 2, Nhalf), np.random.normal(10, 2, Nhalf)])[:, None]
+        Y = np.hstack([np.ones(Nhalf), np.zeros(Nhalf)])[:, None]
+        kernel = GPy.kern.RBF(1)
+        m = GPy.models.GPClassification(X, Y, kernel=kernel)
+        m.optimize()
+        m.save_model("temp_test_gp_classifier_with_data.json", compress=True, save_data=True)
+        m.save_model("temp_test_gp_classifier_without_data.json", compress=True, save_data=False)
+        m1_r = GPy.models.GPClassification.load_model("temp_test_gp_classifier_with_data.json.zip")
+        m2_r = GPy.models.GPClassification.load_model("temp_test_gp_classifier_without_data.json.zip", (X,Y))
+        import os
+        os.remove("temp_test_gp_classifier_with_data.json.zip")
+        os.remove("temp_test_gp_classifier_without_data.json.zip")
+
+        var = m.predict(X)[0]
+        var1_r = m1_r.predict(X)[0]
+        var2_r = m2_r.predict(X)[0]
+        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten())
+        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var2_r).flatten())
+
+if __name__ == "__main__":
+    #import sys;sys.argv = ['', 'Test.test_parameter_index_operations']
+    unittest.main()
diff --git a/GPy/util/normalizer.py b/GPy/util/normalizer.py
index 557d9825..b62ac35b 100644
--- a/GPy/util/normalizer.py
+++ b/GPy/util/normalizer.py
@@ -33,6 +33,27 @@ class _Norm(object):
         """
         raise NotImplementedError

+    def to_dict(self):
+        raise NotImplementedError
+
+    def _to_dict(self):
+        input_dict = {}
+        return input_dict
+
+    @staticmethod
+    def from_dict(input_dict):
+        import copy
+        input_dict = copy.deepcopy(input_dict)
+        normalizer_class = input_dict.pop('class')
+        import GPy
+        normalizer_class = eval(normalizer_class)
+        return normalizer_class._from_dict(normalizer_class, input_dict)
+
+    @staticmethod
+    def _from_dict(normalizer_class, input_dict):
+        return normalizer_class(**input_dict)
+
+
 class Standardize(_Norm):
     def __init__(self):
         self.mean = None
@@ -50,9 +71,26 @@ class Standardize(_Norm):
     def scaled(self):
         return self.mean is not None

+    def to_dict(self):
+        input_dict = super(Standardize, self)._to_dict()
+        input_dict["class"] = "GPy.util.normalizer.Standardize"
+        if self.mean is not None:
+            input_dict["mean"] = self.mean.tolist()
+            input_dict["std"] = self.std.tolist()
+        return input_dict
+
+    @staticmethod
+    def _from_dict(normalizer_class, input_dict):
+        s = Standardize()
+        if "mean" in input_dict:
+            s.mean = np.array(input_dict["mean"])
+        if "std" in input_dict:
+            s.std = np.array(input_dict["std"])
+        return s
+
 # Inverse variance to be implemented, disabling for now
 # If someone in the future want to implement this,
-# we need to implement the inverse variance for
+# we need to implement the inverse variance for
 # normalization. This means, we need to know the factor
 # for the variance to be multiplied to the variance in
 # normalized space. This is easy to compute for standardization
@@ -71,7 +109,7 @@
 # def inverse_mean(self, X):
 #     return (X + .5) * (self.ymax - self.ymin) + self.ymin
 # def inverse_variance(self, var):
-#
+#
 #     return (var*(self.std**2))
 # def scaled(self):
-#     return (self.ymin is not None) and (self.ymax is not None)
\ No newline at end of file
+#     return (self.ymin is not None) and (self.ymax is not None)
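
[For reference, an end-to-end save/load sketch of the model-level API added in this patch, mirroring GPy/testing/serialization_tests.py. A GPy build with this patch applied is assumed, and the filenames are illustrative only:

    import numpy as np
    import GPy

    X = np.random.uniform(-3., 3., (20, 1))
    Y = np.sin(X) + np.random.randn(20, 1) * 0.05
    m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))
    m.optimize()

    # compress=True gzips the JSON payload and appends ".zip" to the filename;
    # save_data=True embeds X and Y in the file.
    m.save_model("gp_model", compress=True, save_data=True)
    m_full = GPy.models.GPRegression.load_model("gp_model.zip")

    # With save_data=False the training data must be supplied again at load time.
    m.save_model("gp_model_nodata", compress=True, save_data=False)
    m_nodata = GPy.models.GPRegression.load_model("gp_model_nodata.zip", (X, Y))

    assert np.allclose(m.predict(X)[0], m_full.predict(X)[0])
    assert np.allclose(m.predict(X)[0], m_nodata.predict(X)[0])

load_model detects compression from the ".zip" extension and dispatches on the "class" entry stored in the JSON dictionary, so the same call works for GP, GPRegression, and GPClassification models.]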