diff --git a/GPy/core/model.py b/GPy/core/model.py
index 1265c025..6b7d32c6 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -5,6 +5,8 @@
 import numpy as np
 from scipy import optimize
 import sys, pdb
+import multiprocessing as mp
+from GPy.util.misc import opt_wrapper
 #import numdifftools as ndt
 from parameterised import parameterised, truncate_pad
 import priors
@@ -166,7 +168,7 @@ class model(parameterised):
         self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...)
 
 
-    def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, **kwargs):
+    def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
         """
         Perform random restarts of the model, and set the model to the best
         seen solution.
@@ -181,23 +183,43 @@ class model(parameterised):
         :max_f_eval: maximum number of function evaluations
         :messages: whether to display during optimisation
         :verbose: whether to show informations about the current restart
+        :parallel: whether to run each restart as a separate process. It relies on the multiprocessing module.
+        :num_processes: number of workers in the multiprocessing pool
+        
+        ..Note: If num_processes is None, the number of workes in the multiprocessing pool is automatically
+        set to the number of processors on the current machine.
+
+
         """
 
         initial_parameters = self._get_params_transformed()
+
+        if parallel:
+            jobs = []
+            pool = mp.Pool(processes=num_processes)
+            for i in range(Nrestarts):
+                job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
+                jobs.append(job)
+
+            pool.close() # signal that no more data coming in
+            pool.join()  # wait for all the tasks to complete
+
         for i in range(Nrestarts):
             try:
-                self.randomize()
-                self.optimize(**kwargs)
-                if verbose:
-                    print("Optimization restart {0}/{1}, f = {2}".format(i+1,
-                                                                      Nrestarts,
-                                                                      self.optimization_runs[-1].f_opt))
+                if not parallel:
+                    self.randomize()
+                    self.optimize(**kwargs)
+                else:
+                    self.optimization_runs.append(jobs[i].get())
 
+                if verbose:
+                    print("Optimization restart {0}/{1}, f = {2}".format(i+1, Nrestarts, self.optimization_runs[-1].f_opt))
             except Exception as e:
                 if robust:
                     print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts))
                 else:
                     raise e
+
         if len(self.optimization_runs):
             i = np.argmin([o.f_opt for o in self.optimization_runs])
             self._set_params_transformed(self.optimization_runs[i].x_opt)
@@ -371,7 +393,7 @@ class model(parameterised):
                 param_list = range(len(x))
             else:
                 param_list = self.grep_param_names(target_param)
-                
+
             for i in param_list:
                 xx = x.copy()
                 xx[i] += step
diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py
index 9444e899..43fa0147 100644
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@@ -20,7 +20,6 @@ def toy_rbf_1d():
     # optimize
     m.ensure_default_constraints()
     m.optimize()
-
     # plot
     m.plot()
     print(m)
diff --git a/GPy/util/misc.py b/GPy/util/misc.py
index d660e2d4..e3b91dce 100644
--- a/GPy/util/misc.py
+++ b/GPy/util/misc.py
@@ -4,6 +4,16 @@
 
 import numpy as np
 
+def opt_wrapper(m, **kwargs):
+    """
+    This function just wraps the optimization procedure of a GPy
+    object so that optimize() pickleable (necessary for multiprocessing).
+    """
+    m.randomize()
+    m.optimize(**kwargs)
+    return m.optimization_runs[-1]
+
+
 def linear_grid(D, n = 100, min_max = (-100, 100)):
     """
     Creates a D-dimensional grid of n linearly spaced points
@@ -27,7 +37,7 @@ def kmm_init(X, m = 10):
     This is the same initialization algorithm that is used
     in Kmeans++. It's quite simple and very useful to initialize
     the locations of the inducing points in sparse GPs.
-    
+
     :param X: data
     :param m: number of inducing points
     """