optimize_restarts() is now parallel (load-balanced). It also maintains compatibility with the verbose and robust options

2026-05-15 06:52:39 +02:00 · 2013-02-24 13:23:46 +00:00 · 2013-02-24 13:23:46 +00:00 · 1705ecce91
commit 1705ecce91
parent 894412a177
3 changed files with 41 additions and 10 deletions
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -5,6 +5,8 @@
 import numpy as np
 from scipy import optimize
 import sys, pdb
+import multiprocessing as mp
+from GPy.util.misc import opt_wrapper
 #import numdifftools as ndt
 from parameterised import parameterised, truncate_pad
 import priors
@ -166,7 +168,7 @@ class model(parameterised):
        self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...)


-    def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, **kwargs):
+    def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
        """
        Perform random restarts of the model, and set the model to the best
        seen solution.
@ -181,23 +183,43 @@ class model(parameterised):
        :max_f_eval: maximum number of function evaluations
        :messages: whether to display during optimisation
        :verbose: whether to show informations about the current restart
+        :parallel: whether to run each restart as a separate process. It relies on the multiprocessing module.
+        :num_processes: number of workers in the multiprocessing pool
+        
+        ..Note: If num_processes is None, the number of workers in the multiprocessing pool is automatically
+        set to the number of processors on the current machine.
+
+
        """

        initial_parameters = self._get_params_transformed()
+
+        if parallel:
+            jobs = []
+            pool = mp.Pool(processes=num_processes)
+            for i in range(Nrestarts):
+                job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
+                jobs.append(job)
+
+            pool.close() # signal that no more data coming in
+            pool.join()  # wait for all the tasks to complete
+
        for i in range(Nrestarts):
            try:
-                self.randomize()
-                self.optimize(**kwargs)
-                if verbose:
-                    print("Optimization restart {0}/{1}, f = {2}".format(i+1,
-                                                                      Nrestarts,
-                                                                      self.optimization_runs[-1].f_opt))
+                if not parallel:
+                    self.randomize()
+                    self.optimize(**kwargs)
+                else:
+                    self.optimization_runs.append(jobs[i].get())

+                if verbose:
+                    print("Optimization restart {0}/{1}, f = {2}".format(i+1, Nrestarts, self.optimization_runs[-1].f_opt))
            except Exception as e:
                if robust:
                    print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts))
                else:
                    raise e
+
        if len(self.optimization_runs):
            i = np.argmin([o.f_opt for o in self.optimization_runs])
            self._set_params_transformed(self.optimization_runs[i].x_opt)
@ -371,7 +393,7 @@ class model(parameterised):
                param_list = range(len(x))
            else:
                param_list = self.grep_param_names(target_param)
-                
+
            for i in param_list:
                xx = x.copy()
                xx[i] += step
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@ -20,7 +20,6 @@ def toy_rbf_1d():
    # optimize
    m.ensure_default_constraints()
    m.optimize()
-
    # plot
    m.plot()
    print(m)
--- a/GPy/util/misc.py
+++ b/GPy/util/misc.py
@ -4,6 +4,16 @@

 import numpy as np

+def opt_wrapper(m, **kwargs):
+    """
+    This function just wraps the optimization procedure of a GPy
+    object so that optimize() is pickleable (necessary for multiprocessing).
+    """
+    m.randomize()
+    m.optimize(**kwargs)
+    return m.optimization_runs[-1]
+
+
 def linear_grid(D, n = 100, min_max = (-100, 100)):
    """
    Creates a D-dimensional grid of n linearly spaced points
@ -27,7 +37,7 @@ def kmm_init(X, m = 10):
    This is the same initialization algorithm that is used
    in Kmeans++. It's quite simple and very useful to initialize
    the locations of the inducing points in sparse GPs.
-    
+
    :param X: data
    :param m: number of inducing points
    """