optimize_restarts() is now parallel (load-balanced). It also mantains compatibility with the verbose and robust options

This commit is contained in:
Nicolò Fusi 2013-02-24 13:23:46 +00:00
parent 894412a177
commit 1705ecce91
3 changed files with 41 additions and 10 deletions

View file

@ -5,6 +5,8 @@
import numpy as np import numpy as np
from scipy import optimize from scipy import optimize
import sys, pdb import sys, pdb
import multiprocessing as mp
from GPy.util.misc import opt_wrapper
#import numdifftools as ndt #import numdifftools as ndt
from parameterised import parameterised, truncate_pad from parameterised import parameterised, truncate_pad
import priors import priors
@ -166,7 +168,7 @@ class model(parameterised):
self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...) self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...)
def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, **kwargs): def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
""" """
Perform random restarts of the model, and set the model to the best Perform random restarts of the model, and set the model to the best
seen solution. seen solution.
@ -181,23 +183,43 @@ class model(parameterised):
:max_f_eval: maximum number of function evaluations :max_f_eval: maximum number of function evaluations
:messages: whether to display during optimisation :messages: whether to display during optimisation
:verbose: whether to show informations about the current restart :verbose: whether to show informations about the current restart
:parallel: whether to run each restart as a separate process. It relies on the multiprocessing module.
:num_processes: number of workers in the multiprocessing pool
..Note: If num_processes is None, the number of workes in the multiprocessing pool is automatically
set to the number of processors on the current machine.
""" """
initial_parameters = self._get_params_transformed() initial_parameters = self._get_params_transformed()
if parallel:
jobs = []
pool = mp.Pool(processes=num_processes)
for i in range(Nrestarts):
job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
jobs.append(job)
pool.close() # signal that no more data coming in
pool.join() # wait for all the tasks to complete
for i in range(Nrestarts): for i in range(Nrestarts):
try: try:
self.randomize() if not parallel:
self.optimize(**kwargs) self.randomize()
if verbose: self.optimize(**kwargs)
print("Optimization restart {0}/{1}, f = {2}".format(i+1, else:
Nrestarts, self.optimization_runs.append(jobs[i].get())
self.optimization_runs[-1].f_opt))
if verbose:
print("Optimization restart {0}/{1}, f = {2}".format(i+1, Nrestarts, self.optimization_runs[-1].f_opt))
except Exception as e: except Exception as e:
if robust: if robust:
print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts)) print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts))
else: else:
raise e raise e
if len(self.optimization_runs): if len(self.optimization_runs):
i = np.argmin([o.f_opt for o in self.optimization_runs]) i = np.argmin([o.f_opt for o in self.optimization_runs])
self._set_params_transformed(self.optimization_runs[i].x_opt) self._set_params_transformed(self.optimization_runs[i].x_opt)
@ -371,7 +393,7 @@ class model(parameterised):
param_list = range(len(x)) param_list = range(len(x))
else: else:
param_list = self.grep_param_names(target_param) param_list = self.grep_param_names(target_param)
for i in param_list: for i in param_list:
xx = x.copy() xx = x.copy()
xx[i] += step xx[i] += step

View file

@ -20,7 +20,6 @@ def toy_rbf_1d():
# optimize # optimize
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize() m.optimize()
# plot # plot
m.plot() m.plot()
print(m) print(m)

View file

@ -4,6 +4,16 @@
import numpy as np import numpy as np
def opt_wrapper(m, **kwargs):
"""
This function just wraps the optimization procedure of a GPy
object so that optimize() pickleable (necessary for multiprocessing).
"""
m.randomize()
m.optimize(**kwargs)
return m.optimization_runs[-1]
def linear_grid(D, n = 100, min_max = (-100, 100)): def linear_grid(D, n = 100, min_max = (-100, 100)):
""" """
Creates a D-dimensional grid of n linearly spaced points Creates a D-dimensional grid of n linearly spaced points
@ -27,7 +37,7 @@ def kmm_init(X, m = 10):
This is the same initialization algorithm that is used This is the same initialization algorithm that is used
in Kmeans++. It's quite simple and very useful to initialize in Kmeans++. It's quite simple and very useful to initialize
the locations of the inducing points in sparse GPs. the locations of the inducing points in sparse GPs.
:param X: data :param X: data
:param m: number of inducing points :param m: number of inducing points
""" """