optimize_restarts() is now parallel (load-balanced). It also maintains compatibility with the verbose and robust options

This commit is contained in:
Nicolò Fusi 2013-02-24 13:23:46 +00:00
parent 894412a177
commit 1705ecce91
3 changed files with 41 additions and 10 deletions

View file

@ -5,6 +5,8 @@
import numpy as np
from scipy import optimize
import sys, pdb
import multiprocessing as mp
from GPy.util.misc import opt_wrapper
#import numdifftools as ndt
from parameterised import parameterised, truncate_pad
import priors
@ -166,7 +168,7 @@ class model(parameterised):
self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...)
def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, **kwargs):
def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
"""
Perform random restarts of the model, and set the model to the best
seen solution.
@ -181,23 +183,43 @@ class model(parameterised):
:max_f_eval: maximum number of function evaluations
:messages: whether to display during optimisation
:verbose: whether to show information about the current restart
:parallel: whether to run each restart as a separate process. It relies on the multiprocessing module.
:num_processes: number of workers in the multiprocessing pool
..Note: If num_processes is None, the number of workers in the multiprocessing pool is automatically
set to the number of processors on the current machine.
"""
initial_parameters = self._get_params_transformed()
if parallel:
jobs = []
pool = mp.Pool(processes=num_processes)
for i in range(Nrestarts):
job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
jobs.append(job)
pool.close() # signal that no more data coming in
pool.join() # wait for all the tasks to complete
for i in range(Nrestarts):
try:
self.randomize()
self.optimize(**kwargs)
if verbose:
print("Optimization restart {0}/{1}, f = {2}".format(i+1,
Nrestarts,
self.optimization_runs[-1].f_opt))
if not parallel:
self.randomize()
self.optimize(**kwargs)
else:
self.optimization_runs.append(jobs[i].get())
if verbose:
print("Optimization restart {0}/{1}, f = {2}".format(i+1, Nrestarts, self.optimization_runs[-1].f_opt))
except Exception as e:
if robust:
print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts))
else:
raise e
if len(self.optimization_runs):
i = np.argmin([o.f_opt for o in self.optimization_runs])
self._set_params_transformed(self.optimization_runs[i].x_opt)
@ -371,7 +393,7 @@ class model(parameterised):
param_list = range(len(x))
else:
param_list = self.grep_param_names(target_param)
for i in param_list:
xx = x.copy()
xx[i] += step

View file

@ -20,7 +20,6 @@ def toy_rbf_1d():
# optimize
m.ensure_default_constraints()
m.optimize()
# plot
m.plot()
print(m)

View file

@ -4,6 +4,16 @@
import numpy as np
def opt_wrapper(m, **kwargs):
    """
    Run one randomized optimization of ``m`` and return its result.

    This exists as a plain module-level function so that it can be handed
    to ``multiprocessing`` (which needs a pickleable callable) instead of
    calling ``m.optimize()`` directly.

    :param m: object exposing ``randomize()``, ``optimize(**kwargs)`` and an
        ``optimization_runs`` sequence
    :param kwargs: forwarded unchanged to ``m.optimize``
    :returns: the last entry of ``m.optimization_runs`` after optimizing
    """
    # Start from a fresh random point, then optimize from there.
    m.randomize()
    m.optimize(**kwargs)
    latest_run = m.optimization_runs[-1]
    return latest_run
def linear_grid(D, n = 100, min_max = (-100, 100)):
"""
Creates a D-dimensional grid of n linearly spaced points
@ -27,7 +37,7 @@ def kmm_init(X, m = 10):
This is the same initialization algorithm that is used
in Kmeans++. It's quite simple and very useful to initialize
the locations of the inducing points in sparse GPs.
:param X: data
:param m: number of inducing points
"""