# Copyright (c) 2012, 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from .. import likelihoods
from ..inference import optimization
from ..util.misc import opt_wrapper
from .parameterization import Parameterized
import multiprocessing as mp
import numpy as np
from numpy.linalg import LinAlgError
# import numdifftools as ndt

class Model(Parameterized):

    _fail_count = 0  # count of failed optimization steps (see _objective)
    _allowed_failures = 10  # number of allowed failures before re-raising

    def __init__(self, name):
        super(Model, self).__init__(name)  # Parameterized.__init__(self)
        self.optimization_runs = []
        self.sampling_runs = []
        self.preferred_optimizer = 'bfgs'

    def log_likelihood(self):
        raise NotImplementedError("this needs to be implemented to use the model class")

    def _log_likelihood_gradients(self):
        return self.gradient

    def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
        """
        Perform random restarts of the model, and set the model to the best
        solution seen.

        If the robust flag is set, exceptions raised during optimizations will
        be handled silently. If _all_ runs fail, the model is reset to the
        existing parameter values.

        **Notes**

        :param num_restarts: number of restarts to use (default 10)
        :type num_restarts: int
        :param robust: whether to handle exceptions silently or not (default False)
        :type robust: bool
        :param verbose: whether to print the result of each restart (default True)
        :type verbose: bool
        :param parallel: whether to run each restart as a separate process, using the multiprocessing module
        :type parallel: bool
        :param num_processes: number of workers in the multiprocessing pool
        :type num_processes: int

        \*\*kwargs are passed to the optimizer. They can be:

        :param max_f_eval: maximum number of function evaluations
        :type max_f_eval: int
        :param max_iters: maximum number of iterations
        :type max_iters: int
        :param messages: whether to display messages during optimisation
        :type messages: bool

        .. note:: If num_processes is None, the number of workers in the
            multiprocessing pool is automatically set to the number of
            processors on the current machine.

        """
        initial_parameters = self.optimizer_array.copy()

        if parallel:
            try:
                jobs = []
                pool = mp.Pool(processes=num_processes)
                for i in range(num_restarts):
                    self.randomize()
                    job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
                    jobs.append(job)

                pool.close()  # signal that no more data is coming in
                pool.join()  # wait for all the tasks to complete
            except KeyboardInterrupt:
                print("Ctrl+c received, terminating and joining pool.")
                pool.terminate()
                pool.join()

        for i in range(num_restarts):
            try:
                if not parallel:
                    self.randomize()
                    self.optimize(**kwargs)
                else:
                    self.optimization_runs.append(jobs[i].get())

                if verbose:
                    print("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt))
            except Exception as e:
                if robust:
                    print("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts))
                else:
                    raise e

        if len(self.optimization_runs):
            i = np.argmin([o.f_opt for o in self.optimization_runs])
            self.optimizer_array = self.optimization_runs[i].x_opt
        else:
            self.optimizer_array = initial_parameters
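
    # A minimal usage sketch (`m` is a hypothetical instance of a Model
    # subclass, e.g. a GP regression model built elsewhere): each restart
    # randomizes the parameters and optimizes; the run with the lowest
    # objective f_opt is finally written back to the model.
    #
    #   m.optimize_restarts(num_restarts=5, robust=True, verbose=True)
    #   best = min(o.f_opt for o in m.optimization_runs)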

    def ensure_default_constraints(self, warning=True):
        """
        Ensure that any variables which should clearly be positive
        have been constrained somehow. The method performs a regular
        expression search on parameter names, looking for the terms
        'variance', 'lengthscale', 'precision' and 'kappa'. If any of
        these terms are present in a name, that parameter is
        constrained positive.

        DEPRECATED.
        """
        raise DeprecationWarning('parameters now have default constraints')

    def objective_function(self):
        """
        The objective function for the given algorithm.

        This function is the true objective to be minimized.
        Note that all parameters are already set and in place, so you just need
        to return the objective function here.

        For probabilistic models this is the negative log-likelihood
        (including the MAP prior), so we return it here. If your model is not
        probabilistic, just return your objective to minimize here!
        """
        return -float(self.log_likelihood()) - self.log_prior()

    def objective_function_gradients(self):
        """
        The gradients of the objective function for the given algorithm.
        These are the gradients of the *negative* log-likelihood, as this
        framework works with *negative* log-likelihoods as a default.

        You can find the gradients for the parameters in self.gradient at all
        times. This is where the gradients for the parameters get stored.

        This function is the gradient of the true objective to be minimized.
        Note that all parameters are already set and in place, so you just need
        to return the gradients here.

        For probabilistic models this is the gradient of the negative log-likelihood
        (including the MAP prior), so we return it here. If your model is not
        probabilistic, just return your *negative* gradient here!
        """
        return -(self._log_likelihood_gradients() + self._log_prior_gradients())
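
    # A minimal sketch of a concrete subclass (hypothetical, for illustration
    # only; the Param import path and the link_parameter / parameters_changed
    # hooks are assumed from the surrounding parameterization framework): a
    # model whose log-likelihood is a simple quadratic, so optimize() drives
    # theta towards zero.
    #
    #   from .parameterization import Param
    #
    #   class QuadraticModel(Model):
    #       def __init__(self):
    #           super(QuadraticModel, self).__init__(name='quadratic')
    #           self.theta = Param('theta', np.random.randn(3))
    #           self.link_parameter(self.theta)
    #
    #       def log_likelihood(self):
    #           return -0.5 * np.sum(self.theta.values ** 2)
    #
    #       def parameters_changed(self):
    #           # gradient of the log-likelihood, collected into self.gradient
    #           self.theta.gradient = -self.theta.values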

    def _grads(self, x):
        """
        Gets the gradients from the likelihood and the priors.

        Failures are handled robustly. The algorithm will try several times to
        return the gradients, and will raise the original exception if
        the objective cannot be computed.

        :param x: the parameters of the model.
        :type x: np.array
        """
        try:
            # self._set_params_transformed(x)
            self.optimizer_array = x
            obj_grads = self._transform_gradients(self.objective_function_gradients())
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError):
            if self._fail_count >= self._allowed_failures:
                raise
            self._fail_count += 1
            obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
        return obj_grads
    def _objective(self, x):
        """
        The objective function passed to the optimizer. It combines
        the likelihood and the priors.

        Failures are handled robustly. The algorithm will try several times to
        return the objective, and will raise the original exception if
        the objective cannot be computed.

        :param x: the parameters of the model.
        :type x: np.array
        """
        try:
            self.optimizer_array = x
            obj = self.objective_function()
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError):
            if self._fail_count >= self._allowed_failures:
                raise
            self._fail_count += 1
            return np.inf
        return obj
    def _objective_grads(self, x):
        """
        The objective function and its gradients, evaluated together and
        passed to optimizers that use both. Failures are handled as in
        _objective and _grads.
        """
        try:
            self.optimizer_array = x
            obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
            self._fail_count = 0
        except (LinAlgError, ZeroDivisionError, ValueError):
            if self._fail_count >= self._allowed_failures:
                raise
            self._fail_count += 1
            obj_f = np.inf
            obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
        return obj_f, obj_grads
    def optimize(self, optimizer=None, start=None, **kwargs):
        """
        Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
        kwargs are passed to the optimizer. They can be:

        :param max_f_eval: maximum number of function evaluations
        :type max_f_eval: int
        :param messages: whether to display messages during optimisation
        :type messages: bool
        :param optimizer: which optimizer to use (defaults to self.preferred_optimizer)
        :type optimizer: string

        TODO: valid args
        """
        if self.is_fixed:
            raise RuntimeError("Cannot optimize when everything is fixed")
        if self.size == 0:
            raise RuntimeError("Model without parameters cannot be optimized")

        if start is None:
            start = self.optimizer_array

        if optimizer is None:
            optimizer = self.preferred_optimizer

        if isinstance(optimizer, optimization.Optimizer):
            opt = optimizer
            opt.model = self
        else:
            optimizer = optimization.get_optimizer(optimizer)
            opt = optimizer(start, model=self, **kwargs)

        opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)

        self.optimization_runs.append(opt)

        self.optimizer_array = opt.x_opt
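
    # A minimal usage sketch (`m` again a hypothetical model instance): the
    # optimizer may be passed by name, resolved via optimization.get_optimizer,
    # or as an optimization.Optimizer instance.
    #
    #   m.optimize()                          # uses m.preferred_optimizer ('bfgs')
    #   m.optimize(optimizer='bfgs', messages=True)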

    def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
        # SGD is not imported in this module; the stochastic gradient descent
        # implementation is assumed to be provided elsewhere.
        # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
        sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)  # @UndefinedVariable
        sgd.run()
        self.optimization_runs.append(sgd)

    def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
        """
        Check the gradient of the model by comparing to a numerical
        estimate. If the verbose flag is passed, individual
        components are tested (and printed).

        :param target_param: the parameter to check; by default, check all parameters
        :param verbose: If True, print a "full" check of each parameter
        :type verbose: bool
        :param step: The size of the step around which to linearise the objective
        :type step: float (default 1e-6)
        :param tolerance: the tolerance allowed (see note)
        :type tolerance: float (default 1e-3)

        .. note:: The gradient is considered correct if the ratio of the
            analytical and numerical gradients is within <tolerance> of unity.
        """
        x = self.optimizer_array.copy()

        if not verbose:
            # make sure only to test the selected parameters
            if target_param is None:
                transformed_index = np.arange(len(x))
            else:
                transformed_index = self._raveled_index_for(target_param)
                if self._has_fixes():
                    indices = np.r_[:self.size]
                    which = (transformed_index[:, None] == indices[self._fixes_][None, :]).nonzero()
                    transformed_index = (indices - (~self._fixes_).cumsum())[transformed_index[which[0]]]

            if transformed_index.size == 0:
                print("No free parameters to check")
                return

            # just check the global ratio
            dx = np.zeros(x.shape)
            dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.)

            # evaluate around the point x
            f1 = self._objective(x + dx)
            f2 = self._objective(x - dx)
            gradient = self._grads(x)

            dx = dx[transformed_index]
            gradient = gradient[transformed_index]

            denominator = (2 * np.dot(dx, gradient))
            global_ratio = (f1 - f2) / np.where(denominator == 0., 1e-32, denominator)
            global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
            if np.isnan(global_ratio):
                global_ratio = 0
            return np.abs(1. - global_ratio) < tolerance or global_diff
        else:
            # check the gradient of each parameter individually, and do some pretty printing
            try:
                names = self._get_param_names()
            except NotImplementedError:
                names = ['Variable %i' % i for i in range(len(x))]
            # Prepare for pretty-printing
            header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
            max_names = max([len(name) for name in names] + [len(header[0])])
            float_len = 10
            cols = [max_names]
            cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
            cols = np.array(cols) + 5
            header_string = '|'.join(["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))])
            separator = '-' * len(header_string)
            print('\n'.join([header_string, separator]))
            if target_param is None:
                param_index = np.arange(len(x))
                transformed_index = param_index
            else:
                param_index = self._raveled_index_for(target_param)
                if self._has_fixes():
                    indices = np.r_[:self.size]
                    which = (param_index[:, None] == indices[self._fixes_][None, :]).nonzero()
                    param_index = param_index[which[0]]
                    transformed_index = (indices - (~self._fixes_).cumsum())[param_index]
                else:
                    transformed_index = param_index

            if param_index.size == 0:
                print("No free parameters to check")
                return

            gradient = self._grads(x).copy()
            ret = True
            for nind, xind in zip(param_index, transformed_index):
                xx = x.copy()
                xx[xind] += step
                f1 = self._objective(xx)
                xx[xind] -= 2. * step
                f2 = self._objective(xx)
                numerical_gradient = (f1 - f2) / (2 * step)
                if np.all(gradient[xind] == 0):
                    # analytical gradient is zero: the check passes only if the
                    # numerical difference is exactly zero as well
                    ratio = (f1 - f2) == gradient[xind]
                else:
                    ratio = (f1 - f2) / (2 * step * gradient[xind])
                difference = np.abs(numerical_gradient - gradient[xind])

                if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
                    formatted_name = "\033[92m {0} \033[0m".format(names[nind])
                    ret &= True
                else:
                    formatted_name = "\033[91m {0} \033[0m".format(names[nind])
                    ret &= False

                r = '%.6f' % float(ratio)
                d = '%.6f' % float(difference)
                g = '%.6f' % gradient[xind]
                ng = '%.6f' % float(numerical_gradient)
                grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
                print(grad_string)

            self.optimizer_array = x
            return ret
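
# A sketch of what _checkgrad computes, assuming a hypothetical model `m`.
# Non-verbose mode perturbs all free parameters along one random direction dx
# and compares the directional derivative against a central difference:
#
#   ratio = (f(x + dx) - f(x - dx)) / (2 * dot(dx, analytical_gradient))
#
# which should be within `tolerance` of 1. Verbose mode does the same
# parameter by parameter:
#
#   numerical = (f(x + step) - f(x - step)) / (2 * step)
#
#   m._checkgrad(verbose=True)   # per-parameter table, returns bool
#   m._checkgrad()               # single global ratio check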