mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-24 14:15:14 +02:00
[optimization] deleted and backwardscompatible
This commit is contained in:
parent
850c10beaa
commit
913d80f712
8 changed files with 11 additions and 914 deletions
|
|
@ -20,7 +20,6 @@ backwards_compatibility = ['lists_and_dicts', 'observable_array', 'ties_and_rema
|
||||||
for bc in backwards_compatibility:
|
for bc in backwards_compatibility:
|
||||||
sys.modules['GPy.core.parameterization.{!s}'.format(bc)] = getattr(core.parameterization, bc)
|
sys.modules['GPy.core.parameterization.{!s}'.format(bc)] = getattr(core.parameterization, bc)
|
||||||
|
|
||||||
|
|
||||||
# Direct imports for convenience:
|
# Direct imports for convenience:
|
||||||
from .core import Model
|
from .core import Model
|
||||||
from .core.parameterization import priors
|
from .core.parameterization import priors
|
||||||
|
|
@ -52,5 +51,6 @@ def load(file_or_path):
|
||||||
for name, module in inspect.getmembers(kern.src): # @UndefinedVariable
|
for name, module in inspect.getmembers(kern.src): # @UndefinedVariable
|
||||||
if not name.startswith('_'):
|
if not name.startswith('_'):
|
||||||
sys.modules['GPy.kern._src.{}'.format(name)] = module
|
sys.modules['GPy.kern._src.{}'.format(name)] = module
|
||||||
|
sys.modules['GPy.inference.optimization'] = inference.optimization
|
||||||
import paramz
|
import paramz
|
||||||
return paramz.load(file_or_path)
|
return paramz.load(file_or_path)
|
||||||
|
|
@ -1,3 +1,7 @@
|
||||||
from . import latent_function_inference
|
|
||||||
from . import optimization
|
from . import optimization
|
||||||
|
from . import latent_function_inference
|
||||||
from . import mcmc
|
from . import mcmc
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.modules['GPy.inference.optimization'] = optimization
|
||||||
|
sys.modules['GPy.inference.optimization.optimization'] = optimization
|
||||||
|
|
|
||||||
|
|
@ -1,2 +1,5 @@
|
||||||
from .scg import SCG
|
from paramz.optimization import stochastics, Optimizer
|
||||||
from .optimization import *
|
from paramz.optimization import *
|
||||||
|
import sys
|
||||||
|
sys.modules['GPy.inference.optimization.stochastics'] = stochastics
|
||||||
|
sys.modules['GPy.inference.optimization.Optimizer'] = Optimizer
|
||||||
|
|
@ -1,285 +0,0 @@
|
||||||
# Copyright (c) 2012-2014, Max Zwiessele
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
from .gradient_descent_update_rules import FletcherReeves, \
|
|
||||||
PolakRibiere
|
|
||||||
from Queue import Empty
|
|
||||||
from multiprocessing import Value
|
|
||||||
from multiprocessing.queues import Queue
|
|
||||||
from multiprocessing.synchronize import Event
|
|
||||||
from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
|
|
||||||
from threading import Thread
|
|
||||||
import numpy
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
|
|
||||||
RUNNING = "running"
|
|
||||||
CONVERGED = "converged"
|
|
||||||
MAXITER = "maximum number of iterations reached"
|
|
||||||
MAX_F_EVAL = "maximum number of function calls reached"
|
|
||||||
LINE_SEARCH = "line search failed"
|
|
||||||
KBINTERRUPT = "interrupted"
|
|
||||||
|
|
||||||
class _Async_Optimization(Thread):
|
|
||||||
|
|
||||||
def __init__(self, f, df, x0, update_rule, runsignal, SENTINEL,
|
|
||||||
report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3,
|
|
||||||
gtol=1e-6, outqueue=None, *args, **kw):
|
|
||||||
"""
|
|
||||||
Helper Process class for async optimization
|
|
||||||
|
|
||||||
f_call and df_call are Multiprocessing Values, for synchronized assignment
|
|
||||||
"""
|
|
||||||
self.f_call = Value('i', 0)
|
|
||||||
self.df_call = Value('i', 0)
|
|
||||||
self.f = self.f_wrapper(f, self.f_call)
|
|
||||||
self.df = self.f_wrapper(df, self.df_call)
|
|
||||||
self.x0 = x0
|
|
||||||
self.update_rule = update_rule
|
|
||||||
self.report_every = report_every
|
|
||||||
self.messages = messages
|
|
||||||
self.maxiter = maxiter
|
|
||||||
self.max_f_eval = max_f_eval
|
|
||||||
self.gtol = gtol
|
|
||||||
self.SENTINEL = SENTINEL
|
|
||||||
self.runsignal = runsignal
|
|
||||||
# self.parent = parent
|
|
||||||
# self.result = None
|
|
||||||
self.outq = outqueue
|
|
||||||
super(_Async_Optimization, self).__init__(target=self.run,
|
|
||||||
name="CG Optimization",
|
|
||||||
*args, **kw)
|
|
||||||
|
|
||||||
# def __enter__(self):
|
|
||||||
# return self
|
|
||||||
#
|
|
||||||
# def __exit__(self, type, value, traceback):
|
|
||||||
# return isinstance(value, TypeError)
|
|
||||||
|
|
||||||
def f_wrapper(self, f, counter):
|
|
||||||
def f_w(*a, **kw):
|
|
||||||
counter.value += 1
|
|
||||||
return f(*a, **kw)
|
|
||||||
return f_w
|
|
||||||
|
|
||||||
def callback(self, *a):
|
|
||||||
if self.outq is not None:
|
|
||||||
self.outq.put(a)
|
|
||||||
# self.parent and self.parent.callback(*a, **kw)
|
|
||||||
pass
|
|
||||||
# print "callback done"
|
|
||||||
|
|
||||||
def callback_return(self, *a):
|
|
||||||
self.callback(*a)
|
|
||||||
if self.outq is not None:
|
|
||||||
self.outq.put(self.SENTINEL)
|
|
||||||
if self.messages:
|
|
||||||
print("")
|
|
||||||
self.runsignal.clear()
|
|
||||||
|
|
||||||
def run(self, *args, **kwargs):
|
|
||||||
raise NotImplementedError("Overwrite this with optimization (for async use)")
|
|
||||||
pass
|
|
||||||
|
|
||||||
class _CGDAsync(_Async_Optimization):
|
|
||||||
|
|
||||||
def reset(self, xi, *a, **kw):
|
|
||||||
gi = -self.df(xi, *a, **kw)
|
|
||||||
si = gi
|
|
||||||
ur = self.update_rule(gi)
|
|
||||||
return gi, ur, si
|
|
||||||
|
|
||||||
def run(self, *a, **kw):
|
|
||||||
status = RUNNING
|
|
||||||
|
|
||||||
fi = self.f(self.x0)
|
|
||||||
fi_old = fi + 5000
|
|
||||||
|
|
||||||
gi, ur, si = self.reset(self.x0, *a, **kw)
|
|
||||||
xi = self.x0
|
|
||||||
xi_old = numpy.nan
|
|
||||||
it = 0
|
|
||||||
|
|
||||||
while it < self.maxiter:
|
|
||||||
if not self.runsignal.is_set():
|
|
||||||
break
|
|
||||||
|
|
||||||
if self.f_call.value > self.max_f_eval:
|
|
||||||
status = MAX_F_EVAL
|
|
||||||
|
|
||||||
gi = -self.df(xi, *a, **kw)
|
|
||||||
if numpy.dot(gi.T, gi) <= self.gtol:
|
|
||||||
status = CONVERGED
|
|
||||||
break
|
|
||||||
if numpy.isnan(numpy.dot(gi.T, gi)):
|
|
||||||
if numpy.any(numpy.isnan(xi_old)):
|
|
||||||
status = CONVERGED
|
|
||||||
break
|
|
||||||
self.reset(xi_old)
|
|
||||||
|
|
||||||
gammai = ur(gi)
|
|
||||||
if gammai < 1e-6 or it % xi.shape[0] == 0:
|
|
||||||
gi, ur, si = self.reset(xi, *a, **kw)
|
|
||||||
si = gi + gammai * si
|
|
||||||
alphai, _, _, fi2, fi_old2, gfi = line_search_wolfe1(self.f,
|
|
||||||
self.df,
|
|
||||||
xi,
|
|
||||||
si, gi,
|
|
||||||
fi, fi_old)
|
|
||||||
if alphai is None:
|
|
||||||
alphai, _, _, fi2, fi_old2, gfi = \
|
|
||||||
line_search_wolfe2(self.f, self.df,
|
|
||||||
xi, si, gi,
|
|
||||||
fi, fi_old)
|
|
||||||
if alphai is None:
|
|
||||||
# This line search also failed to find a better solution.
|
|
||||||
status = LINE_SEARCH
|
|
||||||
break
|
|
||||||
if fi2 < fi:
|
|
||||||
fi, fi_old = fi2, fi_old2
|
|
||||||
if gfi is not None:
|
|
||||||
gi = gfi
|
|
||||||
|
|
||||||
if numpy.isnan(fi) or fi_old < fi:
|
|
||||||
gi, ur, si = self.reset(xi, *a, **kw)
|
|
||||||
|
|
||||||
else:
|
|
||||||
xi += numpy.dot(alphai, si)
|
|
||||||
if self.messages:
|
|
||||||
sys.stdout.write("\r")
|
|
||||||
sys.stdout.flush()
|
|
||||||
sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi)))
|
|
||||||
|
|
||||||
if it % self.report_every == 0:
|
|
||||||
self.callback(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
|
|
||||||
it += 1
|
|
||||||
else:
|
|
||||||
status = MAXITER
|
|
||||||
self.callback_return(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
|
|
||||||
self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value, status]
|
|
||||||
|
|
||||||
class Async_Optimize(object):
|
|
||||||
callback = lambda *x: None
|
|
||||||
runsignal = Event()
|
|
||||||
SENTINEL = "SENTINEL"
|
|
||||||
|
|
||||||
def async_callback_collect(self, q):
|
|
||||||
while self.runsignal.is_set():
|
|
||||||
try:
|
|
||||||
for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
|
|
||||||
self.callback(*ret)
|
|
||||||
self.runsignal.clear()
|
|
||||||
except Empty:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def opt_async(self, f, df, x0, callback, update_rule=PolakRibiere,
|
|
||||||
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
|
|
||||||
report_every=10, *args, **kwargs):
|
|
||||||
self.runsignal.set()
|
|
||||||
c = None
|
|
||||||
outqueue = None
|
|
||||||
if callback:
|
|
||||||
outqueue = Queue()
|
|
||||||
self.callback = callback
|
|
||||||
c = Thread(target=self.async_callback_collect, args=(outqueue,))
|
|
||||||
c.start()
|
|
||||||
p = _CGDAsync(f, df, x0, update_rule, self.runsignal, self.SENTINEL,
|
|
||||||
report_every=report_every, messages=messages, maxiter=maxiter,
|
|
||||||
max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs)
|
|
||||||
p.start()
|
|
||||||
return p, c
|
|
||||||
|
|
||||||
def opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
|
|
||||||
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
|
|
||||||
report_every=10, *args, **kwargs):
|
|
||||||
p, c = self.opt_async(f, df, x0, callback, update_rule, messages,
|
|
||||||
maxiter, max_f_eval, gtol,
|
|
||||||
report_every, *args, **kwargs)
|
|
||||||
while self.runsignal.is_set():
|
|
||||||
try:
|
|
||||||
p.join(1)
|
|
||||||
if c: c.join(1)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
# print "^C"
|
|
||||||
self.runsignal.clear()
|
|
||||||
p.join()
|
|
||||||
if c: c.join()
|
|
||||||
if c and c.is_alive():
|
|
||||||
# self.runsignal.set()
|
|
||||||
# while self.runsignal.is_set():
|
|
||||||
# try:
|
|
||||||
# c.join(.1)
|
|
||||||
# except KeyboardInterrupt:
|
|
||||||
# # print "^C"
|
|
||||||
# self.runsignal.clear()
|
|
||||||
# c.join()
|
|
||||||
print("WARNING: callback still running, optimisation done!")
|
|
||||||
return p.result
|
|
||||||
|
|
||||||
class CGD(Async_Optimize):
|
|
||||||
'''
|
|
||||||
Conjugate gradient descent algorithm to minimize
|
|
||||||
function f with gradients df, starting at x0
|
|
||||||
with update rule update_rule
|
|
||||||
|
|
||||||
if df returns tuple (grad, natgrad) it will optimize according
|
|
||||||
to natural gradient rules
|
|
||||||
'''
|
|
||||||
opt_name = "Conjugate Gradient Descent"
|
|
||||||
|
|
||||||
def opt_async(self, *a, **kw):
|
|
||||||
"""
|
|
||||||
opt_async(self, f, df, x0, callback, update_rule=FletcherReeves,
|
|
||||||
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
|
|
||||||
report_every=10, \*args, \*\*kwargs)
|
|
||||||
|
|
||||||
callback gets called every `report_every` iterations
|
|
||||||
|
|
||||||
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
|
|
||||||
|
|
||||||
if df returns tuple (grad, natgrad) it will optimize according
|
|
||||||
to natural gradient rules
|
|
||||||
|
|
||||||
f, and df will be called with
|
|
||||||
|
|
||||||
f(xi, \*args, \*\*kwargs)
|
|
||||||
df(xi, \*args, \*\*kwargs)
|
|
||||||
|
|
||||||
**Returns:**
|
|
||||||
|
|
||||||
Started `Process` object, optimizing asynchronously
|
|
||||||
|
|
||||||
**Calls:**
|
|
||||||
|
|
||||||
callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message)
|
|
||||||
|
|
||||||
at end of optimization!
|
|
||||||
"""
|
|
||||||
return super(CGD, self).opt_async(*a, **kw)
|
|
||||||
|
|
||||||
def opt(self, *a, **kw):
|
|
||||||
"""
|
|
||||||
opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
|
|
||||||
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
|
|
||||||
report_every=10, \*args, \*\*kwargs)
|
|
||||||
|
|
||||||
Minimize f, calling callback every `report_every` iterations with following syntax:
|
|
||||||
|
|
||||||
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
|
|
||||||
|
|
||||||
if df returns tuple (grad, natgrad) it will optimize according
|
|
||||||
to natural gradient rules
|
|
||||||
|
|
||||||
f, and df will be called with
|
|
||||||
|
|
||||||
f(xi, \*args, \*\*kwargs)
|
|
||||||
df(xi, \*args, \*\*kwargs)
|
|
||||||
|
|
||||||
**returns**
|
|
||||||
|
|
||||||
x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message
|
|
||||||
|
|
||||||
at end of optimization
|
|
||||||
"""
|
|
||||||
return super(CGD, self).opt(*a, **kw)
|
|
||||||
|
|
||||||
|
|
@ -1,51 +0,0 @@
|
||||||
# Copyright (c) 2012-2014, Max Zwiessele
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
import numpy
|
|
||||||
|
|
||||||
class GDUpdateRule():
|
|
||||||
_gradnat = None
|
|
||||||
_gradnatold = None
|
|
||||||
def __init__(self, initgrad, initgradnat=None):
|
|
||||||
self.grad = initgrad
|
|
||||||
if initgradnat:
|
|
||||||
self.gradnat = initgradnat
|
|
||||||
else:
|
|
||||||
self.gradnat = initgrad
|
|
||||||
# self.grad, self.gradnat
|
|
||||||
def _gamma(self):
|
|
||||||
raise NotImplemented("""Implement gamma update rule here,
|
|
||||||
you can use self.grad and self.gradold for parameters, as well as
|
|
||||||
self.gradnat and self.gradnatold for natural gradients.""")
|
|
||||||
def __call__(self, grad, gradnat=None, si=None, *args, **kw):
|
|
||||||
"""
|
|
||||||
Return gamma for given gradients and optional natural gradients
|
|
||||||
"""
|
|
||||||
if not gradnat:
|
|
||||||
gradnat = grad
|
|
||||||
self.gradold = self.grad
|
|
||||||
self.gradnatold = self.gradnat
|
|
||||||
self.grad = grad
|
|
||||||
self.gradnat = gradnat
|
|
||||||
self.si = si
|
|
||||||
return self._gamma(*args, **kw)
|
|
||||||
|
|
||||||
class FletcherReeves(GDUpdateRule):
|
|
||||||
'''
|
|
||||||
Fletcher Reeves update rule for gamma
|
|
||||||
'''
|
|
||||||
def _gamma(self, *a, **kw):
|
|
||||||
tmp = numpy.dot(self.grad.T, self.gradnat)
|
|
||||||
if tmp:
|
|
||||||
return tmp / numpy.dot(self.gradold.T, self.gradnatold)
|
|
||||||
return tmp
|
|
||||||
|
|
||||||
class PolakRibiere(GDUpdateRule):
|
|
||||||
'''
|
|
||||||
Fletcher Reeves update rule for gamma
|
|
||||||
'''
|
|
||||||
def _gamma(self, *a, **kw):
|
|
||||||
tmp = numpy.dot((self.grad - self.gradold).T, self.gradnat)
|
|
||||||
if tmp:
|
|
||||||
return tmp / numpy.dot(self.gradold.T, self.gradnatold)
|
|
||||||
return tmp
|
|
||||||
|
|
@ -1,289 +0,0 @@
|
||||||
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
import datetime as dt
|
|
||||||
from scipy import optimize
|
|
||||||
from warnings import warn
|
|
||||||
|
|
||||||
try:
|
|
||||||
import rasmussens_minimize as rasm
|
|
||||||
rasm_available = True
|
|
||||||
except ImportError:
|
|
||||||
rasm_available = False
|
|
||||||
from .scg import SCG
|
|
||||||
|
|
||||||
class Optimizer(object):
|
|
||||||
"""
|
|
||||||
Superclass for all the optimizers.
|
|
||||||
|
|
||||||
:param x_init: initial set of parameters
|
|
||||||
:param f_fp: function that returns the function AND the gradients at the same time
|
|
||||||
:param f: function to optimize
|
|
||||||
:param fp: gradients
|
|
||||||
:param messages: print messages from the optimizer?
|
|
||||||
:type messages: (True | False)
|
|
||||||
:param max_f_eval: maximum number of function evaluations
|
|
||||||
|
|
||||||
:rtype: optimizer object.
|
|
||||||
|
|
||||||
"""
|
|
||||||
def __init__(self, x_init=None, messages=False, max_f_eval=1e4, max_iters=1e3,
|
|
||||||
ftol=None, gtol=None, xtol=None, bfgs_factor=None):
|
|
||||||
self.opt_name = None
|
|
||||||
#x_init = x_init
|
|
||||||
# Turning messages off and using internal structure for print outs:
|
|
||||||
self.messages = False #messages
|
|
||||||
self.f_opt = None
|
|
||||||
self.x_opt = None
|
|
||||||
self.funct_eval = None
|
|
||||||
self.status = None
|
|
||||||
self.max_f_eval = int(max_iters)
|
|
||||||
self.max_iters = int(max_iters)
|
|
||||||
self.bfgs_factor = bfgs_factor
|
|
||||||
self.trace = None
|
|
||||||
self.time = "Not available"
|
|
||||||
self.xtol = xtol
|
|
||||||
self.gtol = gtol
|
|
||||||
self.ftol = ftol
|
|
||||||
|
|
||||||
def run(self, **kwargs):
|
|
||||||
start = dt.datetime.now()
|
|
||||||
self.opt(**kwargs)
|
|
||||||
end = dt.datetime.now()
|
|
||||||
self.time = str(end - start)
|
|
||||||
|
|
||||||
def opt(self, x_init, f_fp=None, f=None, fp=None):
|
|
||||||
raise NotImplementedError("this needs to be implemented to use the optimizer class")
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
diagnostics = "Optimizer: \t\t\t\t %s\n" % self.opt_name
|
|
||||||
diagnostics += "f(x_opt): \t\t\t\t %.3f\n" % self.f_opt
|
|
||||||
diagnostics += "Number of function evaluations: \t %d\n" % self.funct_eval
|
|
||||||
diagnostics += "Optimization status: \t\t\t %s\n" % self.status
|
|
||||||
diagnostics += "Time elapsed: \t\t\t\t %s\n" % self.time
|
|
||||||
return diagnostics
|
|
||||||
|
|
||||||
def __getstate__(self):
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
class opt_tnc(Optimizer):
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
Optimizer.__init__(self, *args, **kwargs)
|
|
||||||
self.opt_name = "TNC (Scipy implementation)"
|
|
||||||
|
|
||||||
def opt(self, x_init, f_fp=None, f=None, fp=None):
|
|
||||||
"""
|
|
||||||
Run the TNC optimizer
|
|
||||||
|
|
||||||
"""
|
|
||||||
tnc_rcstrings = ['Local minimum', 'Converged', 'XConverged', 'Maximum number of f evaluations reached',
|
|
||||||
'Line search failed', 'Function is constant']
|
|
||||||
|
|
||||||
assert f_fp != None, "TNC requires f_fp"
|
|
||||||
|
|
||||||
opt_dict = {}
|
|
||||||
if self.xtol is not None:
|
|
||||||
opt_dict['xtol'] = self.xtol
|
|
||||||
if self.ftol is not None:
|
|
||||||
opt_dict['ftol'] = self.ftol
|
|
||||||
if self.gtol is not None:
|
|
||||||
opt_dict['pgtol'] = self.gtol
|
|
||||||
|
|
||||||
opt_result = optimize.fmin_tnc(f_fp, x_init, messages=self.messages,
|
|
||||||
maxfun=self.max_f_eval, **opt_dict)
|
|
||||||
self.x_opt = opt_result[0]
|
|
||||||
self.f_opt = f_fp(self.x_opt)[0]
|
|
||||||
self.funct_eval = opt_result[1]
|
|
||||||
self.status = tnc_rcstrings[opt_result[2]]
|
|
||||||
|
|
||||||
class opt_lbfgsb(Optimizer):
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
Optimizer.__init__(self, *args, **kwargs)
|
|
||||||
self.opt_name = "L-BFGS-B (Scipy implementation)"
|
|
||||||
|
|
||||||
def opt(self, x_init, f_fp=None, f=None, fp=None):
|
|
||||||
"""
|
|
||||||
Run the optimizer
|
|
||||||
|
|
||||||
"""
|
|
||||||
rcstrings = ['Converged', 'Maximum number of f evaluations reached', 'Error']
|
|
||||||
|
|
||||||
assert f_fp != None, "BFGS requires f_fp"
|
|
||||||
|
|
||||||
if self.messages:
|
|
||||||
iprint = 1
|
|
||||||
else:
|
|
||||||
iprint = -1
|
|
||||||
|
|
||||||
opt_dict = {}
|
|
||||||
if self.xtol is not None:
|
|
||||||
print("WARNING: l-bfgs-b doesn't have an xtol arg, so I'm going to ignore it")
|
|
||||||
if self.ftol is not None:
|
|
||||||
print("WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it")
|
|
||||||
if self.gtol is not None:
|
|
||||||
opt_dict['pgtol'] = self.gtol
|
|
||||||
if self.bfgs_factor is not None:
|
|
||||||
opt_dict['factr'] = self.bfgs_factor
|
|
||||||
|
|
||||||
opt_result = optimize.fmin_l_bfgs_b(f_fp, x_init, iprint=iprint,
|
|
||||||
maxfun=self.max_iters, **opt_dict)
|
|
||||||
self.x_opt = opt_result[0]
|
|
||||||
self.f_opt = f_fp(self.x_opt)[0]
|
|
||||||
self.funct_eval = opt_result[2]['funcalls']
|
|
||||||
self.status = rcstrings[opt_result[2]['warnflag']]
|
|
||||||
|
|
||||||
#a more helpful error message is available in opt_result in the Error case
|
|
||||||
if opt_result[2]['warnflag']==2:
|
|
||||||
self.status = 'Error' + str(opt_result[2]['task'])
|
|
||||||
|
|
||||||
class opt_bfgs(Optimizer):
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
Optimizer.__init__(self, *args, **kwargs)
|
|
||||||
self.opt_name = "BFGS (Scipy implementation)"
|
|
||||||
|
|
||||||
def opt(self, x_init, f_fp=None, f=None, fp=None):
|
|
||||||
"""
|
|
||||||
Run the optimizer
|
|
||||||
|
|
||||||
"""
|
|
||||||
rcstrings = ['','Maximum number of iterations exceeded', 'Gradient and/or function calls not changing']
|
|
||||||
|
|
||||||
opt_dict = {}
|
|
||||||
if self.xtol is not None:
|
|
||||||
print("WARNING: bfgs doesn't have an xtol arg, so I'm going to ignore it")
|
|
||||||
if self.ftol is not None:
|
|
||||||
print("WARNING: bfgs doesn't have an ftol arg, so I'm going to ignore it")
|
|
||||||
if self.gtol is not None:
|
|
||||||
opt_dict['pgtol'] = self.gtol
|
|
||||||
|
|
||||||
opt_result = optimize.fmin_bfgs(f, x_init, fp, disp=self.messages,
|
|
||||||
maxiter=self.max_iters, full_output=True, **opt_dict)
|
|
||||||
self.x_opt = opt_result[0]
|
|
||||||
self.f_opt = f_fp(self.x_opt)[0]
|
|
||||||
self.funct_eval = opt_result[4]
|
|
||||||
self.status = rcstrings[opt_result[6]]
|
|
||||||
|
|
||||||
class opt_simplex(Optimizer):
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
Optimizer.__init__(self, *args, **kwargs)
|
|
||||||
self.opt_name = "Nelder-Mead simplex routine (via Scipy)"
|
|
||||||
|
|
||||||
def opt(self, x_init, f_fp=None, f=None, fp=None):
|
|
||||||
"""
|
|
||||||
The simplex optimizer does not require gradients.
|
|
||||||
"""
|
|
||||||
|
|
||||||
statuses = ['Converged', 'Maximum number of function evaluations made', 'Maximum number of iterations reached']
|
|
||||||
|
|
||||||
opt_dict = {}
|
|
||||||
if self.xtol is not None:
|
|
||||||
opt_dict['xtol'] = self.xtol
|
|
||||||
if self.ftol is not None:
|
|
||||||
opt_dict['ftol'] = self.ftol
|
|
||||||
if self.gtol is not None:
|
|
||||||
print("WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it")
|
|
||||||
|
|
||||||
opt_result = optimize.fmin(f, x_init, (), disp=self.messages,
|
|
||||||
maxfun=self.max_f_eval, full_output=True, **opt_dict)
|
|
||||||
|
|
||||||
self.x_opt = opt_result[0]
|
|
||||||
self.f_opt = opt_result[1]
|
|
||||||
self.funct_eval = opt_result[3]
|
|
||||||
self.status = statuses[opt_result[4]]
|
|
||||||
self.trace = None
|
|
||||||
|
|
||||||
|
|
||||||
class opt_rasm(Optimizer):
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
Optimizer.__init__(self, *args, **kwargs)
|
|
||||||
self.opt_name = "Rasmussen's Conjugate Gradient"
|
|
||||||
|
|
||||||
def opt(self, x_init, f_fp=None, f=None, fp=None):
|
|
||||||
"""
|
|
||||||
Run Rasmussen's Conjugate Gradient optimizer
|
|
||||||
"""
|
|
||||||
|
|
||||||
assert f_fp != None, "Rasmussen's minimizer requires f_fp"
|
|
||||||
statuses = ['Converged', 'Line search failed', 'Maximum number of f evaluations reached',
|
|
||||||
'NaNs in optimization']
|
|
||||||
|
|
||||||
opt_dict = {}
|
|
||||||
if self.xtol is not None:
|
|
||||||
print("WARNING: minimize doesn't have an xtol arg, so I'm going to ignore it")
|
|
||||||
if self.ftol is not None:
|
|
||||||
print("WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it")
|
|
||||||
if self.gtol is not None:
|
|
||||||
print("WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it")
|
|
||||||
|
|
||||||
opt_result = rasm.minimize(x_init, f_fp, (), messages=self.messages,
|
|
||||||
maxnumfuneval=self.max_f_eval)
|
|
||||||
self.x_opt = opt_result[0]
|
|
||||||
self.f_opt = opt_result[1][-1]
|
|
||||||
self.funct_eval = opt_result[2]
|
|
||||||
self.status = statuses[opt_result[3]]
|
|
||||||
|
|
||||||
self.trace = opt_result[1]
|
|
||||||
|
|
||||||
class opt_SCG(Optimizer):
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
if 'max_f_eval' in kwargs:
|
|
||||||
warn("max_f_eval deprecated for SCG optimizer: use max_iters instead!\nIgnoring max_f_eval!", FutureWarning)
|
|
||||||
Optimizer.__init__(self, *args, **kwargs)
|
|
||||||
|
|
||||||
self.opt_name = "Scaled Conjugate Gradients"
|
|
||||||
|
|
||||||
def opt(self, x_init, f_fp=None, f=None, fp=None):
|
|
||||||
assert not f is None
|
|
||||||
assert not fp is None
|
|
||||||
|
|
||||||
opt_result = SCG(f, fp, x_init, display=self.messages,
|
|
||||||
maxiters=self.max_iters,
|
|
||||||
max_f_eval=self.max_f_eval,
|
|
||||||
xtol=self.xtol, ftol=self.ftol,
|
|
||||||
gtol=self.gtol)
|
|
||||||
|
|
||||||
self.x_opt = opt_result[0]
|
|
||||||
self.trace = opt_result[1]
|
|
||||||
self.f_opt = self.trace[-1]
|
|
||||||
self.funct_eval = opt_result[2]
|
|
||||||
self.status = opt_result[3]
|
|
||||||
|
|
||||||
class Opt_Adadelta(Optimizer):
|
|
||||||
def __init__(self, step_rate=0.1, decay=0.9, momentum=0, *args, **kwargs):
|
|
||||||
Optimizer.__init__(self, *args, **kwargs)
|
|
||||||
self.opt_name = "Adadelta (climin)"
|
|
||||||
self.step_rate=step_rate
|
|
||||||
self.decay = decay
|
|
||||||
self.momentum = momentum
|
|
||||||
|
|
||||||
def opt(self, x_init, f_fp=None, f=None, fp=None):
|
|
||||||
assert not fp is None
|
|
||||||
|
|
||||||
import climin
|
|
||||||
|
|
||||||
opt = climin.adadelta.Adadelta(x_init, fp, step_rate=self.step_rate, decay=self.decay, momentum=self.momentum)
|
|
||||||
|
|
||||||
for info in opt:
|
|
||||||
if info['n_iter']>=self.max_iters:
|
|
||||||
self.x_opt = opt.wrt
|
|
||||||
self.status = 'maximum number of function evaluations exceeded '
|
|
||||||
break
|
|
||||||
|
|
||||||
def get_optimizer(f_min):
|
|
||||||
|
|
||||||
optimizers = {'fmin_tnc': opt_tnc,
|
|
||||||
'simplex': opt_simplex,
|
|
||||||
'lbfgsb': opt_lbfgsb,
|
|
||||||
'org-bfgs': opt_bfgs,
|
|
||||||
'scg': opt_SCG,
|
|
||||||
'adadelta':Opt_Adadelta}
|
|
||||||
|
|
||||||
if rasm_available:
|
|
||||||
optimizers['rasmussen'] = opt_rasm
|
|
||||||
|
|
||||||
for opt_name in optimizers.keys():
|
|
||||||
if opt_name.lower().find(f_min.lower()) != -1:
|
|
||||||
return optimizers[opt_name]
|
|
||||||
|
|
||||||
raise KeyError('No optimizer was found matching the name: %s' % f_min)
|
|
||||||
|
|
@ -1,193 +0,0 @@
|
||||||
# Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2014)
|
|
||||||
|
|
||||||
# Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
|
|
||||||
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
|
|
||||||
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
|
|
||||||
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|
||||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
||||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
|
||||||
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
||||||
# HOWEVER CAUSED AND ON ANY THEORY OF
|
|
||||||
# LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
|
||||||
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
|
|
||||||
from __future__ import print_function
|
|
||||||
import numpy as np
|
|
||||||
import sys
|
|
||||||
|
|
||||||
def print_out(len_maxiters, fnow, current_grad, beta, iteration):
|
|
||||||
print('\r', end=' ')
|
|
||||||
print('{0:>0{mi}g} {1:> 12e} {2:< 12.6e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
|
|
||||||
sys.stdout.flush()
|
|
||||||
|
|
||||||
def exponents(fnow, current_grad):
|
|
||||||
exps = [np.abs(np.float(fnow)), current_grad]
|
|
||||||
return np.sign(exps) * np.log10(exps).astype(int)
|
|
||||||
|
|
||||||
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, xtol=None, ftol=None, gtol=None):
|
|
||||||
"""
|
|
||||||
Optimisation through Scaled Conjugate Gradients (SCG)
|
|
||||||
|
|
||||||
f: the objective function
|
|
||||||
gradf : the gradient function (should return a 1D np.ndarray)
|
|
||||||
x : the initial condition
|
|
||||||
|
|
||||||
Returns
|
|
||||||
x the optimal value for x
|
|
||||||
flog : a list of all the objective values
|
|
||||||
function_eval number of fn evaluations
|
|
||||||
status: string describing convergence status
|
|
||||||
"""
|
|
||||||
if xtol is None:
|
|
||||||
xtol = 1e-6
|
|
||||||
if ftol is None:
|
|
||||||
ftol = 1e-6
|
|
||||||
if gtol is None:
|
|
||||||
gtol = 1e-5
|
|
||||||
|
|
||||||
sigma0 = 1.0e-7
|
|
||||||
fold = f(x, *optargs) # Initial function value.
|
|
||||||
function_eval = 1
|
|
||||||
fnow = fold
|
|
||||||
gradnew = gradf(x, *optargs) # Initial gradient.
|
|
||||||
function_eval += 1
|
|
||||||
#if any(np.isnan(gradnew)):
|
|
||||||
# raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
|
|
||||||
current_grad = np.dot(gradnew, gradnew)
|
|
||||||
gradold = gradnew.copy()
|
|
||||||
d = -gradnew # Initial search direction.
|
|
||||||
success = True # Force calculation of directional derivs.
|
|
||||||
nsuccess = 0 # nsuccess counts number of successes.
|
|
||||||
beta = 1.0 # Initial scale parameter.
|
|
||||||
betamin = 1.0e-15 # Lower bound on scale.
|
|
||||||
betamax = 1.0e15 # Upper bound on scale.
|
|
||||||
status = "Not converged"
|
|
||||||
|
|
||||||
flog = [fold]
|
|
||||||
|
|
||||||
iteration = 0
|
|
||||||
|
|
||||||
len_maxiters = len(str(maxiters))
|
|
||||||
if display:
|
|
||||||
print(' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters))
|
|
||||||
exps = exponents(fnow, current_grad)
|
|
||||||
p_iter = iteration
|
|
||||||
|
|
||||||
# Main optimization loop.
|
|
||||||
while iteration < maxiters:
|
|
||||||
|
|
||||||
# Calculate first and second directional derivatives.
|
|
||||||
if success:
|
|
||||||
mu = np.dot(d, gradnew)
|
|
||||||
if mu >= 0:
|
|
||||||
d = -gradnew
|
|
||||||
mu = np.dot(d, gradnew)
|
|
||||||
kappa = np.dot(d, d)
|
|
||||||
sigma = sigma0 / np.sqrt(kappa)
|
|
||||||
xplus = x + sigma * d
|
|
||||||
gplus = gradf(xplus, *optargs)
|
|
||||||
function_eval += 1
|
|
||||||
theta = np.dot(d, (gplus - gradnew)) / sigma
|
|
||||||
|
|
||||||
# Increase effective curvature and evaluate step size alpha.
|
|
||||||
delta = theta + beta * kappa
|
|
||||||
if delta <= 0:
|
|
||||||
delta = beta * kappa
|
|
||||||
beta = beta - theta / kappa
|
|
||||||
|
|
||||||
alpha = -mu / delta
|
|
||||||
|
|
||||||
# Calculate the comparison ratio.
|
|
||||||
xnew = x + alpha * d
|
|
||||||
fnew = f(xnew, *optargs)
|
|
||||||
function_eval += 1
|
|
||||||
|
|
||||||
if function_eval >= max_f_eval:
|
|
||||||
status = "maximum number of function evaluations exceeded"
|
|
||||||
break
|
|
||||||
return x, flog, function_eval, status
|
|
||||||
|
|
||||||
Delta = 2.*(fnew - fold) / (alpha * mu)
|
|
||||||
if Delta >= 0.:
|
|
||||||
success = True
|
|
||||||
nsuccess += 1
|
|
||||||
x = xnew
|
|
||||||
fnow = fnew
|
|
||||||
else:
|
|
||||||
success = False
|
|
||||||
fnow = fold
|
|
||||||
|
|
||||||
# Store relevant variables
|
|
||||||
flog.append(fnow) # Current function value
|
|
||||||
|
|
||||||
iteration += 1
|
|
||||||
if display:
|
|
||||||
print_out(len_maxiters, fnow, current_grad, beta, iteration)
|
|
||||||
n_exps = exponents(fnow, current_grad)
|
|
||||||
if iteration - p_iter >= 20 * np.random.rand():
|
|
||||||
a = iteration >= p_iter * 2.78
|
|
||||||
b = np.any(n_exps < exps)
|
|
||||||
if a or b:
|
|
||||||
p_iter = iteration
|
|
||||||
print('')
|
|
||||||
if b:
|
|
||||||
exps = n_exps
|
|
||||||
|
|
||||||
if success:
|
|
||||||
# Test for termination
|
|
||||||
|
|
||||||
if (np.abs(fnew - fold) < ftol):
|
|
||||||
status = 'converged - relative reduction in objective'
|
|
||||||
break
|
|
||||||
# return x, flog, function_eval, status
|
|
||||||
elif (np.max(np.abs(alpha * d)) < xtol):
|
|
||||||
status = 'converged - relative stepsize'
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
# Update variables for new position
|
|
||||||
gradold = gradnew
|
|
||||||
gradnew = gradf(x, *optargs)
|
|
||||||
function_eval += 1
|
|
||||||
current_grad = np.dot(gradnew, gradnew)
|
|
||||||
fold = fnew
|
|
||||||
# If the gradient is zero then we are done.
|
|
||||||
if current_grad <= gtol:
|
|
||||||
status = 'converged - relative reduction in gradient'
|
|
||||||
break
|
|
||||||
# return x, flog, function_eval, status
|
|
||||||
|
|
||||||
# Adjust beta according to comparison ratio.
|
|
||||||
if Delta < 0.25:
|
|
||||||
beta = min(4.0 * beta, betamax)
|
|
||||||
if Delta > 0.75:
|
|
||||||
beta = max(0.25 * beta, betamin)
|
|
||||||
|
|
||||||
# Update search direction using Polak-Ribiere formula, or re-start
|
|
||||||
# in direction of negative gradient after nparams steps.
|
|
||||||
if nsuccess == x.size:
|
|
||||||
d = -gradnew
|
|
||||||
beta = 1. # This is not in the original paper
|
|
||||||
nsuccess = 0
|
|
||||||
elif success:
|
|
||||||
Gamma = np.dot(gradold - gradnew, gradnew) / (mu)
|
|
||||||
d = Gamma * d - gradnew
|
|
||||||
else:
|
|
||||||
# If we get here, then we haven't terminated in the given number of
|
|
||||||
# iterations.
|
|
||||||
status = "maxiter exceeded"
|
|
||||||
|
|
||||||
if display:
|
|
||||||
print_out(len_maxiters, fnow, current_grad, beta, iteration)
|
|
||||||
print("")
|
|
||||||
print(status)
|
|
||||||
return x, flog, function_eval, status
|
|
||||||
|
|
@ -1,92 +0,0 @@
|
||||||
# Copyright (c) 2012-2014, Max Zwiessele
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
class StochasticStorage(object):
|
|
||||||
'''
|
|
||||||
This is a container for holding the stochastic parameters,
|
|
||||||
such as subset indices or step length and so on.
|
|
||||||
|
|
||||||
self.d has to be a list of lists:
|
|
||||||
[dimension indices, nan indices for those dimensions]
|
|
||||||
so that the minibatches can be used as efficiently as possible.10
|
|
||||||
'''
|
|
||||||
def __init__(self, model):
|
|
||||||
"""
|
|
||||||
Initialize this stochastic container using the given model
|
|
||||||
"""
|
|
||||||
|
|
||||||
def do_stochastics(self):
|
|
||||||
"""
|
|
||||||
Update the internal state to the next batch of the stochastic
|
|
||||||
descent algorithm.
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def reset(self):
|
|
||||||
"""
|
|
||||||
Reset the state of this stochastics generator.
|
|
||||||
"""
|
|
||||||
|
|
||||||
class SparseGPMissing(StochasticStorage):
|
|
||||||
def __init__(self, model, batchsize=1):
|
|
||||||
"""
|
|
||||||
Here we want to loop over all dimensions everytime.
|
|
||||||
Thus, we can just make sure the loop goes over self.d every
|
|
||||||
time. We will try to get batches which look the same together
|
|
||||||
which speeds up calculations significantly.
|
|
||||||
"""
|
|
||||||
import numpy as np
|
|
||||||
self.Y = model.Y_normalized
|
|
||||||
bdict = {}
|
|
||||||
#For N > 1000 array2string default crops
|
|
||||||
opt = np.get_printoptions()
|
|
||||||
np.set_printoptions(threshold=np.inf)
|
|
||||||
for d in range(self.Y.shape[1]):
|
|
||||||
inan = np.isnan(self.Y)[:, d]
|
|
||||||
arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
|
|
||||||
try:
|
|
||||||
bdict[arr_str][0].append(d)
|
|
||||||
except:
|
|
||||||
bdict[arr_str] = [[d], ~inan]
|
|
||||||
np.set_printoptions(**opt)
|
|
||||||
self.d = bdict.values()
|
|
||||||
|
|
||||||
class SparseGPStochastics(StochasticStorage):
|
|
||||||
"""
|
|
||||||
For the sparse gp we need to store the dimension we are in,
|
|
||||||
and the indices corresponding to those
|
|
||||||
"""
|
|
||||||
def __init__(self, model, batchsize=1, missing_data=True):
|
|
||||||
self.batchsize = batchsize
|
|
||||||
self.output_dim = model.Y.shape[1]
|
|
||||||
self.Y = model.Y_normalized
|
|
||||||
self.missing_data = missing_data
|
|
||||||
self.reset()
|
|
||||||
self.do_stochastics()
|
|
||||||
|
|
||||||
def do_stochastics(self):
|
|
||||||
import numpy as np
|
|
||||||
if self.batchsize == 1:
|
|
||||||
self.current_dim = (self.current_dim+1)%self.output_dim
|
|
||||||
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
|
|
||||||
else:
|
|
||||||
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
|
|
||||||
bdict = {}
|
|
||||||
if self.missing_data:
|
|
||||||
opt = np.get_printoptions()
|
|
||||||
np.set_printoptions(threshold=np.inf)
|
|
||||||
for d in self.d:
|
|
||||||
inan = np.isnan(self.Y[:, d])
|
|
||||||
arr_str = np.array2string(inan,np.inf, 0,True, '',formatter={'bool':lambda x: '1' if x else '0'})
|
|
||||||
try:
|
|
||||||
bdict[arr_str][0].append(d)
|
|
||||||
except:
|
|
||||||
bdict[arr_str] = [[d], ~inan]
|
|
||||||
np.set_printoptions(**opt)
|
|
||||||
self.d = bdict.values()
|
|
||||||
else:
|
|
||||||
self.d = [[self.d, None]]
|
|
||||||
|
|
||||||
def reset(self):
|
|
||||||
self.current_dim = -1
|
|
||||||
self.d = None
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue