mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-24 14:15:14 +02:00
[optimization] deleted and backwardscompatible
This commit is contained in:
parent
850c10beaa
commit
913d80f712
8 changed files with 11 additions and 914 deletions
|
|
@ -20,7 +20,6 @@ backwards_compatibility = ['lists_and_dicts', 'observable_array', 'ties_and_rema
|
|||
for bc in backwards_compatibility:
|
||||
sys.modules['GPy.core.parameterization.{!s}'.format(bc)] = getattr(core.parameterization, bc)
|
||||
|
||||
|
||||
# Direct imports for convenience:
|
||||
from .core import Model
|
||||
from .core.parameterization import priors
|
||||
|
|
@ -52,5 +51,6 @@ def load(file_or_path):
|
|||
for name, module in inspect.getmembers(kern.src): # @UndefinedVariable
|
||||
if not name.startswith('_'):
|
||||
sys.modules['GPy.kern._src.{}'.format(name)] = module
|
||||
sys.modules['GPy.inference.optimization'] = inference.optimization
|
||||
import paramz
|
||||
return paramz.load(file_or_path)
|
||||
|
|
@ -1,3 +1,7 @@
|
|||
from . import latent_function_inference
|
||||
from . import optimization
|
||||
from . import latent_function_inference
|
||||
from . import mcmc
|
||||
|
||||
import sys
|
||||
sys.modules['GPy.inference.optimization'] = optimization
|
||||
sys.modules['GPy.inference.optimization.optimization'] = optimization
|
||||
|
|
|
|||
|
|
@ -1,2 +1,5 @@
|
|||
from .scg import SCG
|
||||
from .optimization import *
|
||||
from paramz.optimization import stochastics, Optimizer
|
||||
from paramz.optimization import *
|
||||
import sys
|
||||
sys.modules['GPy.inference.optimization.stochastics'] = stochastics
|
||||
sys.modules['GPy.inference.optimization.Optimizer'] = Optimizer
|
||||
|
|
@ -1,285 +0,0 @@
|
|||
# Copyright (c) 2012-2014, Max Zwiessele
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from .gradient_descent_update_rules import FletcherReeves, \
|
||||
PolakRibiere
|
||||
from Queue import Empty
|
||||
from multiprocessing import Value
|
||||
from multiprocessing.queues import Queue
|
||||
from multiprocessing.synchronize import Event
|
||||
from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
|
||||
from threading import Thread
|
||||
import numpy
|
||||
import sys
|
||||
import time
|
||||
|
||||
# Human-readable status strings reported by the asynchronous
# conjugate-gradient optimizer (passed to callbacks and stored in the result).
RUNNING = "running"
CONVERGED = "converged"
MAXITER = "maximum number of iterations reached"
MAX_F_EVAL = "maximum number of function calls reached"
LINE_SEARCH = "line search failed"
KBINTERRUPT = "interrupted"
|
||||
|
||||
class _Async_Optimization(Thread):
|
||||
|
||||
def __init__(self, f, df, x0, update_rule, runsignal, SENTINEL,
|
||||
report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3,
|
||||
gtol=1e-6, outqueue=None, *args, **kw):
|
||||
"""
|
||||
Helper Process class for async optimization
|
||||
|
||||
f_call and df_call are Multiprocessing Values, for synchronized assignment
|
||||
"""
|
||||
self.f_call = Value('i', 0)
|
||||
self.df_call = Value('i', 0)
|
||||
self.f = self.f_wrapper(f, self.f_call)
|
||||
self.df = self.f_wrapper(df, self.df_call)
|
||||
self.x0 = x0
|
||||
self.update_rule = update_rule
|
||||
self.report_every = report_every
|
||||
self.messages = messages
|
||||
self.maxiter = maxiter
|
||||
self.max_f_eval = max_f_eval
|
||||
self.gtol = gtol
|
||||
self.SENTINEL = SENTINEL
|
||||
self.runsignal = runsignal
|
||||
# self.parent = parent
|
||||
# self.result = None
|
||||
self.outq = outqueue
|
||||
super(_Async_Optimization, self).__init__(target=self.run,
|
||||
name="CG Optimization",
|
||||
*args, **kw)
|
||||
|
||||
# def __enter__(self):
|
||||
# return self
|
||||
#
|
||||
# def __exit__(self, type, value, traceback):
|
||||
# return isinstance(value, TypeError)
|
||||
|
||||
def f_wrapper(self, f, counter):
|
||||
def f_w(*a, **kw):
|
||||
counter.value += 1
|
||||
return f(*a, **kw)
|
||||
return f_w
|
||||
|
||||
def callback(self, *a):
|
||||
if self.outq is not None:
|
||||
self.outq.put(a)
|
||||
# self.parent and self.parent.callback(*a, **kw)
|
||||
pass
|
||||
# print "callback done"
|
||||
|
||||
def callback_return(self, *a):
|
||||
self.callback(*a)
|
||||
if self.outq is not None:
|
||||
self.outq.put(self.SENTINEL)
|
||||
if self.messages:
|
||||
print("")
|
||||
self.runsignal.clear()
|
||||
|
||||
def run(self, *args, **kwargs):
|
||||
raise NotImplementedError("Overwrite this with optimization (for async use)")
|
||||
pass
|
||||
|
||||
class _CGDAsync(_Async_Optimization):
    """Worker thread running a nonlinear conjugate-gradient descent."""

    def reset(self, xi, *a, **kw):
        """Restart the search at ``xi``.

        :returns: ``(gi, ur, si)`` -- the negated gradient at ``xi``, a new
            update-rule instance built from it, and the initial search
            direction (the negated gradient itself).
        """
        gi = -self.df(xi, *a, **kw)
        si = gi
        ur = self.update_rule(gi)
        return gi, ur, si

    def run(self, *a, **kw):
        """Run the descent until convergence, a limit, or a cleared run signal.

        The final state is reported through ``callback_return`` and stored in
        ``self.result`` as
        ``[xi, fi, gi, it, function_calls, gradient_calls, status]``.
        """
        status = RUNNING

        fi = self.f(self.x0)
        fi_old = fi + 5000

        gi, ur, si = self.reset(self.x0, *a, **kw)
        # xi aliases self.x0; the in-place ``+=`` below therefore also
        # modifies the array that was passed in as x0.
        xi = self.x0
        # NOTE(review): xi_old is never updated inside the loop, so the NaN
        # branch below always takes the "converged" exit -- confirm intent.
        xi_old = numpy.nan
        it = 0

        while it < self.maxiter:
            if not self.runsignal.is_set():
                break

            if self.f_call.value > self.max_f_eval:
                status = MAX_F_EVAL
                # Stop here: without this break the limit was only recorded
                # and the loop kept evaluating the objective.
                break

            gi = -self.df(xi, *a, **kw)
            if numpy.dot(gi.T, gi) <= self.gtol:
                status = CONVERGED
                break
            if numpy.isnan(numpy.dot(gi.T, gi)):
                if numpy.any(numpy.isnan(xi_old)):
                    status = CONVERGED
                    break
                self.reset(xi_old)

            gammai = ur(gi)
            # Restart with steepest descent when gamma is tiny or every
            # ``xi.shape[0]`` iterations.
            if gammai < 1e-6 or it % xi.shape[0] == 0:
                gi, ur, si = self.reset(xi, *a, **kw)
            si = gi + gammai * si
            alphai, _, _, fi2, fi_old2, gfi = line_search_wolfe1(self.f,
                                                                 self.df,
                                                                 xi,
                                                                 si, gi,
                                                                 fi, fi_old)
            if alphai is None:
                # First line search failed; fall back to the second variant.
                alphai, _, _, fi2, fi_old2, gfi = \
                    line_search_wolfe2(self.f, self.df,
                                       xi, si, gi,
                                       fi, fi_old)
            if alphai is None:
                # This line search also failed to find a better solution.
                status = LINE_SEARCH
                break
            if fi2 < fi:
                fi, fi_old = fi2, fi_old2
            if gfi is not None:
                gi = gfi

            if numpy.isnan(fi) or fi_old < fi:
                gi, ur, si = self.reset(xi, *a, **kw)
            else:
                xi += numpy.dot(alphai, si)
                if self.messages:
                    sys.stdout.write("\r")
                    sys.stdout.flush()
                    sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi)))

            if it % self.report_every == 0:
                self.callback(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
            it += 1
        else:
            # Loop ran out of iterations without hitting a break.
            status = MAXITER
        self.callback_return(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
        self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value, status]
|
||||
|
||||
class Async_Optimize(object):
    """Front end that starts a ``_CGDAsync`` worker thread and, optionally, a
    second thread that forwards the worker's reports to a user callback."""

    # Default callback: ignore everything that is reported.
    callback = lambda *x: None
    # Class-level run signal (one Event object defined on the class).
    runsignal = Event()
    # Marker put on the queue after the worker's final report.
    SENTINEL = "SENTINEL"

    def async_callback_collect(self, q):
        """Forward tuples from ``q`` to ``self.callback`` until the sentinel
        arrives or the run signal is cleared."""
        def next_report():
            return q.get(timeout=1)

        while self.runsignal.is_set():
            try:
                for report in iter(next_report, self.SENTINEL):
                    self.callback(*report)
                self.runsignal.clear()
            except Empty:
                # Nothing arrived within the timeout; check the signal again.
                pass

    def opt_async(self, f, df, x0, callback, update_rule=PolakRibiere,
                  messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
                  report_every=10, *args, **kwargs):
        """Start the optimization and return without waiting for it.

        :returns: ``(p, c)`` -- the started worker thread and the started
            callback-collector thread (``None`` when ``callback`` is falsy).
        """
        self.runsignal.set()
        collector = None
        outqueue = None
        if callback:
            outqueue = Queue()
            self.callback = callback
            collector = Thread(target=self.async_callback_collect, args=(outqueue,))
            collector.start()
        worker = _CGDAsync(f, df, x0, update_rule, self.runsignal, self.SENTINEL,
                           report_every=report_every, messages=messages, maxiter=maxiter,
                           max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs)
        worker.start()
        return worker, collector

    def opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
            messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
            report_every=10, *args, **kwargs):
        """Run the optimization, wait for it to finish and return the
        worker's ``result`` list. A KeyboardInterrupt clears the run signal
        and waits for the threads to stop."""
        worker, collector = self.opt_async(f, df, x0, callback, update_rule, messages,
                                           maxiter, max_f_eval, gtol,
                                           report_every, *args, **kwargs)
        while self.runsignal.is_set():
            try:
                worker.join(1)
                if collector:
                    collector.join(1)
            except KeyboardInterrupt:
                self.runsignal.clear()
                worker.join()
                if collector:
                    collector.join()
        if collector and collector.is_alive():
            print("WARNING: callback still running, optimisation done!")
        return worker.result
|
||||
|
||||
class CGD(Async_Optimize):
    '''
    Conjugate gradient descent algorithm to minimize
    function f with gradients df, starting at x0
    with update rule update_rule

    NOTE(review): earlier documentation stated that a df returning a tuple
    (grad, natgrad) enables natural-gradient optimization; the worker in this
    file negates the return value of df directly -- confirm before relying
    on tuple-valued gradients.
    '''
    opt_name = "Conjugate Gradient Descent"

    def opt_async(self, *a, **kw):
        r"""
        opt_async(self, f, df, x0, callback, update_rule=PolakRibiere,
                  messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
                  report_every=10, \*args, \*\*kwargs)

        Start the optimization in the background.

        callback gets called every `report_every` iterations

            callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)

        **Returns:**

            Tuple ``(p, c)``: the started optimization thread and the started
            callback-collector thread (``None`` if no callback was given).

        **Calls:**

            callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message)

            at end of optimization!
        """
        return super(CGD, self).opt_async(*a, **kw)

    def opt(self, *a, **kw):
        r"""
        opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
            messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
            report_every=10, \*args, \*\*kwargs)

        Minimize f, calling callback every `report_every` iterations with following syntax:

            callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)

        **returns**

            x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message

            at end of optimization
        """
        return super(CGD, self).opt(*a, **kw)
|
||||
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
# Copyright (c) 2012-2014, Max Zwiessele
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy
|
||||
|
||||
class GDUpdateRule():
    """Base class for conjugate-gradient ``gamma`` update rules.

    Keeps the current and previous gradient (``grad`` / ``gradold``) and
    natural gradient (``gradnat`` / ``gradnatold``); subclasses implement
    :meth:`_gamma` on top of these attributes.
    """
    _gradnat = None
    _gradnatold = None

    def __init__(self, initgrad, initgradnat=None):
        """
        :param initgrad: initial gradient
        :param initgradnat: initial natural gradient; defaults to ``initgrad``
        """
        self.grad = initgrad
        # Compare against None explicitly: truth-testing a multi-element
        # array raises ValueError.
        if initgradnat is not None:
            self.gradnat = initgradnat
        else:
            self.gradnat = initgrad

    def _gamma(self, *a, **kw):
        """Compute gamma from the stored gradients; subclasses override this."""
        raise NotImplementedError(
            "Implement gamma update rule here, "
            "you can use self.grad and self.gradold for parameters, as well as "
            "self.gradnat and self.gradnatold for natural gradients.")

    def __call__(self, grad, gradnat=None, si=None, *args, **kw):
        """
        Return gamma for given gradients and optional natural gradients

        The previously stored gradients become ``gradold`` / ``gradnatold``
        before the new ones are stored; extra arguments go to ``_gamma``.
        """
        if gradnat is None:
            gradnat = grad
        self.gradold = self.grad
        self.gradnatold = self.gradnat
        self.grad = grad
        self.gradnat = gradnat
        self.si = si
        return self._gamma(*args, **kw)
|
||||
|
||||
class FletcherReeves(GDUpdateRule):
    '''
    Fletcher Reeves update rule for gamma:
    dot(grad, gradnat) divided by dot(gradold, gradnatold).
    '''
    def _gamma(self, *a, **kw):
        numerator = numpy.dot(self.grad.T, self.gradnat)
        if not numerator:
            # A zero numerator is returned as is, skipping the division.
            return numerator
        return numerator / numpy.dot(self.gradold.T, self.gradnatold)
|
||||
|
||||
class PolakRibiere(GDUpdateRule):
    '''
    Polak Ribiere update rule for gamma:
    dot(grad - gradold, gradnat) divided by dot(gradold, gradnatold).
    '''
    def _gamma(self, *a, **kw):
        tmp = numpy.dot((self.grad - self.gradold).T, self.gradnat)
        if tmp:
            return tmp / numpy.dot(self.gradold.T, self.gradnatold)
        # A zero numerator is returned as is, skipping the division.
        return tmp
|
||||
|
|
@ -1,289 +0,0 @@
|
|||
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import datetime as dt
|
||||
from scipy import optimize
|
||||
from warnings import warn
|
||||
|
||||
try:
|
||||
import rasmussens_minimize as rasm
|
||||
rasm_available = True
|
||||
except ImportError:
|
||||
rasm_available = False
|
||||
from .scg import SCG
|
||||
|
||||
class Optimizer(object):
    """
    Superclass for all the optimizers.

    :param x_init: initial set of parameters (accepted but not stored)
    :param messages: print messages from the optimizer? (currently ignored,
        ``self.messages`` is always False)
    :type messages: (True | False)
    :param max_f_eval: maximum number of function evaluations (currently
        ignored, see the note in ``__init__``)
    :param max_iters: maximum number of iterations
    :param ftol: function tolerance handed to the concrete optimizer
    :param gtol: gradient tolerance handed to the concrete optimizer
    :param xtol: parameter tolerance handed to the concrete optimizer
    :param bfgs_factor: stored as ``self.bfgs_factor``

    :rtype: optimizer object.

    """
    def __init__(self, x_init=None, messages=False, max_f_eval=1e4, max_iters=1e3,
                 ftol=None, gtol=None, xtol=None, bfgs_factor=None):
        self.opt_name = None
        # Turning messages off and using internal structure for print outs:
        self.messages = False
        self.f_opt = None
        self.x_opt = None
        self.funct_eval = None
        self.status = None
        # NOTE(review): max_f_eval is taken from max_iters, not from the
        # max_f_eval argument. Kept as is because callers may rely on it.
        self.max_f_eval = int(max_iters)
        self.max_iters = int(max_iters)
        self.bfgs_factor = bfgs_factor
        self.trace = None
        self.time = "Not available"
        self.xtol = xtol
        self.gtol = gtol
        self.ftol = ftol

    def run(self, **kwargs):
        """Call ``self.opt(**kwargs)`` and store the elapsed wall-clock time
        as a string in ``self.time``."""
        start = dt.datetime.now()
        self.opt(**kwargs)
        end = dt.datetime.now()
        self.time = str(end - start)

    def opt(self, x_init, f_fp=None, f=None, fp=None):
        """Perform the optimization; concrete optimizers override this."""
        raise NotImplementedError("this needs to be implemented to use the optimizer class")

    def __str__(self):
        """Return a multi-line summary of the last run.

        Fields that are still ``None`` (no run yet) are printed as ``None``
        instead of failing in the numeric format.
        """
        f_opt = "None" if self.f_opt is None else "%.3f" % self.f_opt
        funct_eval = "None" if self.funct_eval is None else "%d" % self.funct_eval
        diagnostics = "Optimizer: \t\t\t\t %s\n" % self.opt_name
        diagnostics += "f(x_opt): \t\t\t\t %s\n" % f_opt
        diagnostics += "Number of function evaluations: \t %s\n" % funct_eval
        diagnostics += "Optimization status: \t\t\t %s\n" % self.status
        diagnostics += "Time elapsed: \t\t\t\t %s\n" % self.time
        return diagnostics

    def __getstate__(self):
        # No optimizer state is pickled.
        return []
|
||||
|
||||
|
||||
class opt_tnc(Optimizer):
    """Truncated Newton optimizer, wrapping ``scipy.optimize.fmin_tnc``."""

    def __init__(self, *args, **kwargs):
        Optimizer.__init__(self, *args, **kwargs)
        self.opt_name = "TNC (Scipy implementation)"

    def opt(self, x_init, f_fp=None, f=None, fp=None):
        """
        Run the TNC optimizer

        :param x_init: starting point
        :param f_fp: function returning the objective and its gradient (required)
        :param f: unused
        :param fp: unused
        """
        # Keyed by return code so that codes outside 0..5 (-1, 6, 7) get a
        # proper label instead of a wrong entry or an IndexError.
        tnc_rcstrings = {-1: 'Infeasible (lower bound > upper bound)',
                         0: 'Local minimum',
                         1: 'Converged',
                         2: 'XConverged',
                         3: 'Maximum number of f evaluations reached',
                         4: 'Line search failed',
                         5: 'Function is constant',
                         6: 'Unable to progress',
                         7: 'User requested end of minimization'}

        assert f_fp is not None, "TNC requires f_fp"

        opt_dict = {}
        if self.xtol is not None:
            opt_dict['xtol'] = self.xtol
        if self.ftol is not None:
            opt_dict['ftol'] = self.ftol
        if self.gtol is not None:
            opt_dict['pgtol'] = self.gtol

        opt_result = optimize.fmin_tnc(f_fp, x_init, messages=self.messages,
                                       maxfun=self.max_f_eval, **opt_dict)
        self.x_opt = opt_result[0]
        self.f_opt = f_fp(self.x_opt)[0]
        self.funct_eval = opt_result[1]
        rc = opt_result[2]
        self.status = tnc_rcstrings.get(rc, 'Unknown return code %s' % rc)
|
||||
|
||||
class opt_lbfgsb(Optimizer):
    """L-BFGS-B optimizer, wrapping ``scipy.optimize.fmin_l_bfgs_b``."""

    def __init__(self, *args, **kwargs):
        Optimizer.__init__(self, *args, **kwargs)
        self.opt_name = "L-BFGS-B (Scipy implementation)"

    def opt(self, x_init, f_fp=None, f=None, fp=None):
        """
        Run the optimizer

        :param x_init: starting point
        :param f_fp: function returning the objective and its gradient (required)
        :param f: unused
        :param fp: unused
        """
        rcstrings = ['Converged', 'Maximum number of f evaluations reached', 'Error']

        assert f_fp is not None, "BFGS requires f_fp"

        iprint = 1 if self.messages else -1

        opt_dict = {}
        if self.xtol is not None:
            print("WARNING: l-bfgs-b doesn't have an xtol arg, so I'm going to ignore it")
        if self.ftol is not None:
            print("WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it")
        if self.gtol is not None:
            opt_dict['pgtol'] = self.gtol
        if self.bfgs_factor is not None:
            opt_dict['factr'] = self.bfgs_factor

        opt_result = optimize.fmin_l_bfgs_b(f_fp, x_init, iprint=iprint,
                                            maxfun=self.max_iters, **opt_dict)
        info = opt_result[2]
        self.x_opt = opt_result[0]
        self.f_opt = f_fp(self.x_opt)[0]
        self.funct_eval = info['funcalls']
        self.status = rcstrings[info['warnflag']]

        # a more helpful error message is available in opt_result in the Error case
        if info['warnflag'] == 2:
            self.status = 'Error' + str(info['task'])
|
||||
|
||||
class opt_bfgs(Optimizer):
    """BFGS optimizer, wrapping ``scipy.optimize.fmin_bfgs``."""

    def __init__(self, *args, **kwargs):
        Optimizer.__init__(self, *args, **kwargs)
        self.opt_name = "BFGS (Scipy implementation)"

    def opt(self, x_init, f_fp=None, f=None, fp=None):
        """
        Run the optimizer

        :param x_init: starting point
        :param f_fp: unused
        :param f: objective function
        :param fp: gradient function
        """
        rcstrings = {0: '',
                     1: 'Maximum number of iterations exceeded',
                     2: 'Gradient and/or function calls not changing',
                     3: 'NaN result encountered'}

        opt_dict = {}
        if self.xtol is not None:
            print("WARNING: bfgs doesn't have an xtol arg, so I'm going to ignore it")
        if self.ftol is not None:
            print("WARNING: bfgs doesn't have an ftol arg, so I'm going to ignore it")
        if self.gtol is not None:
            # fmin_bfgs names its gradient tolerance ``gtol``; ``pgtol`` is
            # not accepted and raises TypeError.
            opt_dict['gtol'] = self.gtol

        opt_result = optimize.fmin_bfgs(f, x_init, fp, disp=self.messages,
                                        maxiter=self.max_iters, full_output=True, **opt_dict)
        self.x_opt = opt_result[0]
        # With full_output the second entry is f(x_opt); this avoids calling
        # f_fp, which is None when only f and fp are supplied.
        self.f_opt = opt_result[1]
        self.funct_eval = opt_result[4]
        warnflag = opt_result[6]
        self.status = rcstrings.get(warnflag, 'Unknown warnflag %s' % warnflag)
|
||||
|
||||
class opt_simplex(Optimizer):
    """Nelder-Mead simplex optimizer, wrapping ``scipy.optimize.fmin``."""

    def __init__(self, *args, **kwargs):
        Optimizer.__init__(self, *args, **kwargs)
        self.opt_name = "Nelder-Mead simplex routine (via Scipy)"

    def opt(self, x_init, f_fp=None, f=None, fp=None):
        """
        The simplex optimizer does not require gradients.
        """
        statuses = ['Converged', 'Maximum number of function evaluations made', 'Maximum number of iterations reached']

        # Only tolerances that were explicitly set are handed to scipy.
        tolerances = (('xtol', self.xtol), ('ftol', self.ftol))
        opt_dict = dict((key, value) for key, value in tolerances if value is not None)
        if self.gtol is not None:
            print("WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it")

        opt_result = optimize.fmin(f, x_init, (), disp=self.messages,
                                   maxfun=self.max_f_eval, full_output=True, **opt_dict)

        self.x_opt = opt_result[0]
        self.f_opt = opt_result[1]
        self.funct_eval = opt_result[3]
        warnflag = opt_result[4]
        self.status = statuses[warnflag]
        self.trace = None
|
||||
|
||||
|
||||
class opt_rasm(Optimizer):
    """Wrapper around the optional ``rasmussens_minimize`` module, imported
    at module level as ``rasm``."""

    def __init__(self, *args, **kwargs):
        Optimizer.__init__(self, *args, **kwargs)
        self.opt_name = "Rasmussen's Conjugate Gradient"

    def opt(self, x_init, f_fp=None, f=None, fp=None):
        """
        Run Rasmussen's Conjugate Gradient optimizer

        :param x_init: starting point
        :param f_fp: function returning the objective and its gradient (required)
        :param f: unused
        :param fp: unused
        """
        assert f_fp is not None, "Rasmussen's minimizer requires f_fp"
        statuses = ['Converged', 'Line search failed', 'Maximum number of f evaluations reached',
                    'NaNs in optimization']

        if self.xtol is not None:
            print("WARNING: minimize doesn't have an xtol arg, so I'm going to ignore it")
        if self.ftol is not None:
            print("WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it")
        if self.gtol is not None:
            print("WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it")

        opt_result = rasm.minimize(x_init, f_fp, (), messages=self.messages,
                                   maxnumfuneval=self.max_f_eval)
        self.x_opt = opt_result[0]
        # The second entry is the objective trace; its last value is f_opt.
        self.f_opt = opt_result[1][-1]
        self.funct_eval = opt_result[2]
        self.status = statuses[opt_result[3]]

        self.trace = opt_result[1]
|
||||
|
||||
class opt_SCG(Optimizer):
    """Scaled conjugate gradients optimizer, wrapping :func:`SCG`."""

    def __init__(self, *args, **kwargs):
        if 'max_f_eval' in kwargs:
            warn("max_f_eval deprecated for SCG optimizer: use max_iters instead!\nIgnoring max_f_eval!", FutureWarning)
        Optimizer.__init__(self, *args, **kwargs)

        self.opt_name = "Scaled Conjugate Gradients"

    def opt(self, x_init, f_fp=None, f=None, fp=None):
        """Run SCG from ``x_init``; both ``f`` and ``fp`` are required."""
        assert f is not None
        assert fp is not None

        result = SCG(f, fp, x_init, display=self.messages,
                     maxiters=self.max_iters,
                     max_f_eval=self.max_f_eval,
                     xtol=self.xtol, ftol=self.ftol,
                     gtol=self.gtol)

        # result is (x, objective trace, number of evaluations, status).
        self.x_opt = result[0]
        self.trace = result[1]
        self.f_opt = self.trace[-1]
        self.funct_eval = result[2]
        self.status = result[3]
|
||||
|
||||
class Opt_Adadelta(Optimizer):
    """Adadelta optimizer, wrapping ``climin.adadelta.Adadelta``."""

    def __init__(self, step_rate=0.1, decay=0.9, momentum=0, *args, **kwargs):
        Optimizer.__init__(self, *args, **kwargs)
        self.opt_name = "Adadelta (climin)"
        self.step_rate = step_rate
        self.decay = decay
        self.momentum = momentum

    def opt(self, x_init, f_fp=None, f=None, fp=None):
        """Iterate the climin optimizer until ``max_iters`` is reached; only
        the gradient function ``fp`` is used."""
        assert fp is not None

        # Imported here so that climin is only needed when this class is used.
        import climin

        stepper = climin.adadelta.Adadelta(x_init, fp, step_rate=self.step_rate,
                                           decay=self.decay, momentum=self.momentum)

        for info in stepper:
            if info['n_iter'] < self.max_iters:
                continue
            self.x_opt = stepper.wrt
            self.status = 'maximum number of function evaluations exceeded '
            break
|
||||
|
||||
def get_optimizer(f_min):
    """Return the optimizer class whose registered name contains ``f_min``.

    Matching is a case-insensitive substring test against the registered
    names, in the dictionary's iteration order; the first hit wins.

    :param f_min: (part of) an optimizer name, e.g. ``'scg'``
    :raises KeyError: if no registered name contains ``f_min``
    """
    registry = {'fmin_tnc': opt_tnc,
                'simplex': opt_simplex,
                'lbfgsb': opt_lbfgsb,
                'org-bfgs': opt_bfgs,
                'scg': opt_SCG,
                'adadelta': Opt_Adadelta}

    # Only offered when the optional rasmussens_minimize import succeeded.
    if rasm_available:
        registry['rasmussen'] = opt_rasm

    for registered_name in registry:
        if f_min.lower() in registered_name.lower():
            return registry[registered_name]

    raise KeyError('No optimizer was found matching the name: %s' % f_min)
|
||||
|
|
@ -1,193 +0,0 @@
|
|||
# Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2014)
|
||||
|
||||
# Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
|
||||
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
|
||||
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
|
||||
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
||||
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
def print_out(len_maxiters, fnow, current_grad, beta, iteration):
    """Rewrite the current console line with the SCG progress.

    Prints the iteration (zero-padded to ``len_maxiters`` digits), the
    objective, the scale ``beta`` and the squared gradient norm, then flushes
    stdout. No newline is written.
    """
    progress = '{0:>0{mi}g} {1:> 12e} {2:< 12.6e} {3:> 12e}'.format(
        iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters)
    # Carriage return first, so the text overwrites the previous progress.
    print('\r', end=' ')
    print(progress, end=' ')
    sys.stdout.flush()
|
||||
|
||||
def exponents(fnow, current_grad):
    """Return the signed, integer-truncated base-10 exponents of ``|fnow|``
    and ``current_grad`` as a two-element array.

    Used by :func:`SCG` to decide when the progress display starts a new line.
    """
    # The ``np.float`` alias was removed from NumPy; the builtin is equivalent.
    exps = [np.abs(float(fnow)), current_grad]
    return np.sign(exps) * np.log10(exps).astype(int)
|
||||
|
||||
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, xtol=None, ftol=None, gtol=None):
    """
    Optimisation through Scaled Conjugate Gradients (SCG)

    f: the objective function
    gradf : the gradient function (should return a 1D np.ndarray)
    x : the initial condition
    optargs : extra positional arguments passed to f and gradf
    maxiters : maximum number of iterations
    max_f_eval : maximum number of evaluations (calls to f and gradf are
        counted together)
    display : print progress and the final status
    xtol : step size tolerance (default 1e-6)
    ftol : objective change tolerance (default 1e-6)
    gtol : squared gradient norm tolerance (default 1e-5)

    Returns
    x the optimal value for x
    flog : a list of all the objective values
    function_eval number of fn evaluations
    status: string describing convergence status
    """
    if xtol is None:
        xtol = 1e-6
    if ftol is None:
        ftol = 1e-6
    if gtol is None:
        gtol = 1e-5

    sigma0 = 1.0e-7
    fold = f(x, *optargs)  # Initial function value.
    function_eval = 1
    fnow = fold
    gradnew = gradf(x, *optargs)  # Initial gradient.
    function_eval += 1
    current_grad = np.dot(gradnew, gradnew)
    gradold = gradnew.copy()
    d = -gradnew  # Initial search direction.
    success = True  # Force calculation of directional derivs.
    nsuccess = 0  # nsuccess counts number of successes.
    beta = 1.0  # Initial scale parameter.
    betamin = 1.0e-15  # Lower bound on scale.
    betamax = 1.0e15  # Upper bound on scale.
    status = "Not converged"

    flog = [fold]

    iteration = 0

    len_maxiters = len(str(maxiters))
    if display:
        print(' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters))
        exps = exponents(fnow, current_grad)
        p_iter = iteration

    # Main optimization loop.
    while iteration < maxiters:

        # Calculate first and second directional derivatives.
        if success:
            mu = np.dot(d, gradnew)
            if mu >= 0:
                # d is not a descent direction: fall back to steepest descent.
                d = -gradnew
                mu = np.dot(d, gradnew)
            kappa = np.dot(d, d)
            sigma = sigma0 / np.sqrt(kappa)
            xplus = x + sigma * d
            gplus = gradf(xplus, *optargs)
            function_eval += 1
            # Finite-difference estimate of the curvature along d.
            theta = np.dot(d, (gplus - gradnew)) / sigma

        # Increase effective curvature and evaluate step size alpha.
        delta = theta + beta * kappa
        if delta <= 0:
            delta = beta * kappa
            beta = beta - theta / kappa

        alpha = -mu / delta

        # Calculate the comparison ratio.
        xnew = x + alpha * d
        fnew = f(xnew, *optargs)
        function_eval += 1

        if function_eval >= max_f_eval:
            status = "maximum number of function evaluations exceeded"
            break

        Delta = 2. * (fnew - fold) / (alpha * mu)
        if Delta >= 0.:
            success = True
            nsuccess += 1
            x = xnew
            fnow = fnew
        else:
            success = False
            fnow = fold

        # Store relevant variables
        flog.append(fnow)  # Current function value

        iteration += 1
        if display:
            print_out(len_maxiters, fnow, current_grad, beta, iteration)
            n_exps = exponents(fnow, current_grad)
            # Occasionally start a new console line so that earlier progress
            # stays visible.
            if iteration - p_iter >= 20 * np.random.rand():
                a = iteration >= p_iter * 2.78
                b = np.any(n_exps < exps)
                if a or b:
                    p_iter = iteration
                    print('')
                if b:
                    exps = n_exps

        if success:
            # Test for termination
            if (np.abs(fnew - fold) < ftol):
                status = 'converged - relative reduction in objective'
                break
            elif (np.max(np.abs(alpha * d)) < xtol):
                status = 'converged - relative stepsize'
                break
            else:
                # Update variables for new position
                gradold = gradnew
                gradnew = gradf(x, *optargs)
                function_eval += 1
                current_grad = np.dot(gradnew, gradnew)
                fold = fnew
                # If the gradient is zero then we are done.
                if current_grad <= gtol:
                    status = 'converged - relative reduction in gradient'
                    break

        # Adjust beta according to comparison ratio.
        if Delta < 0.25:
            beta = min(4.0 * beta, betamax)
        if Delta > 0.75:
            beta = max(0.25 * beta, betamin)

        # Update search direction using Polak-Ribiere formula, or re-start
        # in direction of negative gradient after nparams steps.
        if nsuccess == x.size:
            d = -gradnew
            beta = 1.  # This is not in the original paper
            nsuccess = 0
        elif success:
            Gamma = np.dot(gradold - gradnew, gradnew) / (mu)
            d = Gamma * d - gradnew
    else:
        # If we get here, then we haven't terminated in the given number of
        # iterations.
        status = "maxiter exceeded"

    if display:
        print_out(len_maxiters, fnow, current_grad, beta, iteration)
        print("")
        print(status)
    return x, flog, function_eval, status
|
||||
|
|
@ -1,92 +0,0 @@
|
|||
# Copyright (c) 2012-2014, Max Zwiessele
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
class StochasticStorage(object):
    '''
    Container for the stochastic parameters of an optimization,
    such as subset indices or step length.

    self.d has to be a list of lists:
    [dimension indices, nan indices for those dimensions]
    so that the minibatches can be used as efficiently as possible.
    '''
    def __init__(self, model):
        """
        Initialize this stochastic container using the given model.
        The base implementation stores nothing.
        """

    def do_stochastics(self):
        """
        Update the internal state to the next batch of the stochastic
        descent algorithm. The base implementation does nothing.
        """
        return None

    def reset(self):
        """
        Reset the state of this stochastics generator.
        The base implementation does nothing.
        """
|
||||
|
||||
class SparseGPMissing(StochasticStorage):
    """Groups the output dimensions of ``model.Y_normalized`` by their
    missing-data (NaN) pattern."""

    def __init__(self, model, batchsize=1):
        """
        Here we want to loop over all dimensions everytime.
        Thus, we can just make sure the loop goes over self.d every
        time. We will try to get batches which look the same together
        which speeds up calculations significantly.

        ``self.d`` becomes a list of ``[dimensions, mask]`` pairs, where
        ``dimensions`` lists the columns sharing one NaN pattern and ``mask``
        is True for the rows that are not NaN in those columns.

        :param model: object providing ``Y_normalized`` (2D array)
        :param batchsize: unused
        """
        import numpy as np
        self.Y = model.Y_normalized
        # Computed once for the whole array instead of once per column.
        nan_mask = np.isnan(self.Y)
        bdict = {}
        for d in range(self.Y.shape[1]):
            inan = nan_mask[:, d]
            # The raw bytes of the boolean column identify its NaN pattern;
            # no string formatting or global print options are needed.
            pattern = inan.tobytes()
            if pattern in bdict:
                bdict[pattern][0].append(d)
            else:
                bdict[pattern] = [[d], ~inan]
        # A real list, as documented on StochasticStorage, also on Python 3.
        self.d = list(bdict.values())
|
||||
|
||||
class SparseGPStochastics(StochasticStorage):
    """
    For the sparse gp we need to store the dimension we are in,
    and the indices corresponding to those
    """
    def __init__(self, model, batchsize=1, missing_data=True):
        """
        :param model: object providing ``Y`` and ``Y_normalized`` (2D arrays)
        :param batchsize: number of output dimensions per batch
        :param missing_data: whether to compute NaN masks for the batches
        """
        self.batchsize = batchsize
        self.output_dim = model.Y.shape[1]
        self.Y = model.Y_normalized
        self.missing_data = missing_data
        self.reset()
        self.do_stochastics()

    def do_stochastics(self):
        """Advance ``self.d`` to the next batch of output dimensions.

        With ``batchsize == 1`` the dimensions are visited cyclically;
        otherwise ``batchsize`` distinct dimensions are drawn at random and,
        if ``missing_data`` is set, grouped by their NaN pattern.
        """
        import numpy as np
        if self.batchsize == 1:
            self.current_dim = (self.current_dim + 1) % self.output_dim
            # NOTE(review): this branch stores the NaN mask itself, whereas
            # the batch branch below stores its negation -- confirm which
            # polarity the consumers of ``self.d`` expect.
            self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
        else:
            self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
            if self.missing_data:
                bdict = {}
                for d in self.d:
                    inan = np.isnan(self.Y[:, d])
                    # The raw bytes of the boolean column identify its NaN
                    # pattern; no string formatting or print options needed.
                    pattern = inan.tobytes()
                    if pattern in bdict:
                        bdict[pattern][0].append(d)
                    else:
                        bdict[pattern] = [[d], ~inan]
                # A real list, as in the other branches, also on Python 3.
                self.d = list(bdict.values())
            else:
                self.d = [[self.d, None]]

    def reset(self):
        """Restart the cycle over dimensions and drop the current batch."""
        self.current_dim = -1
        self.d = None
|
||||
Loading…
Add table
Add a link
Reference in a new issue