[optimization] deleted and backwardscompatible

This commit is contained in:
Max Zwiessele 2015-11-09 13:27:24 +00:00
parent 850c10beaa
commit 913d80f712
8 changed files with 11 additions and 914 deletions

View file

@ -20,7 +20,6 @@ backwards_compatibility = ['lists_and_dicts', 'observable_array', 'ties_and_rema
for bc in backwards_compatibility:
sys.modules['GPy.core.parameterization.{!s}'.format(bc)] = getattr(core.parameterization, bc)
# Direct imports for convenience:
from .core import Model
from .core.parameterization import priors
@ -52,5 +51,6 @@ def load(file_or_path):
for name, module in inspect.getmembers(kern.src): # @UndefinedVariable
if not name.startswith('_'):
sys.modules['GPy.kern._src.{}'.format(name)] = module
sys.modules['GPy.inference.optimization'] = inference.optimization
import paramz
return paramz.load(file_or_path)

View file

@ -1,3 +1,7 @@
from . import latent_function_inference
from . import optimization
from . import latent_function_inference
from . import mcmc
import sys
sys.modules['GPy.inference.optimization'] = optimization
sys.modules['GPy.inference.optimization.optimization'] = optimization

View file

@ -1,2 +1,5 @@
from .scg import SCG
from .optimization import *
from paramz.optimization import stochastics, Optimizer
from paramz.optimization import *
import sys
sys.modules['GPy.inference.optimization.stochastics'] = stochastics
sys.modules['GPy.inference.optimization.Optimizer'] = Optimizer

View file

@ -1,285 +0,0 @@
# Copyright (c) 2012-2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from .gradient_descent_update_rules import FletcherReeves, \
PolakRibiere
from Queue import Empty
from multiprocessing import Value
from multiprocessing.queues import Queue
from multiprocessing.synchronize import Event
from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
from threading import Thread
import numpy
import sys
import time
RUNNING = "running"
CONVERGED = "converged"
MAXITER = "maximum number of iterations reached"
MAX_F_EVAL = "maximum number of function calls reached"
LINE_SEARCH = "line search failed"
KBINTERRUPT = "interrupted"
class _Async_Optimization(Thread):
def __init__(self, f, df, x0, update_rule, runsignal, SENTINEL,
report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3,
gtol=1e-6, outqueue=None, *args, **kw):
"""
Helper Process class for async optimization
f_call and df_call are Multiprocessing Values, for synchronized assignment
"""
self.f_call = Value('i', 0)
self.df_call = Value('i', 0)
self.f = self.f_wrapper(f, self.f_call)
self.df = self.f_wrapper(df, self.df_call)
self.x0 = x0
self.update_rule = update_rule
self.report_every = report_every
self.messages = messages
self.maxiter = maxiter
self.max_f_eval = max_f_eval
self.gtol = gtol
self.SENTINEL = SENTINEL
self.runsignal = runsignal
# self.parent = parent
# self.result = None
self.outq = outqueue
super(_Async_Optimization, self).__init__(target=self.run,
name="CG Optimization",
*args, **kw)
# def __enter__(self):
# return self
#
# def __exit__(self, type, value, traceback):
# return isinstance(value, TypeError)
def f_wrapper(self, f, counter):
def f_w(*a, **kw):
counter.value += 1
return f(*a, **kw)
return f_w
def callback(self, *a):
if self.outq is not None:
self.outq.put(a)
# self.parent and self.parent.callback(*a, **kw)
pass
# print "callback done"
def callback_return(self, *a):
self.callback(*a)
if self.outq is not None:
self.outq.put(self.SENTINEL)
if self.messages:
print("")
self.runsignal.clear()
def run(self, *args, **kwargs):
raise NotImplementedError("Overwrite this with optimization (for async use)")
pass
class _CGDAsync(_Async_Optimization):
def reset(self, xi, *a, **kw):
gi = -self.df(xi, *a, **kw)
si = gi
ur = self.update_rule(gi)
return gi, ur, si
def run(self, *a, **kw):
status = RUNNING
fi = self.f(self.x0)
fi_old = fi + 5000
gi, ur, si = self.reset(self.x0, *a, **kw)
xi = self.x0
xi_old = numpy.nan
it = 0
while it < self.maxiter:
if not self.runsignal.is_set():
break
if self.f_call.value > self.max_f_eval:
status = MAX_F_EVAL
gi = -self.df(xi, *a, **kw)
if numpy.dot(gi.T, gi) <= self.gtol:
status = CONVERGED
break
if numpy.isnan(numpy.dot(gi.T, gi)):
if numpy.any(numpy.isnan(xi_old)):
status = CONVERGED
break
self.reset(xi_old)
gammai = ur(gi)
if gammai < 1e-6 or it % xi.shape[0] == 0:
gi, ur, si = self.reset(xi, *a, **kw)
si = gi + gammai * si
alphai, _, _, fi2, fi_old2, gfi = line_search_wolfe1(self.f,
self.df,
xi,
si, gi,
fi, fi_old)
if alphai is None:
alphai, _, _, fi2, fi_old2, gfi = \
line_search_wolfe2(self.f, self.df,
xi, si, gi,
fi, fi_old)
if alphai is None:
# This line search also failed to find a better solution.
status = LINE_SEARCH
break
if fi2 < fi:
fi, fi_old = fi2, fi_old2
if gfi is not None:
gi = gfi
if numpy.isnan(fi) or fi_old < fi:
gi, ur, si = self.reset(xi, *a, **kw)
else:
xi += numpy.dot(alphai, si)
if self.messages:
sys.stdout.write("\r")
sys.stdout.flush()
sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi)))
if it % self.report_every == 0:
self.callback(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
it += 1
else:
status = MAXITER
self.callback_return(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value, status]
class Async_Optimize(object):
callback = lambda *x: None
runsignal = Event()
SENTINEL = "SENTINEL"
def async_callback_collect(self, q):
while self.runsignal.is_set():
try:
for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
self.callback(*ret)
self.runsignal.clear()
except Empty:
pass
def opt_async(self, f, df, x0, callback, update_rule=PolakRibiere,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs):
self.runsignal.set()
c = None
outqueue = None
if callback:
outqueue = Queue()
self.callback = callback
c = Thread(target=self.async_callback_collect, args=(outqueue,))
c.start()
p = _CGDAsync(f, df, x0, update_rule, self.runsignal, self.SENTINEL,
report_every=report_every, messages=messages, maxiter=maxiter,
max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs)
p.start()
return p, c
def opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs):
p, c = self.opt_async(f, df, x0, callback, update_rule, messages,
maxiter, max_f_eval, gtol,
report_every, *args, **kwargs)
while self.runsignal.is_set():
try:
p.join(1)
if c: c.join(1)
except KeyboardInterrupt:
# print "^C"
self.runsignal.clear()
p.join()
if c: c.join()
if c and c.is_alive():
# self.runsignal.set()
# while self.runsignal.is_set():
# try:
# c.join(.1)
# except KeyboardInterrupt:
# # print "^C"
# self.runsignal.clear()
# c.join()
print("WARNING: callback still running, optimisation done!")
return p.result
class CGD(Async_Optimize):
'''
Conjugate gradient descent algorithm to minimize
function f with gradients df, starting at x0
with update rule update_rule
if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules
'''
opt_name = "Conjugate Gradient Descent"
def opt_async(self, *a, **kw):
"""
opt_async(self, f, df, x0, callback, update_rule=FletcherReeves,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, \*args, \*\*kwargs)
callback gets called every `report_every` iterations
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules
f, and df will be called with
f(xi, \*args, \*\*kwargs)
df(xi, \*args, \*\*kwargs)
**Returns:**
Started `Process` object, optimizing asynchronously
**Calls:**
callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message)
at end of optimization!
"""
return super(CGD, self).opt_async(*a, **kw)
def opt(self, *a, **kw):
"""
opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, \*args, \*\*kwargs)
Minimize f, calling callback every `report_every` iterations with following syntax:
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules
f, and df will be called with
f(xi, \*args, \*\*kwargs)
df(xi, \*args, \*\*kwargs)
**returns**
x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message
at end of optimization
"""
return super(CGD, self).opt(*a, **kw)

View file

@ -1,51 +0,0 @@
# Copyright (c) 2012-2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy
class GDUpdateRule():
_gradnat = None
_gradnatold = None
def __init__(self, initgrad, initgradnat=None):
self.grad = initgrad
if initgradnat:
self.gradnat = initgradnat
else:
self.gradnat = initgrad
# self.grad, self.gradnat
def _gamma(self):
raise NotImplemented("""Implement gamma update rule here,
you can use self.grad and self.gradold for parameters, as well as
self.gradnat and self.gradnatold for natural gradients.""")
def __call__(self, grad, gradnat=None, si=None, *args, **kw):
"""
Return gamma for given gradients and optional natural gradients
"""
if not gradnat:
gradnat = grad
self.gradold = self.grad
self.gradnatold = self.gradnat
self.grad = grad
self.gradnat = gradnat
self.si = si
return self._gamma(*args, **kw)
class FletcherReeves(GDUpdateRule):
'''
Fletcher Reeves update rule for gamma
'''
def _gamma(self, *a, **kw):
tmp = numpy.dot(self.grad.T, self.gradnat)
if tmp:
return tmp / numpy.dot(self.gradold.T, self.gradnatold)
return tmp
class PolakRibiere(GDUpdateRule):
'''
Fletcher Reeves update rule for gamma
'''
def _gamma(self, *a, **kw):
tmp = numpy.dot((self.grad - self.gradold).T, self.gradnat)
if tmp:
return tmp / numpy.dot(self.gradold.T, self.gradnatold)
return tmp

View file

@ -1,289 +0,0 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import datetime as dt
from scipy import optimize
from warnings import warn
try:
import rasmussens_minimize as rasm
rasm_available = True
except ImportError:
rasm_available = False
from .scg import SCG
class Optimizer(object):
"""
Superclass for all the optimizers.
:param x_init: initial set of parameters
:param f_fp: function that returns the function AND the gradients at the same time
:param f: function to optimize
:param fp: gradients
:param messages: print messages from the optimizer?
:type messages: (True | False)
:param max_f_eval: maximum number of function evaluations
:rtype: optimizer object.
"""
def __init__(self, x_init=None, messages=False, max_f_eval=1e4, max_iters=1e3,
ftol=None, gtol=None, xtol=None, bfgs_factor=None):
self.opt_name = None
#x_init = x_init
# Turning messages off and using internal structure for print outs:
self.messages = False #messages
self.f_opt = None
self.x_opt = None
self.funct_eval = None
self.status = None
self.max_f_eval = int(max_iters)
self.max_iters = int(max_iters)
self.bfgs_factor = bfgs_factor
self.trace = None
self.time = "Not available"
self.xtol = xtol
self.gtol = gtol
self.ftol = ftol
def run(self, **kwargs):
start = dt.datetime.now()
self.opt(**kwargs)
end = dt.datetime.now()
self.time = str(end - start)
def opt(self, x_init, f_fp=None, f=None, fp=None):
raise NotImplementedError("this needs to be implemented to use the optimizer class")
def __str__(self):
diagnostics = "Optimizer: \t\t\t\t %s\n" % self.opt_name
diagnostics += "f(x_opt): \t\t\t\t %.3f\n" % self.f_opt
diagnostics += "Number of function evaluations: \t %d\n" % self.funct_eval
diagnostics += "Optimization status: \t\t\t %s\n" % self.status
diagnostics += "Time elapsed: \t\t\t\t %s\n" % self.time
return diagnostics
def __getstate__(self):
return []
class opt_tnc(Optimizer):
def __init__(self, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "TNC (Scipy implementation)"
def opt(self, x_init, f_fp=None, f=None, fp=None):
"""
Run the TNC optimizer
"""
tnc_rcstrings = ['Local minimum', 'Converged', 'XConverged', 'Maximum number of f evaluations reached',
'Line search failed', 'Function is constant']
assert f_fp != None, "TNC requires f_fp"
opt_dict = {}
if self.xtol is not None:
opt_dict['xtol'] = self.xtol
if self.ftol is not None:
opt_dict['ftol'] = self.ftol
if self.gtol is not None:
opt_dict['pgtol'] = self.gtol
opt_result = optimize.fmin_tnc(f_fp, x_init, messages=self.messages,
maxfun=self.max_f_eval, **opt_dict)
self.x_opt = opt_result[0]
self.f_opt = f_fp(self.x_opt)[0]
self.funct_eval = opt_result[1]
self.status = tnc_rcstrings[opt_result[2]]
class opt_lbfgsb(Optimizer):
def __init__(self, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "L-BFGS-B (Scipy implementation)"
def opt(self, x_init, f_fp=None, f=None, fp=None):
"""
Run the optimizer
"""
rcstrings = ['Converged', 'Maximum number of f evaluations reached', 'Error']
assert f_fp != None, "BFGS requires f_fp"
if self.messages:
iprint = 1
else:
iprint = -1
opt_dict = {}
if self.xtol is not None:
print("WARNING: l-bfgs-b doesn't have an xtol arg, so I'm going to ignore it")
if self.ftol is not None:
print("WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it")
if self.gtol is not None:
opt_dict['pgtol'] = self.gtol
if self.bfgs_factor is not None:
opt_dict['factr'] = self.bfgs_factor
opt_result = optimize.fmin_l_bfgs_b(f_fp, x_init, iprint=iprint,
maxfun=self.max_iters, **opt_dict)
self.x_opt = opt_result[0]
self.f_opt = f_fp(self.x_opt)[0]
self.funct_eval = opt_result[2]['funcalls']
self.status = rcstrings[opt_result[2]['warnflag']]
#a more helpful error message is available in opt_result in the Error case
if opt_result[2]['warnflag']==2:
self.status = 'Error' + str(opt_result[2]['task'])
class opt_bfgs(Optimizer):
def __init__(self, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "BFGS (Scipy implementation)"
def opt(self, x_init, f_fp=None, f=None, fp=None):
"""
Run the optimizer
"""
rcstrings = ['','Maximum number of iterations exceeded', 'Gradient and/or function calls not changing']
opt_dict = {}
if self.xtol is not None:
print("WARNING: bfgs doesn't have an xtol arg, so I'm going to ignore it")
if self.ftol is not None:
print("WARNING: bfgs doesn't have an ftol arg, so I'm going to ignore it")
if self.gtol is not None:
opt_dict['pgtol'] = self.gtol
opt_result = optimize.fmin_bfgs(f, x_init, fp, disp=self.messages,
maxiter=self.max_iters, full_output=True, **opt_dict)
self.x_opt = opt_result[0]
self.f_opt = f_fp(self.x_opt)[0]
self.funct_eval = opt_result[4]
self.status = rcstrings[opt_result[6]]
class opt_simplex(Optimizer):
def __init__(self, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Nelder-Mead simplex routine (via Scipy)"
def opt(self, x_init, f_fp=None, f=None, fp=None):
"""
The simplex optimizer does not require gradients.
"""
statuses = ['Converged', 'Maximum number of function evaluations made', 'Maximum number of iterations reached']
opt_dict = {}
if self.xtol is not None:
opt_dict['xtol'] = self.xtol
if self.ftol is not None:
opt_dict['ftol'] = self.ftol
if self.gtol is not None:
print("WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it")
opt_result = optimize.fmin(f, x_init, (), disp=self.messages,
maxfun=self.max_f_eval, full_output=True, **opt_dict)
self.x_opt = opt_result[0]
self.f_opt = opt_result[1]
self.funct_eval = opt_result[3]
self.status = statuses[opt_result[4]]
self.trace = None
class opt_rasm(Optimizer):
def __init__(self, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Rasmussen's Conjugate Gradient"
def opt(self, x_init, f_fp=None, f=None, fp=None):
"""
Run Rasmussen's Conjugate Gradient optimizer
"""
assert f_fp != None, "Rasmussen's minimizer requires f_fp"
statuses = ['Converged', 'Line search failed', 'Maximum number of f evaluations reached',
'NaNs in optimization']
opt_dict = {}
if self.xtol is not None:
print("WARNING: minimize doesn't have an xtol arg, so I'm going to ignore it")
if self.ftol is not None:
print("WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it")
if self.gtol is not None:
print("WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it")
opt_result = rasm.minimize(x_init, f_fp, (), messages=self.messages,
maxnumfuneval=self.max_f_eval)
self.x_opt = opt_result[0]
self.f_opt = opt_result[1][-1]
self.funct_eval = opt_result[2]
self.status = statuses[opt_result[3]]
self.trace = opt_result[1]
class opt_SCG(Optimizer):
def __init__(self, *args, **kwargs):
if 'max_f_eval' in kwargs:
warn("max_f_eval deprecated for SCG optimizer: use max_iters instead!\nIgnoring max_f_eval!", FutureWarning)
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Scaled Conjugate Gradients"
def opt(self, x_init, f_fp=None, f=None, fp=None):
assert not f is None
assert not fp is None
opt_result = SCG(f, fp, x_init, display=self.messages,
maxiters=self.max_iters,
max_f_eval=self.max_f_eval,
xtol=self.xtol, ftol=self.ftol,
gtol=self.gtol)
self.x_opt = opt_result[0]
self.trace = opt_result[1]
self.f_opt = self.trace[-1]
self.funct_eval = opt_result[2]
self.status = opt_result[3]
class Opt_Adadelta(Optimizer):
def __init__(self, step_rate=0.1, decay=0.9, momentum=0, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Adadelta (climin)"
self.step_rate=step_rate
self.decay = decay
self.momentum = momentum
def opt(self, x_init, f_fp=None, f=None, fp=None):
assert not fp is None
import climin
opt = climin.adadelta.Adadelta(x_init, fp, step_rate=self.step_rate, decay=self.decay, momentum=self.momentum)
for info in opt:
if info['n_iter']>=self.max_iters:
self.x_opt = opt.wrt
self.status = 'maximum number of function evaluations exceeded '
break
def get_optimizer(f_min):
optimizers = {'fmin_tnc': opt_tnc,
'simplex': opt_simplex,
'lbfgsb': opt_lbfgsb,
'org-bfgs': opt_bfgs,
'scg': opt_SCG,
'adadelta':Opt_Adadelta}
if rasm_available:
optimizers['rasmussen'] = opt_rasm
for opt_name in optimizers.keys():
if opt_name.lower().find(f_min.lower()) != -1:
return optimizers[opt_name]
raise KeyError('No optimizer was found matching the name: %s' % f_min)

View file

@ -1,193 +0,0 @@
# Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2014)
# Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from __future__ import print_function
import numpy as np
import sys
def print_out(len_maxiters, fnow, current_grad, beta, iteration):
print('\r', end=' ')
print('{0:>0{mi}g} {1:> 12e} {2:< 12.6e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
def exponents(fnow, current_grad):
exps = [np.abs(np.float(fnow)), current_grad]
return np.sign(exps) * np.log10(exps).astype(int)
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, xtol=None, ftol=None, gtol=None):
"""
Optimisation through Scaled Conjugate Gradients (SCG)
f: the objective function
gradf : the gradient function (should return a 1D np.ndarray)
x : the initial condition
Returns
x the optimal value for x
flog : a list of all the objective values
function_eval number of fn evaluations
status: string describing convergence status
"""
if xtol is None:
xtol = 1e-6
if ftol is None:
ftol = 1e-6
if gtol is None:
gtol = 1e-5
sigma0 = 1.0e-7
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
function_eval += 1
#if any(np.isnan(gradnew)):
# raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs.
nsuccess = 0 # nsuccess counts number of successes.
beta = 1.0 # Initial scale parameter.
betamin = 1.0e-15 # Lower bound on scale.
betamax = 1.0e15 # Upper bound on scale.
status = "Not converged"
flog = [fold]
iteration = 0
len_maxiters = len(str(maxiters))
if display:
print(' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters))
exps = exponents(fnow, current_grad)
p_iter = iteration
# Main optimization loop.
while iteration < maxiters:
# Calculate first and second directional derivatives.
if success:
mu = np.dot(d, gradnew)
if mu >= 0:
d = -gradnew
mu = np.dot(d, gradnew)
kappa = np.dot(d, d)
sigma = sigma0 / np.sqrt(kappa)
xplus = x + sigma * d
gplus = gradf(xplus, *optargs)
function_eval += 1
theta = np.dot(d, (gplus - gradnew)) / sigma
# Increase effective curvature and evaluate step size alpha.
delta = theta + beta * kappa
if delta <= 0:
delta = beta * kappa
beta = beta - theta / kappa
alpha = -mu / delta
# Calculate the comparison ratio.
xnew = x + alpha * d
fnew = f(xnew, *optargs)
function_eval += 1
if function_eval >= max_f_eval:
status = "maximum number of function evaluations exceeded"
break
return x, flog, function_eval, status
Delta = 2.*(fnew - fold) / (alpha * mu)
if Delta >= 0.:
success = True
nsuccess += 1
x = xnew
fnow = fnew
else:
success = False
fnow = fold
# Store relevant variables
flog.append(fnow) # Current function value
iteration += 1
if display:
print_out(len_maxiters, fnow, current_grad, beta, iteration)
n_exps = exponents(fnow, current_grad)
if iteration - p_iter >= 20 * np.random.rand():
a = iteration >= p_iter * 2.78
b = np.any(n_exps < exps)
if a or b:
p_iter = iteration
print('')
if b:
exps = n_exps
if success:
# Test for termination
if (np.abs(fnew - fold) < ftol):
status = 'converged - relative reduction in objective'
break
# return x, flog, function_eval, status
elif (np.max(np.abs(alpha * d)) < xtol):
status = 'converged - relative stepsize'
break
else:
# Update variables for new position
gradold = gradnew
gradnew = gradf(x, *optargs)
function_eval += 1
current_grad = np.dot(gradnew, gradnew)
fold = fnew
# If the gradient is zero then we are done.
if current_grad <= gtol:
status = 'converged - relative reduction in gradient'
break
# return x, flog, function_eval, status
# Adjust beta according to comparison ratio.
if Delta < 0.25:
beta = min(4.0 * beta, betamax)
if Delta > 0.75:
beta = max(0.25 * beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
beta = 1. # This is not in the original paper
nsuccess = 0
elif success:
Gamma = np.dot(gradold - gradnew, gradnew) / (mu)
d = Gamma * d - gradnew
else:
# If we get here, then we haven't terminated in the given number of
# iterations.
status = "maxiter exceeded"
if display:
print_out(len_maxiters, fnow, current_grad, beta, iteration)
print("")
print(status)
return x, flog, function_eval, status

View file

@ -1,92 +0,0 @@
# Copyright (c) 2012-2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
class StochasticStorage(object):
'''
This is a container for holding the stochastic parameters,
such as subset indices or step length and so on.
self.d has to be a list of lists:
[dimension indices, nan indices for those dimensions]
so that the minibatches can be used as efficiently as possible.10
'''
def __init__(self, model):
"""
Initialize this stochastic container using the given model
"""
def do_stochastics(self):
"""
Update the internal state to the next batch of the stochastic
descent algorithm.
"""
pass
def reset(self):
"""
Reset the state of this stochastics generator.
"""
class SparseGPMissing(StochasticStorage):
def __init__(self, model, batchsize=1):
"""
Here we want to loop over all dimensions everytime.
Thus, we can just make sure the loop goes over self.d every
time. We will try to get batches which look the same together
which speeds up calculations significantly.
"""
import numpy as np
self.Y = model.Y_normalized
bdict = {}
#For N > 1000 array2string default crops
opt = np.get_printoptions()
np.set_printoptions(threshold=np.inf)
for d in range(self.Y.shape[1]):
inan = np.isnan(self.Y)[:, d]
arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
np.set_printoptions(**opt)
self.d = bdict.values()
class SparseGPStochastics(StochasticStorage):
"""
For the sparse gp we need to store the dimension we are in,
and the indices corresponding to those
"""
def __init__(self, model, batchsize=1, missing_data=True):
self.batchsize = batchsize
self.output_dim = model.Y.shape[1]
self.Y = model.Y_normalized
self.missing_data = missing_data
self.reset()
self.do_stochastics()
def do_stochastics(self):
import numpy as np
if self.batchsize == 1:
self.current_dim = (self.current_dim+1)%self.output_dim
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
else:
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
bdict = {}
if self.missing_data:
opt = np.get_printoptions()
np.set_printoptions(threshold=np.inf)
for d in self.d:
inan = np.isnan(self.Y[:, d])
arr_str = np.array2string(inan,np.inf, 0,True, '',formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
np.set_printoptions(**opt)
self.d = bdict.values()
else:
self.d = [[self.d, None]]
def reset(self):
self.current_dim = -1
self.d = None