Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-14 14:32:37 +02:00
new termination rule for scg
This commit is contained in:
parent
0e8bc5662a
commit
803a1d99ed
6 changed files with 84 additions and 50 deletions
@@ -207,7 +207,7 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
 max_burnin=100, true_X=False,
 do_opt=True,
 max_f_eval=1000):
-D1, D2, D3, N, M, Q = 10, 8, 8, 250, 10, 6
+D1, D2, D3, N, M, Q = 15, 8, 8, 350, 3, 6
 slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)

 from GPy.models import mrd
@@ -1,6 +1,6 @@
-#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
+# Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)

-#Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
+# Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman

 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
 # HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
@@ -25,7 +25,7 @@
 import numpy as np
 import sys

-def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
+def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=None, ftol=None, gtol=None):
 """
 Optimisation through Scaled Conjugate Gradients (SCG)
@@ -38,19 +38,24 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
 flog : a list of all the objective values

 """
+if xtol is None:
+    xtol = 1e-6
+if ftol is None:
+    ftol = 1e-6
+if gtol is None:
+    gtol = 1e-5
 sigma0 = 1.0e-4
 fold = f(x, *optargs) # Initial function value.
 function_eval = 1
 fnow = fold
 gradnew = gradf(x, *optargs) # Initial gradient.
 gradold = gradnew.copy()
 d = -gradnew # Initial search direction.
 success = True # Force calculation of directional derivs.
 nsuccess = 0 # nsuccess counts number of successes.
 beta = 1.0 # Initial scale parameter.
 betamin = 1.0e-15 # Lower bound on scale.
 betamax = 1.0e100 # Upper bound on scale.
 status = "Not converged"

 flog = [fold]
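Note on the interface change above: xtol, ftol and the new gtol now default to None and are filled in inside SCG (1e-6, 1e-6 and 1e-5 respectively); the return value is still (x, flog, function_eval, status). A minimal usage sketch on a toy quadratic follows; the import path is an assumption here, since the diff does not show the file name.

    import numpy as np
    from GPy.inference.scg import SCG   # assumed location of the edited scg.py

    f = lambda x: 0.5 * np.sum(x ** 2)  # toy objective
    gradf = lambda x: x                 # its gradient
    x0 = np.ones(4)

    # gtol is the new keyword: a squared gradient norm below it now terminates.
    x_opt, flog, n_eval, status = SCG(f, gradf, x0, display=False,
                                      xtol=1e-6, ftol=1e-6, gtol=1e-5)
    print(status)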
@@ -67,21 +72,21 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
 d = -gradnew
 mu = np.dot(d, gradnew)
 kappa = np.dot(d, d)
-sigma = sigma0/np.sqrt(kappa)
+sigma = sigma0 / np.sqrt(kappa)
-xplus = x + sigma*d
+xplus = x + sigma * d
 gplus = gradf(xplus, *optargs)
-theta = np.dot(d, (gplus - gradnew))/sigma
+theta = np.dot(d, (gplus - gradnew)) / sigma

 # Increase effective curvature and evaluate step size alpha.
-delta = theta + beta*kappa
+delta = theta + beta * kappa
 if delta <= 0:
-delta = beta*kappa
+delta = beta * kappa
-beta = beta - theta/kappa
+beta = beta - theta / kappa

-alpha = - mu/delta
+alpha = -mu / delta

 # Calculate the comparison ratio.
-xnew = x + alpha*d
+xnew = x + alpha * d
 fnew = f(xnew, *optargs)
 function_eval += 1
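The hunk above only reformats whitespace, but for reference, the quantities it touches form the scaled step-size computation; a stand-alone restatement (not GPy code) assuming a gradient callable gradf:

    import numpy as np

    def scg_step(gradf, x, d, gradnew, beta, sigma0=1.0e-4):
        # Curvature along d is estimated from a finite difference of gradients,
        # then regularised by the scale beta before the step alpha is taken.
        mu = np.dot(d, gradnew)
        kappa = np.dot(d, d)
        sigma = sigma0 / np.sqrt(kappa)
        theta = np.dot(d, gradf(x + sigma * d) - gradnew) / sigma
        delta = theta + beta * kappa
        if delta <= 0:
            delta = beta * kappa
            beta = beta - theta / kappa
        alpha = -mu / delta
        return alpha, beta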
@@ -89,8 +94,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
 status = "Maximum number of function evaluations exceeded"
 return x, flog, function_eval, status

-Delta = 2.*(fnew - fold)/(alpha*mu)
+Delta = 2.*(fnew - fold) / (alpha * mu)
 if Delta >= 0.:
 success = True
 nsuccess += 1
 x = xnew
@@ -100,19 +105,19 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
 fnow = fold

 # Store relevant variables
 flog.append(fnow) # Current function value

 iteration += 1
 if display:
 print '\r',
-print 'Iteration: {0:>5g} Objective:{1:> 12e} Scale:{2:> 12e}'.format(iteration, fnow, beta),
+print 'i: {0:>5g} f:{1:> 12e} b:{2:> 12e} |g|:{3:> 12e}'.format(iteration, fnow, beta, np.dot(gradnew, gradnew)),
 # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
 sys.stdout.flush()

 if success:
 # Test for termination
-if (np.max(np.abs(alpha*d)) < xtol) or (np.abs(fnew-fold) < ftol):
+if (np.dot(gradnew, gradnew)) <= gtol or (np.max(np.abs(alpha * d)) < xtol) or (np.abs(fnew - fold) < ftol):
-status='converged'
+status = 'converged'
 return x, flog, function_eval, status

 else:
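This is the termination rule the commit title refers to: on a successful step, SCG now also stops when the squared gradient norm drops to gtol, in addition to the existing step-size and objective-change tests. A stand-alone restatement of the combined test:

    import numpy as np

    def scg_converged(gradnew, alpha, d, fnew, fold, xtol=1e-6, ftol=1e-6, gtol=1e-5):
        small_grad = np.dot(gradnew, gradnew) <= gtol     # new in this commit
        small_step = np.max(np.abs(alpha * d)) < xtol
        small_change = np.abs(fnew - fold) < ftol
        return small_grad or small_step or small_change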
@@ -121,14 +126,15 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
 gradold = gradnew
 gradnew = gradf(x, *optargs)
 # If the gradient is zero then we are done.
-if np.dot(gradnew,gradnew) == 0:
+if (np.dot(gradnew, gradnew)) <= gtol:
+status = 'converged'
 return x, flog, function_eval, status

 # Adjust beta according to comparison ratio.
 if Delta < 0.25:
-beta = min(4.0*beta, betamax)
+beta = min(4.0 * beta, betamax)
 if Delta > 0.75:
-beta = max(0.5*beta, betamin)
+beta = max(0.5 * beta, betamin)

 # Update search direction using Polak-Ribiere formula, or re-start
 # in direction of negative gradient after nparams steps.
@@ -136,8 +142,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
 d = -gradnew
 nsuccess = 0
 elif success:
-gamma = np.dot(gradold - gradnew,gradnew)/(mu)
+gamma = np.dot(gradold - gradnew, gradnew) / (mu)
-d = gamma*d - gradnew
+d = gamma * d - gradnew

 # If we get here, then we haven't terminated in the given number of
 # iterations.
@@ -12,6 +12,7 @@ from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
 from threading import Thread
 import numpy
 import sys
+import time

 RUNNING = "running"
 CONVERGED = "converged"
@@ -71,7 +72,10 @@ class _Async_Optimization(Thread):

 def callback_return(self, *a):
 self.callback(*a)
-self.callback(self.SENTINEL)
+if self.outq is not None:
+    self.outq.put(self.SENTINEL)
+if self.messages:
+    print ""
 self.runsignal.clear()

 def run(self, *args, **kwargs):
@@ -105,7 +109,7 @@ class _CGDAsync(_Async_Optimization):
 status = MAX_F_EVAL

 gi = -self.df(xi, *a, **kw)
-if numpy.dot(gi.T, gi) < self.gtol:
+if numpy.dot(gi.T, gi) <= self.gtol:
 status = CONVERGED
 break
 if numpy.isnan(numpy.dot(gi.T, gi)):
@@ -123,10 +127,8 @@ class _CGDAsync(_Async_Optimization):
 xi,
 si, gi,
 fi, fi_old)
-if alphai is not None and fi2 < fi:
-fi, fi_old = fi2, fi_old2
-else:
-alphai, _, _, fi, fi_old, gfi = \
+if alphai is None:
+alphai, _, _, fi2, fi_old2, gfi = \
 line_search_wolfe2(self.f, self.df,
 xi, si, gi,
 fi, fi_old)
@@ -134,11 +136,14 @@ class _CGDAsync(_Async_Optimization):
 # This line search also failed to find a better solution.
 status = LINE_SEARCH
 break
+if fi2 < fi:
+    fi, fi_old = fi2, fi_old2
 if gfi is not None:
 gi = gfi

 if numpy.isnan(fi) or fi_old < fi:
 gi, ur, si = self.reset(xi, *a, **kw)

 else:
 xi += numpy.dot(alphai, si)
 if self.messages:
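The reworked control flow above relies on the convention that scipy's Wolfe line searches return None for the step length when they fail: only an outright failure of line_search_wolfe1 now triggers the Wolfe-2 search, and an improved function value (fi2 < fi) is accepted in one place afterwards. A self-contained illustration of that None convention, using the public scipy.optimize.line_search (the Wolfe-2 search) on a toy quadratic:

    import numpy as np
    from scipy.optimize import line_search

    f = lambda x: 0.5 * np.dot(x, x)
    df = lambda x: x
    xi = np.array([3.0, -2.0])
    si = -df(xi)                      # descent direction, as in _CGDAsync

    alpha = line_search(f, df, xi, si)[0]
    if alpha is None:
        print("line search failed; the fallback search would run here")
    else:
        xi = xi + alpha * si          # accept the step only on success
        print(xi)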
@@ -164,6 +169,7 @@ class Async_Optimize(object):
 try:
 for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
 self.callback(*ret)
+self.runsignal.clear()
 except Empty:
 pass
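For context, the consumer loop above drains the result queue until it sees the sentinel, which callback_return now places on the queue (earlier hunk) instead of passing it to the user callback. A stand-alone sketch of the same pattern with hypothetical queue contents:

    from Queue import Queue, Empty    # Python 2, matching the surrounding code

    SENTINEL = object()
    q = Queue()
    q.put(("iteration", 1))           # a result tuple from the worker
    q.put(SENTINEL)                   # worker signals completion

    try:
        for ret in iter(lambda: q.get(timeout=1), SENTINEL):
            print(ret)                # stand-in for self.callback(*ret)
    except Empty:
        pass                          # nothing arrived within the timeout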
@@ -193,13 +199,21 @@ class Async_Optimize(object):
 while self.runsignal.is_set():
 try:
 p.join(1)
-# c.join(1)
+c.join(1)
 except KeyboardInterrupt:
 # print "^C"
 self.runsignal.clear()
 p.join()
 c.join()
 if c and c.is_alive():
+# self.runsignal.set()
+# while self.runsignal.is_set():
+# try:
+# c.join(.1)
+# except KeyboardInterrupt:
+# # print "^C"
+# self.runsignal.clear()
+# c.join()
 print "WARNING: callback still running, optimisation done!"
 return p.result
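The uncommented c.join(1) makes the callback thread participate in the same timed-join loop as the optimisation thread, so Ctrl-C stays responsive. A stand-alone sketch of that pattern with a hypothetical worker:

    import threading
    import time

    runsignal = threading.Event()
    runsignal.set()

    def worker():
        for _ in range(30):           # pretend optimisation work
            if not runsignal.is_set():
                break
            time.sleep(0.1)

    p = threading.Thread(target=worker)
    p.start()
    try:
        while p.is_alive():
            p.join(1)                 # timed join keeps KeyboardInterrupt deliverable
    except KeyboardInterrupt:
        runsignal.clear()             # ask the worker to stop
        p.join()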
@@ -41,3 +41,13 @@ class FletcherReeves(GDUpdateRule):
 if tmp:
 return tmp / numpy.dot(self.gradold.T, self.gradnatold)
 return tmp
+
+class PolakRibiere(GDUpdateRule):
+    '''
+    Fletcher Reeves update rule for gamma
+    '''
+    def _gamma(self, *a, **kw):
+        tmp = numpy.dot((self.grad - self.gradold).T, self.gradnat)
+        if tmp:
+            return tmp / numpy.dot(self.gradold.T, self.gradnatold)
+        return tmp
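The new update rule computes the Polak-Ribiere coefficient, using the class's natural gradients gradnat and gradnatold alongside the plain ones; with ordinary gradients it reduces to the standard formula, restated stand-alone below:

    import numpy

    def polak_ribiere_gamma(grad, gradold):
        # gamma = (g_new - g_old)^T g_new / (g_old^T g_old)
        tmp = numpy.dot((grad - gradold).T, grad)
        if tmp:
            return tmp / numpy.dot(gradold.T, gradold)
        return tmp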
@@ -29,7 +29,8 @@ class Optimizer():
 :rtype: optimizer object.

 """
-def __init__(self, x_init, messages=False, model = None, max_f_eval=1e4, max_iters = 1e3, ftol=None, gtol=None, xtol=None):
+def __init__(self, x_init, messages=False, model=None, max_f_eval=1e4, max_iters=1e3,
+             ftol=None, gtol=None, xtol=None):
 self.opt_name = None
 self.x_init = x_init
 self.messages = messages
@@ -205,7 +206,11 @@ class opt_SCG(Optimizer):
 def opt(self, f_fp = None, f = None, fp = None):
 assert not f is None
 assert not fp is None
-opt_result = SCG(f,fp,self.x_init, display=self.messages, maxiters=self.max_iters, max_f_eval=self.max_f_eval, xtol=self.xtol, ftol=self.ftol)
+opt_result = SCG(f, fp, self.x_init, display=self.messages,
+                 maxiters=self.max_iters,
+                 max_f_eval=self.max_f_eval,
+                 xtol=self.xtol, ftol=self.ftol,
+                 gtol=self.gtol)
 self.x_opt = opt_result[0]
 self.trace = opt_result[1]
 self.f_opt = self.trace[-1]
@@ -9,6 +9,7 @@ from GPy.inference.conjugate_gradient_descent import CGD, RUNNING
 import pylab
 import time
 from scipy.optimize.optimize import rosen, rosen_der
+from GPy.inference.gradient_descent_update_rules import PolakRibiere


 class Test(unittest.TestCase):
@@ -71,10 +72,12 @@ if __name__ == "__main__":
 # df = lambda x: numpy.dot(A, x) - b
 f = rosen
 df = rosen_der
-x0 = numpy.random.randn(N) * .5
+x0 = (numpy.random.randn(N) * .5) + numpy.ones(N)
+print x0

 opt = CGD()

+pylab.ion()
 fig = pylab.figure("cgd optimize")
 if fig.axes:
 ax = fig.axes[0]
@@ -83,13 +86,13 @@ if __name__ == "__main__":
 ax = fig.add_subplot(111, projection='3d')

 interpolation = 40
-x, y = numpy.linspace(-1, 1, interpolation)[:, None], numpy.linspace(-1, 1, interpolation)[:, None]
+x, y = numpy.linspace(.5, 1.5, interpolation)[:, None], numpy.linspace(.5, 1.5, interpolation)[:, None]
 X, Y = numpy.meshgrid(x, y)
 fXY = numpy.array([f(numpy.array([x, y])) for x, y in zip(X.flatten(), Y.flatten())]).reshape(interpolation, interpolation)

 ax.plot_wireframe(X, Y, fXY)
 xopts = [x0.copy()]
-optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='o', color='r')
+optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='', color='r')

 raw_input("enter to start optimize")
 res = [0]
@@ -102,11 +105,7 @@ if __name__ == "__main__":
 if r[-1] != RUNNING:
 res[0] = r

-p, c = opt.opt_async(f, df, x0.copy(), callback, messages=True, maxiter=1000,
-report_every=20, gtol=1e-12)
+res[0] = opt.opt(f, df, x0.copy(), callback, messages=True, maxiter=1000,
+report_every=7, gtol=1e-12, update_rule=PolakRibiere)

-pylab.ion()
-pylab.show()

 pass