GPy/GPy/inference/SCG.py
2013-05-20 10:11:27 +01:00

162 lines
5.5 KiB
Python

# Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
# Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import numpy as np
import sys
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=None, ftol=None, gtol=None):
"""
Optimisation through Scaled Conjugate Gradients (SCG)
f: the objective function
gradf : the gradient function (should return a 1D np.ndarray)
x : the initial condition
Returns
x the optimal value for x
flog : a list of all the objective values
function_eval number of fn evaluations
status: string describing convergence status
"""
print " SCG"
print ' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len(str(maxiters)))
if xtol is None:
xtol = 1e-6
if ftol is None:
ftol = 1e-6
if gtol is None:
gtol = 1e-5
sigma0 = 1.0e-4
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs.
nsuccess = 0 # nsuccess counts number of successes.
beta = 1.0 # Initial scale parameter.
betamin = 1.0e-15 # Lower bound on scale.
betamax = 1.0e100 # Upper bound on scale.
status = "Not converged"
flog = [fold]
iteration = 0
# Main optimization loop.
while iteration < maxiters:
# Calculate first and second directional derivatives.
if success:
mu = np.dot(d, gradnew)
if mu >= 0:
d = -gradnew
mu = np.dot(d, gradnew)
kappa = np.dot(d, d)
sigma = sigma0 / np.sqrt(kappa)
xplus = x + sigma * d
gplus = gradf(xplus, *optargs)
theta = np.dot(d, (gplus - gradnew)) / sigma
# Increase effective curvature and evaluate step size alpha.
delta = theta + beta * kappa
if delta <= 0:
delta = beta * kappa
beta = beta - theta / kappa
alpha = -mu / delta
# Calculate the comparison ratio.
xnew = x + alpha * d
fnew = f(xnew, *optargs)
function_eval += 1
if function_eval >= max_f_eval:
status = "Maximum number of function evaluations exceeded"
return x, flog, function_eval, status
Delta = 2.*(fnew - fold) / (alpha * mu)
if Delta >= 0.:
success = True
nsuccess += 1
x = xnew
fnow = fnew
else:
success = False
fnow = fold
# Store relevant variables
flog.append(fnow) # Current function value
iteration += 1
if display:
print '\r',
print '{0:>0{mi}g} {1:> 12e} {2:> 12e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len(str(maxiters))),
# print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
if success:
# Test for termination
if (np.max(np.abs(alpha * d)) < xtol) or (np.abs(fnew - fold) < ftol):
status = 'converged'
return x, flog, function_eval, status
else:
# Update variables for new position
fold = fnew
gradold = gradnew
gradnew = gradf(x, *optargs)
current_grad = np.dot(gradnew, gradnew)
# If the gradient is zero then we are done.
if current_grad <= gtol:
status = 'converged'
break
# return x, flog, function_eval, status
# Adjust beta according to comparison ratio.
if Delta < 0.25:
beta = min(4.0 * beta, betamax)
if Delta > 0.75:
beta = max(0.5 * beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
# beta = 1. # TODO: betareset!!
nsuccess = 0
elif success:
gamma = np.dot(gradold - gradnew, gradnew) / (mu)
d = gamma * d - gradnew
else:
# If we get here, then we haven't terminated in the given number of
# iterations.
status = "maxiter exceeded"
if display:
print ""
return x, flog, function_eval, status