diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 97601b0f..f1bb5d0d 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -207,7 +207,7 @@ def bgplvm_simulation(burnin='scg', plot_sim=False,
                       max_burnin=100, true_X=False,
                       do_opt=True, max_f_eval=1000):
-    D1, D2, D3, N, M, Q = 10, 8, 8, 250, 10, 6
+    D1, D2, D3, N, M, Q = 15, 8, 8, 350, 3, 6
     slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
 
     from GPy.models import mrd
 
diff --git a/GPy/inference/SCG.py b/GPy/inference/SCG.py
index ca42acfe..7bfa16ce 100644
--- a/GPy/inference/SCG.py
+++ b/GPy/inference/SCG.py
@@ -1,6 +1,6 @@
-#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
+# Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
 
-#Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
+# Scaled Conjugate Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
 
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
 # HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
@@ -25,7 +25,7 @@ import numpy as np
 import sys
 
 
-def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
+def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=None, ftol=None, gtol=None):
     """
     Optimisation through Scaled Conjugate Gradients (SCG)
 
@@ -38,19 +38,24 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
     flog : a list of all the objective values
     """
-
+    if xtol is None:
+        xtol = 1e-6
+    if ftol is None:
+        ftol = 1e-6
+    if gtol is None:
+        gtol = 1e-5
     sigma0 = 1.0e-4
-    fold = f(x, *optargs) # Initial function value.
+    fold = f(x, *optargs)  # Initial function value.
     function_eval = 1
     fnow = fold
-    gradnew = gradf(x, *optargs) # Initial gradient.
+    gradnew = gradf(x, *optargs)  # Initial gradient.
     gradold = gradnew.copy()
-    d = -gradnew # Initial search direction.
-    success = True # Force calculation of directional derivs.
-    nsuccess = 0 # nsuccess counts number of successes.
-    beta = 1.0 # Initial scale parameter.
-    betamin = 1.0e-15 # Lower bound on scale.
-    betamax = 1.0e100 # Upper bound on scale.
+    d = -gradnew  # Initial search direction.
+    success = True  # Force calculation of directional derivs.
+    nsuccess = 0  # nsuccess counts number of successes.
+    beta = 1.0  # Initial scale parameter.
+    betamin = 1.0e-15  # Lower bound on scale.
+    betamax = 1.0e100  # Upper bound on scale.
     status = "Not converged"
 
     flog = [fold]
@@ -67,21 +72,21 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
                 d = -gradnew
                 mu = np.dot(d, gradnew)
             kappa = np.dot(d, d)
-            sigma = sigma0/np.sqrt(kappa)
-            xplus = x + sigma*d
+            sigma = sigma0 / np.sqrt(kappa)
+            xplus = x + sigma * d
             gplus = gradf(xplus, *optargs)
-            theta = np.dot(d, (gplus - gradnew))/sigma
+            theta = np.dot(d, (gplus - gradnew)) / sigma
 
         # Increase effective curvature and evaluate step size alpha.
-        delta = theta + beta*kappa
+        delta = theta + beta * kappa
         if delta <= 0:
-            delta = beta*kappa
-            beta = beta - theta/kappa
+            delta = beta * kappa
+            beta = beta - theta / kappa
 
-        alpha = - mu/delta
+        alpha = -mu / delta
 
         # Calculate the comparison ratio.
-        xnew = x + alpha*d
+        xnew = x + alpha * d
         fnew = f(xnew, *optargs)
         function_eval += 1
 
@@ -89,8 +94,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
             status = "Maximum number of function evaluations exceeded"
             return x, flog, function_eval, status
 
-        Delta = 2.*(fnew - fold)/(alpha*mu)
-        if Delta >= 0.:
+        Delta = 2.*(fnew - fold) / (alpha * mu)
+        if Delta >= 0.:
             success = True
             nsuccess += 1
             x = xnew
@@ -100,19 +105,19 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
             fnow = fold
 
         # Store relevant variables
-        flog.append(fnow) # Current function value
+        flog.append(fnow)  # Current function value
 
        iteration += 1
 
         if display:
             print '\r',
-            print 'Iteration: {0:>5g} Objective:{1:> 12e} Scale:{2:> 12e}'.format(iteration, fnow, beta),
+            print 'i: {0:>5g} f:{1:> 12e} b:{2:> 12e} |g|:{3:> 12e}'.format(iteration, fnow, beta, np.dot(gradnew, gradnew)),
             # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
             sys.stdout.flush()
 
         if success:
             # Test for termination
-            if (np.max(np.abs(alpha*d)) < xtol) or (np.abs(fnew-fold) < ftol):
-                status='converged'
+            if (np.dot(gradnew, gradnew)) <= gtol or (np.max(np.abs(alpha * d)) < xtol) or (np.abs(fnew - fold) < ftol):
+                status = 'converged'
                 return x, flog, function_eval, status
             else:
@@ -121,14 +126,15 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
                 gradold = gradnew
                 gradnew = gradf(x, *optargs)
                 # If the gradient is zero then we are done.
-                if np.dot(gradnew,gradnew) == 0:
+                if (np.dot(gradnew, gradnew)) <= gtol:
+                    status = 'converged'
                     return x, flog, function_eval, status
 
         # Adjust beta according to comparison ratio.
         if Delta < 0.25:
-            beta = min(4.0*beta, betamax)
+            beta = min(4.0 * beta, betamax)
         if Delta > 0.75:
-            beta = max(0.5*beta, betamin)
+            beta = max(0.5 * beta, betamin)
 
         # Update search direction using Polak-Ribiere formula, or re-start
         # in direction of negative gradient after nparams steps.
@@ -136,8 +142,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
             d = -gradnew
             nsuccess = 0
         elif success:
-            gamma = np.dot(gradold - gradnew,gradnew)/(mu)
-            d = gamma*d - gradnew
+            gamma = np.dot(gradold - gradnew, gradnew) / (mu)
+            d = gamma * d - gradnew
 
     # If we get here, then we haven't terminated in the given number of
    # iterations.
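
A minimal usage sketch of the reworked SCG termination (illustration only; the import path for SCG is an assumption, and rosen/rosen_der are the SciPy helpers already used in the test changes further down). With xtol, ftol and gtol left as None they default to 1e-6, 1e-6 and 1e-5, and the optimiser now also reports 'converged' once the squared gradient norm np.dot(gradnew, gradnew) drops to gtol or below:

# Sketch only: exercising the new gtol-based stopping criterion of SCG.
# The import path below is an assumption; adjust it to your checkout.
import numpy as np
from scipy.optimize.optimize import rosen, rosen_der
from GPy.inference.SCG import SCG

x0 = np.zeros(2)
# gtol is compared against the *squared* gradient norm, np.dot(grad, grad).
x_opt, flog, n_evals, status = SCG(rosen, rosen_der, x0, maxiters=500,
                                   display=False, xtol=1e-6, ftol=1e-6, gtol=1e-5)
print status, n_evals, flog[-1]
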
diff --git a/GPy/inference/conjugate_gradient_descent.py b/GPy/inference/conjugate_gradient_descent.py
index c9981fe8..2fcd9ba0 100644
--- a/GPy/inference/conjugate_gradient_descent.py
+++ b/GPy/inference/conjugate_gradient_descent.py
@@ -12,6 +12,7 @@ from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
 from threading import Thread
 import numpy
 import sys
+import time
 
 RUNNING = "running"
 CONVERGED = "converged"
@@ -71,7 +72,10 @@ class _Async_Optimization(Thread):
 
     def callback_return(self, *a):
         self.callback(*a)
-        self.callback(self.SENTINEL)
+        if self.outq is not None:
+            self.outq.put(self.SENTINEL)
+        if self.messages:
+            print ""
         self.runsignal.clear()
 
     def run(self, *args, **kwargs):
@@ -105,7 +109,7 @@ class _CGDAsync(_Async_Optimization):
                 status = MAX_F_EVAL
 
             gi = -self.df(xi, *a, **kw)
-            if numpy.dot(gi.T, gi) < self.gtol:
+            if numpy.dot(gi.T, gi) <= self.gtol:
                 status = CONVERGED
                 break
             if numpy.isnan(numpy.dot(gi.T, gi)):
@@ -123,10 +127,8 @@ class _CGDAsync(_Async_Optimization):
                                    xi, si, gi,
                                    fi, fi_old)
 
-            if alphai is not None and fi2 < fi:
-                fi, fi_old = fi2, fi_old2
-            else:
-                alphai, _, _, fi, fi_old, gfi = \
+            if alphai is None:
+                alphai, _, _, fi2, fi_old2, gfi = \
                     line_search_wolfe2(self.f, self.df,
                                        xi, si, gi,
                                        fi, fi_old)
@@ -134,11 +136,14 @@ class _CGDAsync(_Async_Optimization):
                 # This line search also failed to find a better solution.
                 status = LINE_SEARCH
                 break
+            if fi2 < fi:
+                fi, fi_old = fi2, fi_old2
             if gfi is not None:
                 gi = gfi
             if numpy.isnan(fi) or fi_old < fi:
                 gi, ur, si = self.reset(xi, *a, **kw)
+
             else:
                 xi += numpy.dot(alphai, si)
 
             if self.messages:
@@ -164,6 +169,7 @@ class Async_Optimize(object):
         try:
             for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
                 self.callback(*ret)
+            self.runsignal.clear()
         except Empty:
             pass
 
@@ -193,13 +199,21 @@ class Async_Optimize(object):
         while self.runsignal.is_set():
             try:
                 p.join(1)
-                # c.join(1)
+                c.join(1)
             except KeyboardInterrupt:
                 # print "^C"
                 self.runsignal.clear()
                 p.join()
                 c.join()
         if c and c.is_alive():
+#             self.runsignal.set()
+#             while self.runsignal.is_set():
+#                 try:
+#                     c.join(.1)
+#                 except KeyboardInterrupt:
+#                     # print "^C"
+#                     self.runsignal.clear()
+#                     c.join()
             print "WARNING: callback still running, optimisation done!"
 
         return p.result
diff --git a/GPy/inference/gradient_descent_update_rules.py b/GPy/inference/gradient_descent_update_rules.py
index b3ccb2ce..1c14ed63 100644
--- a/GPy/inference/gradient_descent_update_rules.py
+++ b/GPy/inference/gradient_descent_update_rules.py
@@ -41,3 +41,13 @@ class FletcherReeves(GDUpdateRule):
         if tmp:
             return tmp / numpy.dot(self.gradold.T, self.gradnatold)
         return tmp
+
+class PolakRibiere(GDUpdateRule):
+    '''
+    Polak-Ribiere update rule for gamma
+    '''
+    def _gamma(self, *a, **kw):
+        tmp = numpy.dot((self.grad - self.gradold).T, self.gradnat)
+        if tmp:
+            return tmp / numpy.dot(self.gradold.T, self.gradnatold)
+        return tmp
diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py
index 75cd94ba..e208392b 100644
--- a/GPy/inference/optimization.py
+++ b/GPy/inference/optimization.py
@@ -29,7 +29,8 @@ class Optimizer():
 
     :rtype: optimizer object.
""" - def __init__(self, x_init, messages=False, model = None, max_f_eval=1e4, max_iters = 1e3, ftol=None, gtol=None, xtol=None): + def __init__(self, x_init, messages=False, model=None, max_f_eval=1e4, max_iters=1e3, + ftol=None, gtol=None, xtol=None): self.opt_name = None self.x_init = x_init self.messages = messages @@ -205,7 +206,11 @@ class opt_SCG(Optimizer): def opt(self, f_fp = None, f = None, fp = None): assert not f is None assert not fp is None - opt_result = SCG(f,fp,self.x_init, display=self.messages, maxiters=self.max_iters, max_f_eval=self.max_f_eval, xtol=self.xtol, ftol=self.ftol) + opt_result = SCG(f, fp, self.x_init, display=self.messages, + maxiters=self.max_iters, + max_f_eval=self.max_f_eval, + xtol=self.xtol, ftol=self.ftol, + gtol=self.gtol) self.x_opt = opt_result[0] self.trace = opt_result[1] self.f_opt = self.trace[-1] diff --git a/GPy/testing/cgd_tests.py b/GPy/testing/cgd_tests.py index 79e5e08b..19c1c21b 100644 --- a/GPy/testing/cgd_tests.py +++ b/GPy/testing/cgd_tests.py @@ -9,6 +9,7 @@ from GPy.inference.conjugate_gradient_descent import CGD, RUNNING import pylab import time from scipy.optimize.optimize import rosen, rosen_der +from GPy.inference.gradient_descent_update_rules import PolakRibiere class Test(unittest.TestCase): @@ -71,10 +72,12 @@ if __name__ == "__main__": # df = lambda x: numpy.dot(A, x) - b f = rosen df = rosen_der - x0 = numpy.random.randn(N) * .5 + x0 = (numpy.random.randn(N) * .5) + numpy.ones(N) + print x0 opt = CGD() + pylab.ion() fig = pylab.figure("cgd optimize") if fig.axes: ax = fig.axes[0] @@ -83,13 +86,13 @@ if __name__ == "__main__": ax = fig.add_subplot(111, projection='3d') interpolation = 40 - x, y = numpy.linspace(-1, 1, interpolation)[:, None], numpy.linspace(-1, 1, interpolation)[:, None] + x, y = numpy.linspace(.5, 1.5, interpolation)[:, None], numpy.linspace(.5, 1.5, interpolation)[:, None] X, Y = numpy.meshgrid(x, y) fXY = numpy.array([f(numpy.array([x, y])) for x, y in zip(X.flatten(), Y.flatten())]).reshape(interpolation, interpolation) ax.plot_wireframe(X, Y, fXY) xopts = [x0.copy()] - optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='o', color='r') + optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='', color='r') raw_input("enter to start optimize") res = [0] @@ -102,11 +105,7 @@ if __name__ == "__main__": if r[-1] != RUNNING: res[0] = r - p, c = opt.opt_async(f, df, x0.copy(), callback, messages=True, maxiter=1000, - report_every=20, gtol=1e-12) - - - pylab.ion() - pylab.show() + res[0] = opt.opt(f, df, x0.copy(), callback, messages=True, maxiter=1000, + report_every=7, gtol=1e-12, update_rule=PolakRibiere) pass