merge with current GPy devel

This commit is contained in:
Zhenwen Dai 2015-12-09 17:27:06 +00:00
commit c0e6978054
164 changed files with 733 additions and 5931 deletions

View file

@ -1,3 +1,7 @@
from . import latent_function_inference
from . import optimization
from . import latent_function_inference
from . import mcmc
import sys
sys.modules['GPy.inference.optimization'] = optimization
sys.modules['GPy.inference.optimization.optimization'] = optimization

View file

@ -22,7 +22,7 @@ class ExactGaussianInference(LatentFunctionInference):
def __init__(self):
pass#self._YYTfactor_cache = caching.cache()
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, K=None, precision=None):
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, K=None, precision=None, Z_tilde=None):
"""
Returns a Posterior class containing essential quantities of the posterior
"""
@ -49,9 +49,15 @@ class ExactGaussianInference(LatentFunctionInference):
log_marginal = 0.5*(-Y.size * log_2_pi - Y.shape[1] * W_logdet - np.sum(alpha * YYT_factor))
if Z_tilde is not None:
# This is a correction term for the log marginal likelihood
# In EP this is log Z_tilde, which is the difference between the
# Gaussian marginal and Z_EP
log_marginal += Z_tilde
dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata)
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}

View file

@ -2,14 +2,14 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util.linalg import jitchol, DSYR, dtrtrs, dtrtri
from ...core.parameterization.observable_array import ObsAr
from paramz import ObsAr
from . import ExactGaussianInference, VarDTC
from ...util import diag
log_2_pi = np.log(2*np.pi)
class EPBase(object):
def __init__(self, epsilon=1e-6, eta=1., delta=1.):
def __init__(self, epsilon=1e-6, eta=1., delta=1., always_reset=False):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006.
@ -20,8 +20,12 @@ class EPBase(object):
:type eta: float64
:param delta: damping EP updates factor.
:type delta: float64
:param always_reset: setting to always reset the approximation at the beginning of every inference call.
:type always_reest: boolean
"""
super(EPBase, self).__init__()
self.always_reset = always_reset
self.epsilon, self.eta, self.delta = epsilon, eta, delta
self.reset()
@ -38,37 +42,46 @@ class EPBase(object):
class EP(EPBase, ExactGaussianInference):
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, precision=None, K=None):
if self.always_reset:
self.reset()
num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)"
assert output_dim == 1, "ep in 1D only (for now!)"
if K is None:
K = kern.K(X)
if self._ep_approximation is None:
#if we don't yet have the results of runnign EP, run EP and store the computed factors in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
else:
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation
return super(EP, self).inference(kern, X, likelihood, mu_tilde[:,None], mean_function=mean_function, Y_metadata=Y_metadata, precision=1./tau_tilde, K=K)
return super(EP, self).inference(kern, X, likelihood, mu_tilde[:,None], mean_function=mean_function, Y_metadata=Y_metadata, precision=1./tau_tilde, K=K, Z_tilde=np.log(Z_tilde).sum())
def expectation_propagation(self, K, Y, likelihood, Y_metadata):
num_data, data_dim = Y.shape
assert data_dim == 1, "This EP methods only works for 1D outputs"
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(num_data)
Sigma = K.copy()
diag.add(Sigma, 1e-7)
# Makes computing the sign quicker if we work with numpy arrays rather
# than ObsArrays
Y = Y.values.copy()
#Initial values - Marginal moments
Z_hat = np.empty(num_data,dtype=np.float64)
mu_hat = np.empty(num_data,dtype=np.float64)
sigma2_hat = np.empty(num_data,dtype=np.float64)
tau_cav = np.empty(num_data,dtype=np.float64)
v_cav = np.empty(num_data,dtype=np.float64)
#initial values - Gaussian factors
if self.old_mutilde is None:
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
@ -80,22 +93,32 @@ class EP(EPBase, ExactGaussianInference):
#Approximation
tau_diff = self.epsilon + 1.
v_diff = self.epsilon + 1.
tau_tilde_old = np.nan
v_tilde_old = np.nan
iterations = 0
while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
update_order = np.random.permutation(num_data)
for i in update_order:
#Cavity distribution parameters
tau_cav = 1./Sigma[i,i] - self.eta*tau_tilde[i]
v_cav = mu[i]/Sigma[i,i] - self.eta*v_tilde[i]
tau_cav[i] = 1./Sigma[i,i] - self.eta*tau_tilde[i]
v_cav[i] = mu[i]/Sigma[i,i] - self.eta*v_tilde[i]
if Y_metadata is not None:
# Pick out the relavent metadata for Yi
Y_metadata_i = {}
for key in Y_metadata.keys():
Y_metadata_i[key] = Y_metadata[key][i, :]
else:
Y_metadata_i = None
#Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav, v_cav)#, Y_metadata=None)#=(None if Y_metadata is None else Y_metadata[i]))
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav[i], v_cav[i], Y_metadata_i=Y_metadata_i)
#Site parameters update
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
tau_tilde[i] += delta_tau
v_tilde[i] += delta_v
#Posterior distribution parameters update
DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
ci = delta_tau/(1.+ delta_tau*Sigma[i,i])
DSYR(Sigma, Sigma[:,i].copy(), -ci)
mu = np.dot(Sigma, v_tilde)
#(re) compute Sigma and mu using full Cholesky decompy
@ -108,7 +131,7 @@ class EP(EPBase, ExactGaussianInference):
mu = np.dot(Sigma,v_tilde)
#monitor convergence
if iterations>0:
if iterations > 0:
tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
v_diff = np.mean(np.square(v_tilde-v_tilde_old))
tau_tilde_old = tau_tilde.copy()
@ -117,7 +140,11 @@ class EP(EPBase, ExactGaussianInference):
iterations += 1
mu_tilde = v_tilde/tau_tilde
return mu, Sigma, mu_tilde, tau_tilde, Z_hat
mu_cav = v_cav/tau_cav
sigma2_sigma2tilde = 1./tau_cav + 1./tau_tilde
Z_tilde = np.exp(np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
+ 0.5*((mu_cav - mu_tilde)**2) / (sigma2_sigma2tilde))
return mu, Sigma, mu_tilde, tau_tilde, Z_tilde
class EPDTC(EPBase, VarDTC):
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
@ -133,16 +160,16 @@ class EPDTC(EPBase, VarDTC):
Kmn = psi1.T
if self._ep_approximation is None:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
else:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation
return super(EPDTC, self).inference(kern, X, Z, likelihood, mu_tilde,
mean_function=mean_function,
Y_metadata=Y_metadata,
precision=tau_tilde,
Lm=Lm, dL_dKmm=dL_dKmm,
psi0=psi0, psi1=psi1, psi2=psi2)
psi0=psi0, psi1=psi1, psi2=psi2, Z_tilde=np.log(Z_tilde).sum())
def expectation_propagation(self, Kmm, Kmn, Y, likelihood, Y_metadata):
num_data, output_dim = Y.shape
@ -167,6 +194,9 @@ class EPDTC(EPBase, VarDTC):
mu_hat = np.zeros(num_data,dtype=np.float64)
sigma2_hat = np.zeros(num_data,dtype=np.float64)
tau_cav = np.empty(num_data,dtype=np.float64)
v_cav = np.empty(num_data,dtype=np.float64)
#initial values - Gaussian factors
if self.old_mutilde is None:
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
@ -186,10 +216,10 @@ class EPDTC(EPBase, VarDTC):
while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
for i in update_order:
#Cavity distribution parameters
tau_cav = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
v_cav = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
tau_cav[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
v_cav[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
#Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav, v_cav)#, Y_metadata=None)#=(None if Y_metadata is None else Y_metadata[i]))
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav[i], v_cav[i])#, Y_metadata=None)#=(None if Y_metadata is None else Y_metadata[i]))
#Site parameters update
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
@ -233,5 +263,8 @@ class EPDTC(EPBase, VarDTC):
iterations += 1
mu_tilde = v_tilde/tau_tilde
return mu, Sigma, ObsAr(mu_tilde[:,None]), tau_tilde, Z_hat
mu_cav = v_cav/tau_cav
sigma2_sigma2tilde = 1./tau_cav + 1./tau_tilde
Z_tilde = np.exp(np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
+ 0.5*((mu_cav - mu_tilde)**2) / (sigma2_sigma2tilde))
return mu, Sigma, ObsAr(mu_tilde[:,None]), tau_tilde, Z_tilde

View file

@ -3,9 +3,8 @@
import numpy as np
from ...core import Model
from ...core.parameterization import variational
from GPy.core.parameterization import variational
from ...util.linalg import tdot
from GPy.core.parameterization.variational import VariationalPosterior
def infer_newX(model, Y_new, optimize=True, init='L2'):
"""
@ -62,14 +61,12 @@ class InferenceX(Model):
# self.kern.GPU(True)
from copy import deepcopy
self.posterior = deepcopy(model.posterior)
from ...core.parameterization.variational import VariationalPosterior
if isinstance(model.X, VariationalPosterior):
if isinstance(model.X, variational.VariationalPosterior):
self.uncertain_input = True
from ...models.ss_gplvm import IBPPrior
from ...models.ss_mrd import IBPPrior_SSMRD
if isinstance(model.variational_prior, IBPPrior) or isinstance(model.variational_prior, IBPPrior_SSMRD):
from ...core.parameterization.variational import SpikeAndSlabPrior
self.variational_prior = SpikeAndSlabPrior(pi=0.5, learnPi=False, group_spike=False)
self.variational_prior = variational.SpikeAndSlabPrior(pi=0.5, learnPi=False, group_spike=False)
else:
self.variational_prior = model.variational_prior.copy()
else:
@ -105,17 +102,16 @@ class InferenceX(Model):
idx = dist.argmin(axis=1)
from ...models import SSGPLVM
from ...util.misc import param_to_array
if isinstance(model, SSGPLVM):
X = variational.SpikeAndSlabPosterior(param_to_array(model.X.mean[idx]), param_to_array(model.X.variance[idx]), param_to_array(model.X.gamma[idx]))
X = variational.SpikeAndSlabPosterior((model.X.mean[idx].values), (model.X.variance[idx].values), (model.X.gamma[idx].values))
if model.group_spike:
X.gamma.fix()
else:
if self.uncertain_input and self.sparse_gp:
X = variational.NormalPosterior(param_to_array(model.X.mean[idx]), param_to_array(model.X.variance[idx]))
X = variational.NormalPosterior((model.X.mean[idx].values), (model.X.variance[idx].values))
else:
from ...core import Param
X = Param('latent mean',param_to_array(model.X[idx]).copy())
X = Param('latent mean',(model.X[idx].values).copy())
return X
@ -160,8 +156,7 @@ class InferenceX(Model):
self.X.gradient = X_grad
if self.uncertain_input:
from ...core.parameterization.variational import SpikeAndSlabPrior
if isinstance(self.variational_prior, SpikeAndSlabPrior):
if isinstance(self.variational_prior, variational.SpikeAndSlabPrior):
# Update Log-likelihood
KL_div = self.variational_prior.KL_divergence(self.X)
# update for the KL divergence

View file

@ -4,7 +4,7 @@
from .posterior import Posterior
from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify
from ...util import diag
from ...core.parameterization.variational import VariationalPosterior
from GPy.core.parameterization.variational import VariationalPosterior
import numpy as np
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
@ -23,8 +23,7 @@ class VarDTC(LatentFunctionInference):
"""
const_jitter = 1e-8
def __init__(self, limit=1):
#self._YYTfactor_cache = caching.cache()
from ...util.caching import Cacher
from paramz.caching import Cacher
self.limit = limit
self.get_trYYT = Cacher(self._get_trYYT, limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
@ -45,7 +44,7 @@ class VarDTC(LatentFunctionInference):
def __setstate__(self, state):
# has to be overridden, as Cacher objects cannot be pickled.
self.limit = state
from ...util.caching import Cacher
from paramz.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
@ -64,7 +63,7 @@ class VarDTC(LatentFunctionInference):
def get_VVTfactor(self, Y, prec):
return Y * prec # TODO chache this, and make it effective
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, mean_function=None, precision=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, mean_function=None, precision=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, Z_tilde=None):
assert mean_function is None, "inference with a mean function not implemented"
num_data, output_dim = Y.shape
@ -152,6 +151,12 @@ class VarDTC(LatentFunctionInference):
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, precision, het_noise,
psi0, A, LB, trYYT, data_fit, Y)
if Z_tilde is not None:
# This is a correction term for the log marginal likelihood
# In EP this is log Z_tilde, which is the difference between the
# Gaussian marginal and Z_EP
log_marginal += Z_tilde
#noise derivatives
dL_dR = _compute_dL_dR(likelihood,
het_noise, uncertain_inputs, LB,

View file

@ -4,7 +4,7 @@
from .posterior import Posterior
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri,pdinv
from ...util import diag
from ...core.parameterization.variational import VariationalPosterior
from GPy.core.parameterization.variational import VariationalPosterior
import numpy as np
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
@ -28,7 +28,7 @@ class VarDTC_minibatch(LatentFunctionInference):
self.limit = limit
# Cache functions
from ...util.caching import Cacher
from paramz.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
@ -46,7 +46,7 @@ class VarDTC_minibatch(LatentFunctionInference):
self.mpi_comm = None
self.midRes = {}
self.batch_pos = 0
from ...util.caching import Cacher
from paramz.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)

View file

@ -1,2 +1,5 @@
from .scg import SCG
from .optimization import *
from paramz.optimization import stochastics, Optimizer
from paramz.optimization import *
import sys
sys.modules['GPy.inference.optimization.stochastics'] = stochastics
sys.modules['GPy.inference.optimization.Optimizer'] = Optimizer

View file

@ -1,285 +0,0 @@
# Copyright (c) 2012-2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from .gradient_descent_update_rules import FletcherReeves, \
PolakRibiere
from Queue import Empty
from multiprocessing import Value
from multiprocessing.queues import Queue
from multiprocessing.synchronize import Event
from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
from threading import Thread
import numpy
import sys
import time
RUNNING = "running"
CONVERGED = "converged"
MAXITER = "maximum number of iterations reached"
MAX_F_EVAL = "maximum number of function calls reached"
LINE_SEARCH = "line search failed"
KBINTERRUPT = "interrupted"
class _Async_Optimization(Thread):
def __init__(self, f, df, x0, update_rule, runsignal, SENTINEL,
report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3,
gtol=1e-6, outqueue=None, *args, **kw):
"""
Helper Process class for async optimization
f_call and df_call are Multiprocessing Values, for synchronized assignment
"""
self.f_call = Value('i', 0)
self.df_call = Value('i', 0)
self.f = self.f_wrapper(f, self.f_call)
self.df = self.f_wrapper(df, self.df_call)
self.x0 = x0
self.update_rule = update_rule
self.report_every = report_every
self.messages = messages
self.maxiter = maxiter
self.max_f_eval = max_f_eval
self.gtol = gtol
self.SENTINEL = SENTINEL
self.runsignal = runsignal
# self.parent = parent
# self.result = None
self.outq = outqueue
super(_Async_Optimization, self).__init__(target=self.run,
name="CG Optimization",
*args, **kw)
# def __enter__(self):
# return self
#
# def __exit__(self, type, value, traceback):
# return isinstance(value, TypeError)
def f_wrapper(self, f, counter):
def f_w(*a, **kw):
counter.value += 1
return f(*a, **kw)
return f_w
def callback(self, *a):
if self.outq is not None:
self.outq.put(a)
# self.parent and self.parent.callback(*a, **kw)
pass
# print "callback done"
def callback_return(self, *a):
self.callback(*a)
if self.outq is not None:
self.outq.put(self.SENTINEL)
if self.messages:
print("")
self.runsignal.clear()
def run(self, *args, **kwargs):
raise NotImplementedError("Overwrite this with optimization (for async use)")
pass
class _CGDAsync(_Async_Optimization):
def reset(self, xi, *a, **kw):
gi = -self.df(xi, *a, **kw)
si = gi
ur = self.update_rule(gi)
return gi, ur, si
def run(self, *a, **kw):
status = RUNNING
fi = self.f(self.x0)
fi_old = fi + 5000
gi, ur, si = self.reset(self.x0, *a, **kw)
xi = self.x0
xi_old = numpy.nan
it = 0
while it < self.maxiter:
if not self.runsignal.is_set():
break
if self.f_call.value > self.max_f_eval:
status = MAX_F_EVAL
gi = -self.df(xi, *a, **kw)
if numpy.dot(gi.T, gi) <= self.gtol:
status = CONVERGED
break
if numpy.isnan(numpy.dot(gi.T, gi)):
if numpy.any(numpy.isnan(xi_old)):
status = CONVERGED
break
self.reset(xi_old)
gammai = ur(gi)
if gammai < 1e-6 or it % xi.shape[0] == 0:
gi, ur, si = self.reset(xi, *a, **kw)
si = gi + gammai * si
alphai, _, _, fi2, fi_old2, gfi = line_search_wolfe1(self.f,
self.df,
xi,
si, gi,
fi, fi_old)
if alphai is None:
alphai, _, _, fi2, fi_old2, gfi = \
line_search_wolfe2(self.f, self.df,
xi, si, gi,
fi, fi_old)
if alphai is None:
# This line search also failed to find a better solution.
status = LINE_SEARCH
break
if fi2 < fi:
fi, fi_old = fi2, fi_old2
if gfi is not None:
gi = gfi
if numpy.isnan(fi) or fi_old < fi:
gi, ur, si = self.reset(xi, *a, **kw)
else:
xi += numpy.dot(alphai, si)
if self.messages:
sys.stdout.write("\r")
sys.stdout.flush()
sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi)))
if it % self.report_every == 0:
self.callback(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
it += 1
else:
status = MAXITER
self.callback_return(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value, status]
class Async_Optimize(object):
callback = lambda *x: None
runsignal = Event()
SENTINEL = "SENTINEL"
def async_callback_collect(self, q):
while self.runsignal.is_set():
try:
for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
self.callback(*ret)
self.runsignal.clear()
except Empty:
pass
def opt_async(self, f, df, x0, callback, update_rule=PolakRibiere,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs):
self.runsignal.set()
c = None
outqueue = None
if callback:
outqueue = Queue()
self.callback = callback
c = Thread(target=self.async_callback_collect, args=(outqueue,))
c.start()
p = _CGDAsync(f, df, x0, update_rule, self.runsignal, self.SENTINEL,
report_every=report_every, messages=messages, maxiter=maxiter,
max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs)
p.start()
return p, c
def opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs):
p, c = self.opt_async(f, df, x0, callback, update_rule, messages,
maxiter, max_f_eval, gtol,
report_every, *args, **kwargs)
while self.runsignal.is_set():
try:
p.join(1)
if c: c.join(1)
except KeyboardInterrupt:
# print "^C"
self.runsignal.clear()
p.join()
if c: c.join()
if c and c.is_alive():
# self.runsignal.set()
# while self.runsignal.is_set():
# try:
# c.join(.1)
# except KeyboardInterrupt:
# # print "^C"
# self.runsignal.clear()
# c.join()
print("WARNING: callback still running, optimisation done!")
return p.result
class CGD(Async_Optimize):
'''
Conjugate gradient descent algorithm to minimize
function f with gradients df, starting at x0
with update rule update_rule
if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules
'''
opt_name = "Conjugate Gradient Descent"
def opt_async(self, *a, **kw):
"""
opt_async(self, f, df, x0, callback, update_rule=FletcherReeves,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, \*args, \*\*kwargs)
callback gets called every `report_every` iterations
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules
f, and df will be called with
f(xi, \*args, \*\*kwargs)
df(xi, \*args, \*\*kwargs)
**Returns:**
Started `Process` object, optimizing asynchronously
**Calls:**
callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message)
at end of optimization!
"""
return super(CGD, self).opt_async(*a, **kw)
def opt(self, *a, **kw):
"""
opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, \*args, \*\*kwargs)
Minimize f, calling callback every `report_every` iterations with following syntax:
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
if df returns tuple (grad, natgrad) it will optimize according
to natural gradient rules
f, and df will be called with
f(xi, \*args, \*\*kwargs)
df(xi, \*args, \*\*kwargs)
**returns**
x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message
at end of optimization
"""
return super(CGD, self).opt(*a, **kw)

View file

@ -1,51 +0,0 @@
# Copyright (c) 2012-2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy
class GDUpdateRule():
_gradnat = None
_gradnatold = None
def __init__(self, initgrad, initgradnat=None):
self.grad = initgrad
if initgradnat:
self.gradnat = initgradnat
else:
self.gradnat = initgrad
# self.grad, self.gradnat
def _gamma(self):
raise NotImplemented("""Implement gamma update rule here,
you can use self.grad and self.gradold for parameters, as well as
self.gradnat and self.gradnatold for natural gradients.""")
def __call__(self, grad, gradnat=None, si=None, *args, **kw):
"""
Return gamma for given gradients and optional natural gradients
"""
if not gradnat:
gradnat = grad
self.gradold = self.grad
self.gradnatold = self.gradnat
self.grad = grad
self.gradnat = gradnat
self.si = si
return self._gamma(*args, **kw)
class FletcherReeves(GDUpdateRule):
'''
Fletcher Reeves update rule for gamma
'''
def _gamma(self, *a, **kw):
tmp = numpy.dot(self.grad.T, self.gradnat)
if tmp:
return tmp / numpy.dot(self.gradold.T, self.gradnatold)
return tmp
class PolakRibiere(GDUpdateRule):
'''
Fletcher Reeves update rule for gamma
'''
def _gamma(self, *a, **kw):
tmp = numpy.dot((self.grad - self.gradold).T, self.gradnat)
if tmp:
return tmp / numpy.dot(self.gradold.T, self.gradnatold)
return tmp

View file

@ -1,193 +0,0 @@
# Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2014)
# Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from __future__ import print_function
import numpy as np
import sys
def print_out(len_maxiters, fnow, current_grad, beta, iteration):
print('\r', end=' ')
print('{0:>0{mi}g} {1:> 12e} {2:< 12.6e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
def exponents(fnow, current_grad):
exps = [np.abs(np.float(fnow)), current_grad]
return np.sign(exps) * np.log10(exps).astype(int)
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, xtol=None, ftol=None, gtol=None):
"""
Optimisation through Scaled Conjugate Gradients (SCG)
f: the objective function
gradf : the gradient function (should return a 1D np.ndarray)
x : the initial condition
Returns
x the optimal value for x
flog : a list of all the objective values
function_eval number of fn evaluations
status: string describing convergence status
"""
if xtol is None:
xtol = 1e-6
if ftol is None:
ftol = 1e-6
if gtol is None:
gtol = 1e-5
sigma0 = 1.0e-7
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
function_eval += 1
#if any(np.isnan(gradnew)):
# raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
current_grad = np.dot(gradnew, gradnew)
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs.
nsuccess = 0 # nsuccess counts number of successes.
beta = 1.0 # Initial scale parameter.
betamin = 1.0e-15 # Lower bound on scale.
betamax = 1.0e15 # Upper bound on scale.
status = "Not converged"
flog = [fold]
iteration = 0
len_maxiters = len(str(maxiters))
if display:
print(' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters))
exps = exponents(fnow, current_grad)
p_iter = iteration
# Main optimization loop.
while iteration < maxiters:
# Calculate first and second directional derivatives.
if success:
mu = np.dot(d, gradnew)
if mu >= 0:
d = -gradnew
mu = np.dot(d, gradnew)
kappa = np.dot(d, d)
sigma = sigma0 / np.sqrt(kappa)
xplus = x + sigma * d
gplus = gradf(xplus, *optargs)
function_eval += 1
theta = np.dot(d, (gplus - gradnew)) / sigma
# Increase effective curvature and evaluate step size alpha.
delta = theta + beta * kappa
if delta <= 0:
delta = beta * kappa
beta = beta - theta / kappa
alpha = -mu / delta
# Calculate the comparison ratio.
xnew = x + alpha * d
fnew = f(xnew, *optargs)
function_eval += 1
if function_eval >= max_f_eval:
status = "maximum number of function evaluations exceeded"
break
return x, flog, function_eval, status
Delta = 2.*(fnew - fold) / (alpha * mu)
if Delta >= 0.:
success = True
nsuccess += 1
x = xnew
fnow = fnew
else:
success = False
fnow = fold
# Store relevant variables
flog.append(fnow) # Current function value
iteration += 1
if display:
print_out(len_maxiters, fnow, current_grad, beta, iteration)
n_exps = exponents(fnow, current_grad)
if iteration - p_iter >= 20 * np.random.rand():
a = iteration >= p_iter * 2.78
b = np.any(n_exps < exps)
if a or b:
p_iter = iteration
print('')
if b:
exps = n_exps
if success:
# Test for termination
if (np.abs(fnew - fold) < ftol):
status = 'converged - relative reduction in objective'
break
# return x, flog, function_eval, status
elif (np.max(np.abs(alpha * d)) < xtol):
status = 'converged - relative stepsize'
break
else:
# Update variables for new position
gradold = gradnew
gradnew = gradf(x, *optargs)
function_eval += 1
current_grad = np.dot(gradnew, gradnew)
fold = fnew
# If the gradient is zero then we are done.
if current_grad <= gtol:
status = 'converged - relative reduction in gradient'
break
# return x, flog, function_eval, status
# Adjust beta according to comparison ratio.
if Delta < 0.25:
beta = min(4.0 * beta, betamax)
if Delta > 0.75:
beta = max(0.25 * beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
beta = 1. # This is not in the original paper
nsuccess = 0
elif success:
Gamma = np.dot(gradold - gradnew, gradnew) / (mu)
d = Gamma * d - gradnew
else:
# If we get here, then we haven't terminated in the given number of
# iterations.
status = "maxiter exceeded"
if display:
print_out(len_maxiters, fnow, current_grad, beta, iteration)
print("")
print(status)
return x, flog, function_eval, status

View file

@ -1,92 +0,0 @@
# Copyright (c) 2012-2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
class StochasticStorage(object):
'''
This is a container for holding the stochastic parameters,
such as subset indices or step length and so on.
self.d has to be a list of lists:
[dimension indices, nan indices for those dimensions]
so that the minibatches can be used as efficiently as possible.10
'''
def __init__(self, model):
"""
Initialize this stochastic container using the given model
"""
def do_stochastics(self):
"""
Update the internal state to the next batch of the stochastic
descent algorithm.
"""
pass
def reset(self):
"""
Reset the state of this stochastics generator.
"""
class SparseGPMissing(StochasticStorage):
def __init__(self, model, batchsize=1):
"""
Here we want to loop over all dimensions everytime.
Thus, we can just make sure the loop goes over self.d every
time. We will try to get batches which look the same together
which speeds up calculations significantly.
"""
import numpy as np
self.Y = model.Y_normalized
bdict = {}
#For N > 1000 array2string default crops
opt = np.get_printoptions()
np.set_printoptions(threshold=np.inf)
for d in range(self.Y.shape[1]):
inan = np.isnan(self.Y)[:, d]
arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
np.set_printoptions(**opt)
self.d = bdict.values()
class SparseGPStochastics(StochasticStorage):
"""
For the sparse gp we need to store the dimension we are in,
and the indices corresponding to those
"""
def __init__(self, model, batchsize=1, missing_data=True):
self.batchsize = batchsize
self.output_dim = model.Y.shape[1]
self.Y = model.Y_normalized
self.missing_data = missing_data
self.reset()
self.do_stochastics()
def do_stochastics(self):
import numpy as np
if self.batchsize == 1:
self.current_dim = (self.current_dim+1)%self.output_dim
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
else:
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
bdict = {}
if self.missing_data:
opt = np.get_printoptions()
np.set_printoptions(threshold=np.inf)
for d in self.d:
inan = np.isnan(self.Y[:, d])
arr_str = np.array2string(inan,np.inf, 0,True, '',formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
np.set_printoptions(**opt)
self.d = bdict.values()
else:
self.d = [[self.d, None]]
def reset(self):
self.current_dim = -1
self.d = None