[optimizer] one copy for the optimizer in optimizer_array, use this instead of _set|get_params_transformed

This commit is contained in:
mzwiessele 2014-05-22 11:39:04 +01:00
parent 43ee8ce614
commit 5a2bc4863b
7 changed files with 158 additions and 79 deletions

View file

@ -61,7 +61,7 @@ class Model(Parameterized):
on the current machine.
"""
initial_parameters = self._get_params_transformed()
initial_parameters = self.optimizer_array
if parallel:
try:
@ -124,13 +124,15 @@ class Model(Parameterized):
For probabilistic models this is the negative log_likelihood
(including the MAP prior), so we return it here. If your model is not
probabilistic, just return your objective here!
probabilistic, just return your objective to minimize here!
"""
return -float(self.log_likelihood()) - self.log_prior()
def objective_function_gradients(self):
"""
The gradients for the objective function for the given algorithm.
The gradients are w.r.t. the *negative* objective function, as
this framework works with *negative* log-likelihoods as a default.
You can find the gradient for the parameters in self.gradient at all times.
This is the place, where gradients get stored for parameters.
@ -141,7 +143,7 @@ class Model(Parameterized):
For probabilistic models this is the gradient of the negative log_likelihood
(including the MAP prior), so we return it here. If your model is not
probabilistic, just return your gradient here!
probabilistic, just return your *negative* gradient here!
"""
return -(self._log_likelihood_gradients() + self._log_prior_gradients())
@ -157,7 +159,8 @@ class Model(Parameterized):
:type x: np.array
"""
try:
self._set_params_transformed(x)
# self._set_params_transformed(x)
self.optimizer_array = x
obj_grads = self._transform_gradients(self.objective_function_gradients())
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError):
@ -180,7 +183,7 @@ class Model(Parameterized):
:parameter type: np.array
"""
try:
self._set_params_transformed(x)
self.optimizer_array = x
obj = self.objective_function()
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError):
@ -192,7 +195,7 @@ class Model(Parameterized):
def _objective_grads(self, x):
try:
self._set_params_transformed(x)
self.optimizer_array = x
obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError):
@ -226,7 +229,7 @@ class Model(Parameterized):
optimizer = self.preferred_optimizer
if start == None:
start = self._get_params_transformed()
start = self.optimizer_array
optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model=self, **kwargs)
@ -235,7 +238,7 @@ class Model(Parameterized):
self.optimization_runs.append(opt)
self._set_params_transformed(opt.x_opt)
self.optimizer_array = opt.x_opt
def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
# assert self.Y.shape[1] > 1, "SGD only works with D > 1"
@ -260,7 +263,7 @@ class Model(Parameterized):
The gradient is considered correct if the ratio of the analytical
and numerical gradients is within <tolerance> of unity.
"""
x = self._get_params_transformed().copy()
x = self.optimizer_array.copy()
if not verbose:
# make sure only to test the selected parameters
@ -270,8 +273,8 @@ class Model(Parameterized):
transformed_index = self._raveled_index_for(target_param)
if self._has_fixes():
indices = np.r_[:self.size]
which = (transformed_index[:,None]==indices[self._fixes_][None,:]).nonzero()
transformed_index = (indices-(~self._fixes_).cumsum())[transformed_index[which[0]]]
which = (transformed_index[:, None] == indices[self._fixes_][None, :]).nonzero()
transformed_index = (indices - (~self._fixes_).cumsum())[transformed_index[which[0]]]
if transformed_index.size == 0:
print "No free parameters to check"
@ -290,7 +293,7 @@ class Model(Parameterized):
gradient = gradient[transformed_index]
denominator = (2 * np.dot(dx, gradient))
global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
global_ratio = (f1 - f2) / np.where(denominator == 0., 1e-32, denominator)
global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
if global_ratio is np.nan:
global_ratio = 0
@ -319,10 +322,10 @@ class Model(Parameterized):
param_index = self._raveled_index_for(target_param)
if self._has_fixes():
indices = np.r_[:self.size]
which = (param_index[:,None]==indices[self._fixes_][None,:]).nonzero()
which = (param_index[:, None] == indices[self._fixes_][None, :]).nonzero()
param_index = param_index[which[0]]
transformed_index = (indices-(~self._fixes_).cumsum())[param_index]
#print param_index, transformed_index
transformed_index = (indices - (~self._fixes_).cumsum())[param_index]
# print param_index, transformed_index
else:
transformed_index = param_index
@ -340,7 +343,7 @@ class Model(Parameterized):
xx[xind] -= 2.*step
f2 = self._objective(xx)
numerical_gradient = (f1 - f2) / (2 * step)
if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind]
if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
else: ratio = (f1 - f2) / (2 * step * gradient[xind])
difference = np.abs((f1 - f2) / 2 / step - gradient[xind])
@ -358,7 +361,7 @@ class Model(Parameterized):
grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
print grad_string
self._set_params_transformed(x)
self.optimizer_array = x
return ret

View file

@ -39,7 +39,7 @@ class ObsAr(np.ndarray, Pickleable, Observable):
s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy())
memo[id(self)] = s
import copy
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
return s
def __reduce__(self):

View file

@ -4,7 +4,7 @@
import itertools
import numpy
np = numpy
from parameter_core import Parameterizable, adjust_name_for_printing
from parameter_core import Parameterizable, adjust_name_for_printing, Pickleable
from observable_array import ObsAr
###### printing
@ -221,10 +221,9 @@ class Param(Parameterizable, ObsAr):
s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy())
memo[id(self)] = s
import copy
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
return s
#===========================================================================
# Printing -> done
#===========================================================================

View file

@ -16,6 +16,7 @@ Observable Pattern for parameterization
from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
import numpy as np
import re
import logging
__updated__ = '2014-05-21'
@ -49,7 +50,6 @@ class Observable(object):
as an observer. Every time the observable changes, it sends a notification with
self as only argument to all its observers.
"""
_updated = True
_updates = True
def __init__(self, *args, **kwargs):
super(Observable, self).__init__()
@ -58,13 +58,19 @@ class Observable(object):
@property
def updates(self):
self._updates = self._highest_parent_._updates
p = getattr(self, '_highest_parent_', None)
if p is not None:
self._updates = p._updates
return self._updates
@updates.setter
def updates(self, ups):
assert isinstance(ups, bool), "updates are either on (True) or off (False)"
self._highest_parent_._updates = ups
p = getattr(self, '_highest_parent_', None)
if p is not None:
p._updates = ups
else:
self._updates = ups
if ups:
self._trigger_params_changed()
@ -172,6 +178,7 @@ class Pickleable(object):
"""
def __init__(self, *a, **kw):
super(Pickleable, self).__init__()
#===========================================================================
# Pickling operations
#===========================================================================
@ -208,21 +215,25 @@ class Pickleable(object):
memo[id(p)] = None # set all parents to be None, so they will not be copied
memo[id(self.gradient)] = None # reset the gradient
memo[id(self.param_array)] = None # and param_array
memo[id(self.optimizer_array)] = None # and optimizer_array
memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
c = copy.deepcopy(self, memo) # and start the copy
c._parent_index_ = None
c._trigger_params_changed()
return c
def __deepcopy__(self, memo):
s = self.__new__(self.__class__) # fresh instance
memo[id(self)] = s # be sure to break all cycles --> self is already done
import copy
s.__dict__.update(copy.deepcopy(self.__dict__, memo)) # standard copy
s.__setstate__(copy.deepcopy(self.__getstate__(), memo)) # standard copy
return s
def __getstate__(self):
ignore_list = ['_param_array_', # parameters get set from bottom to top
'_gradient_array_', # as well as gradients
'_optimizer_copy_',
'logger',
'_fixes_', # and fixes
'_Cacher_wrap__cachers', # never pickle cachers
]
@ -234,7 +245,8 @@ class Pickleable(object):
def __setstate__(self, state):
self.__dict__.update(state)
return self
self._transformed = True
class Gradcheckable(Pickleable, Parentable):
"""
@ -324,7 +336,6 @@ class Indexable(Nameable, Observable):
self._default_constraint_ = default_constraint
from index_operations import ParameterIndexOperations
self.constraints = ParameterIndexOperations()
self._old_constraints = ParameterIndexOperations()
self.priors = ParameterIndexOperations()
if self._default_constraint_ is not None:
self.constrain(self._default_constraint_)
@ -617,36 +628,103 @@ class OptimizationHandlable(Indexable):
"""
def __init__(self, name, default_constraint=None, *a, **kw):
super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
self._optimizer_copy_ = None
self._transformed = True
def _get_params_transformed(self):
# transformed parameters (apply un-transformation rules)
p = self.param_array.copy()
[np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
if self.has_parent() and self.constraints[__fixed__].size != 0:
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
return p[fixes]
elif self._has_fixes():
return p[self._fixes_]
return p
#===========================================================================
# Optimizer copy
#===========================================================================
@property
def optimizer_array(self):
"""
Array for the optimizer to work on.
This array always lives in the space for the optimizer.
Thus, it is untransformed, going from Transformations.
def _set_params_transformed(self, p):
Setting this array, will make sure the transformed parameters for this model
will be set accordingly. It has to be set with an array, retrieved from
this method, as e.g. fixing will resize the array.
The optimizer should only interfere with this array, such that transformations
are secured.
"""
Set parameters p, but make sure they get transformed before setting.
This means, the optimizer sees p, whereas the model sees transformed(p),
such that, the parameters the model sees are in the right domain.
"""
if not(p is self.param_array):
if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
self._optimizer_copy_ = np.empty(self.size)
if self._transformed:
self._optimizer_copy_.flat = self.param_array.flat
[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
if self.has_parent() and self.constraints[__fixed__].size != 0:
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
self.param_array.flat[fixes] = p
elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
else: self.param_array.flat = p
[np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
return self._optimizer_copy_[fixes]
elif self._has_fixes():
return self._optimizer_copy_[self._fixes_]
self._transformed = False
return self._optimizer_copy_
@optimizer_array.setter
def optimizer_array(self, p):
"""
Make sure the optimizer copy does not get touched, thus, we only want to
set the values *inside* not the array itself.
Also we want to update param_array in here.
"""
if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
self._optimizer_copy_ = np.empty(self.size)
self._optimizer_copy_.flat = self.param_array.flat
if self.has_parent() and self.constraints[__fixed__].size != 0:
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
self._optimizer_copy_.flat[fixes] = p
elif self._has_fixes(): self._optimizer_copy_.flat[self._fixes_] = p
else: self._optimizer_copy_.flat = p
self.param_array.flat = self._optimizer_copy_.flat
[np.put(self.param_array, ind, c.f(self._optimizer_copy_.flat[ind]))
for c, ind in self.constraints.iteritems() if c != __fixed__]
self._transformed = True
self._trigger_params_changed()
def _get_params_transformed(self):
raise DeprecationWarning, "_get|set_params{_transformed} is deprecated, use self.optimizer_array instead!"
# # transformed parameters (apply un-transformation rules)
# p = self.param_array.copy()
# [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
# if self.has_parent() and self.constraints[__fixed__].size != 0:
# fixes = np.ones(self.size).astype(bool)
# fixes[self.constraints[__fixed__]] = FIXED
# return p[fixes]
# elif self._has_fixes():
# return p[self._fixes_]
# return p
#
def _set_params_transformed(self, p):
raise DeprecationWarning, "_get|set_params{_transformed} is deprecated, use self.optimizer_array instead!"
# """
# Set parameters p, but make sure they get transformed before setting.
# This means, the optimizer sees p, whereas the model sees transformed(p),
# such that, the parameters the model sees are in the right domain.
# """
# if not(p is self.param_array):
# if self.has_parent() and self.constraints[__fixed__].size != 0:
# fixes = np.ones(self.size).astype(bool)
# fixes[self.constraints[__fixed__]] = FIXED
# self.param_array.flat[fixes] = p
# elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
# else: self.param_array.flat = p
# [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
# for c, ind in self.constraints.iteritems() if c != __fixed__]
# self._trigger_params_changed()
def _trigger_params_changed(self, trigger_parent=True):
"""
First tell all children to update,
@ -725,7 +803,7 @@ class OptimizationHandlable(Indexable):
x = rand_gen(loc=loc, scale=scale, size=self._size_transformed(), *args, **kwargs)
# now draw from prior where possible
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
self._set_params_transformed(x) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)
#===========================================================================
# For shared memory arrays. This does nothing in Param, but sets the memory
@ -784,6 +862,7 @@ class Parameterizable(OptimizationHandlable):
self.parameters = ArrayList()
self._param_array_ = None
self._added_names_ = set()
self.logger = logging.getLogger(self.__class__.__name__)
self.__visited = False # for traversing in reverse order we need to know if we were here already
@property
@ -894,6 +973,11 @@ class Parameterizable(OptimizationHandlable):
self._remove_parameter_name(None, old_name)
self._add_parameter_name(param)
def __setstate__(self, state):
super(Parameterizable, self).__setstate__(state)
self.logger = logging.getLogger(self.__class__.__name__)
return self
#===========================================================================
# notification system
#===========================================================================

View file

@ -26,7 +26,7 @@ class BayesianGPLVM(SparseGP):
"""
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
self.logger = logging.getLogger("Bayesian GPLVM <{}>".format(hex(id(self))))
self.logger = logging.getLogger(self.__class__.__name__)
if X == None:
from ..util.initialization import initialize_latent
self.logger.info("initializing latent space X with method {}".format(init))

View file

@ -59,7 +59,7 @@ class MRD(SparseGP):
inference_method=None, likelihoods=None, name='mrd', Ynames=None):
super(GP, self).__init__(name)
self.logger = logging.getLogger("MRD <{}>".format(hex(id(self))))
self.logger = logging.getLogger(self.__class__.__name__)
self.input_dim = input_dim
self.num_inducing = num_inducing
@ -107,16 +107,16 @@ class MRD(SparseGP):
self.logger.info("building kernels")
if kernel is None:
from ..kern import RBF
self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
elif isinstance(kernel, Kern):
self.kernels = []
kernels = []
for i in range(len(Ylist)):
k = kernel.copy()
self.kernels.append(k)
kernels.append(k)
else:
assert len(kernel) == len(Ylist), "need one kernel per output"
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
self.kernels = kernel
kernels = kernel
if X_variance is None:
X_variance = np.random.uniform(0.1, 0.2, X.shape)
@ -125,8 +125,8 @@ class MRD(SparseGP):
self.X = NormalPosterior(X, X_variance)
if likelihoods is None:
self.likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
else: self.likelihoods = likelihoods
likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
else: likelihoods = likelihoods
self.logger.info("adding X and Z")
self.add_parameters(self.X, self.Z)
@ -134,9 +134,8 @@ class MRD(SparseGP):
self.bgplvms = []
self.num_data = Ylist[0].shape[0]
for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, self.kernels, self.likelihoods, self.Ylist):
for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist):
assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
p = Parameterized(name=n)
p.add_parameter(k)
p.kern = k
@ -154,19 +153,18 @@ class MRD(SparseGP):
self.posteriors = []
self.Z.gradient[:] = 0.
self.X.gradient[:] = 0.
for y, k, l, i in itertools.izip(self.Ylist, self.kernels, self.likelihoods, self.inference_method):
for y, b, i in itertools.izip(self.Ylist, self.bgplvms, self.inference_method):
self.logger.info('working on im <{}>'.format(hex(id(i))))
k, l = b.kern, b.likelihood
posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)
self.posteriors.append(posterior)
self._log_marginal_likelihood += lml
# likelihoods gradients
self.logger.info("likelihood gradients")
l.update_gradients(grad_dict.pop('dL_dthetaL'))
#gradients wrt kernel
self.logger.info("kernel gradients")
dL_dKmm = grad_dict.pop('dL_dKmm')
k.update_gradients_full(dL_dKmm, self.Z, None)
target = k.gradient.copy()
@ -174,7 +172,6 @@ class MRD(SparseGP):
k.gradient += target
#gradients wrt Z
self.logger.info("Z gradients")
self.Z.gradient += k.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += k.gradients_Z_expectations(
grad_dict['dL_dpsi0'],
@ -182,16 +179,15 @@ class MRD(SparseGP):
grad_dict['dL_dpsi2'],
Z=self.Z, variational_posterior=self.X)
self.logger.info("X gradients")
dL_dmean, dL_dS = k.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict)
self.X.mean.gradient += dL_dmean
self.X.variance.gradient += dL_dS
# update for the KL divergence
self.posterior = self.posteriors[0]
self.kern = self.kernels[0]
self.likelihood = self.likelihoods[0]
self.kern = self.bgplvms[0].kern
self.likelihood = self.bgplvms[0].likelihood
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.X)
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
@ -268,8 +264,8 @@ class MRD(SparseGP):
This predicts the output mean and variance for the dataset given in Ylist[Yindex]
"""
self.posterior = self.posteriors[Yindex]
self.kern = self.kernels[Yindex]
self.likelihood = self.likelihoods[Yindex]
self.kern = self.bgplvms[0].kern
self.likelihood = self.bgplvms[0].likelihood
return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern)
#===============================================================================
@ -311,7 +307,7 @@ class MRD(SparseGP):
"""
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
from ..plotting.matplot_dep import dim_reduction_plots
if "Yindex" not in predict_kwargs:
predict_kwargs['Yindex'] = 0
@ -333,10 +329,7 @@ class MRD(SparseGP):
return plot
def __getstate__(self):
# TODO:
import copy
state = copy.copy(self.__dict__)
del state['kernels']
state = super(MRD, self).__getstate__()
del state['kern']
del state['likelihood']
return state
@ -344,7 +337,6 @@ class MRD(SparseGP):
def __setstate__(self, state):
# TODO:
super(MRD, self).__setstate__(state)
self.kernels = [p.kern for p in self.bgplvms]
self.kern = self.kernels[0]
self.likelihood = self.likelihoods[0]
self.kern = self.bgplvms[0].kern
self.likelihood = self.bgplvms[0].likelihood
self.parameters_changed()

View file

@ -16,8 +16,7 @@ from GPy.core.parameterization.priors import Gaussian
from GPy.kern._src.rbf import RBF
from GPy.kern._src.linear import Linear
from GPy.kern._src.static import Bias, White
from GPy.examples.dimensionality_reduction import mrd_simulation,\
bgplvm_simulation
from GPy.examples.dimensionality_reduction import mrd_simulation
from GPy.examples.regression import toy_rbf_1d_50
from GPy.core.parameterization.variational import NormalPosterior
from GPy.models.gp_regression import GPRegression
@ -90,6 +89,7 @@ class Test(ListDictTestCase):
self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops)
self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops)
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
pcopy.gradient = 10 # gradient does not get copied anymore
self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
self.assertSequenceEqual(str(par), str(pcopy))
self.assertIsNot(par.param_array, pcopy.param_array)
@ -151,6 +151,7 @@ class Test(ListDictTestCase):
par = NormalPosterior(X,Xv)
par.gradient = 10
pcopy = par.copy()
pcopy.gradient = 10
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
self.assertSequenceEqual(str(par), str(pcopy))