[optimizer] one copy for the optimizer in optimizer_array, use this instead of _set|get_params_transformed

Author: mzwiessele
Date: 2014-05-22 11:39:04 +01:00
Parent: 43ee8ce614
Commit: 5a2bc4863b
7 changed files with 158 additions and 79 deletions
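In short, the commit replaces the _get_params_transformed / _set_params_transformed pair with a single cached optimizer_array that the optimizer reads and writes directly. A minimal sketch of how calling code migrates; the stub class below exists only so the snippet runs without GPy, and the real property additionally applies the constraints' transformations (see the parameter_core.py hunks further down):

    # Call-site migration implied by this commit. 'm' stands for any model;
    # the stub below only exists so the snippet runs without GPy installed.
    import numpy as np

    class _ModelStub(object):
        def __init__(self):
            self.optimizer_array = np.zeros(2)  # attribute introduced by this commit

    m = _ModelStub()

    # before: x = m._get_params_transformed(); ...; m._set_params_transformed(x)
    # after:
    x = m.optimizer_array.copy()   # the optimizer's (untransformed) view of the parameters
    x += 0.1
    m.optimizer_array = x          # in GPy the setter re-applies the constraint transformations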

View file

@@ -61,7 +61,7 @@ class Model(Parameterized):
         on the current machine.
         """
-        initial_parameters = self._get_params_transformed()
+        initial_parameters = self.optimizer_array

         if parallel:
             try:
@@ -124,13 +124,15 @@ class Model(Parameterized):
         For probabilistic models this is the negative log_likelihood
         (including the MAP prior), so we return it here. If your model is not
-        probabilistic, just return your objective here!
+        probabilistic, just return your objective to minimize here!
         """
         return -float(self.log_likelihood()) - self.log_prior()

     def objective_function_gradients(self):
         """
         The gradients for the objective function for the given algorithm.
+        The gradients are w.r.t. the *negative* objective function, as
+        this framework works with *negative* log-likelihoods as a default.

         You can find the gradient for the parameters in self.gradient at all times.
         This is the place, where gradients get stored for parameters.
@@ -141,7 +143,7 @@ class Model(Parameterized):
         For probabilistic models this is the gradient of the negative log_likelihood
         (including the MAP prior), so we return it here. If your model is not
-        probabilistic, just return your gradient here!
+        probabilistic, just return your *negative* gradient here!
         """
         return -(self._log_likelihood_gradients() + self._log_prior_gradients())
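The two docstrings above pin down a sign convention: the optimizer minimizes, so objective_function returns the negative log-likelihood and objective_function_gradients returns the gradients of that negative objective. A standalone toy illustration of the convention (not GPy code; the Gaussian-mean model is invented for this sketch):

    import numpy as np

    class ToyGaussianMean(object):
        """Estimate the mean of N(mu, 1) data; mu is the only parameter."""
        def __init__(self, data):
            self.data = np.asarray(data, dtype=float)
            self.mu = 0.0

        def log_likelihood(self):
            # up to an additive constant
            return -0.5 * np.sum((self.data - self.mu) ** 2)

        def _log_likelihood_gradients(self):
            return np.array([np.sum(self.data - self.mu)])  # d log L / d mu

        def objective_function(self):
            # what a minimizer sees: the *negative* log-likelihood
            return -float(self.log_likelihood())

        def objective_function_gradients(self):
            # gradients of the negative objective, matching the docstring above
            return -self._log_likelihood_gradients()

    m = ToyGaussianMean([0.5, 1.5, 2.0])
    print(m.objective_function())
    print(m.objective_function_gradients())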
@@ -157,7 +159,8 @@ class Model(Parameterized):
         :type x: np.array
         """
         try:
-            self._set_params_transformed(x)
+            # self._set_params_transformed(x)
+            self.optimizer_array = x
             obj_grads = self._transform_gradients(self.objective_function_gradients())
             self._fail_count = 0
         except (LinAlgError, ZeroDivisionError, ValueError):
@@ -180,7 +183,7 @@ class Model(Parameterized):
         :parameter type: np.array
         """
         try:
-            self._set_params_transformed(x)
+            self.optimizer_array = x
             obj = self.objective_function()
             self._fail_count = 0
         except (LinAlgError, ZeroDivisionError, ValueError):
@@ -192,7 +195,7 @@ class Model(Parameterized):
     def _objective_grads(self, x):
         try:
-            self._set_params_transformed(x)
+            self.optimizer_array = x
             obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
             self._fail_count = 0
         except (LinAlgError, ZeroDivisionError, ValueError):
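All three wrappers above follow one pattern: write the optimizer's proposal into optimizer_array, evaluate, and count numerical failures rather than letting them abort the run. A hedged, standalone sketch of that pattern; the np.inf fallback and the max_failures cap are illustrative choices, not taken from this diff:

    import numpy as np
    from numpy.linalg import LinAlgError

    class SafeObjective(object):
        """Wraps a model so an optimizer can call it as a plain function of x."""
        def __init__(self, model, max_failures=10):
            self.model = model
            self.max_failures = max_failures
            self._fail_count = 0

        def __call__(self, x):
            try:
                self.model.optimizer_array = x        # pushes x through the transformations
                value = self.model.objective_function()
                self._fail_count = 0
                return value
            except (LinAlgError, ZeroDivisionError, ValueError):
                self._fail_count += 1
                if self._fail_count > self.max_failures:
                    raise
                return np.inf                         # let the optimizer back off this point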
@@ -226,7 +229,7 @@ class Model(Parameterized):
         optimizer = self.preferred_optimizer
         if start == None:
-            start = self._get_params_transformed()
+            start = self.optimizer_array

         optimizer = optimization.get_optimizer(optimizer)
         opt = optimizer(start, model=self, **kwargs)
@@ -235,7 +238,7 @@ class Model(Parameterized):
         self.optimization_runs.append(opt)

-        self._set_params_transformed(opt.x_opt)
+        self.optimizer_array = opt.x_opt

     def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
         # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
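Together with the hunk above this is the whole optimize round trip: seed the optimizer with optimizer_array, run it, and assign the optimum back so the model maps it into its own space. A toy version of the same loop, using SciPy purely to make the sketch runnable; GPy itself goes through its own optimizer objects via optimization.get_optimizer:

    # Toy round trip: read start point, minimize, write the optimum back.
    # 'TinyModel' is a stand-in; only the read/assign pattern mirrors the diff.
    import numpy as np
    from scipy.optimize import minimize

    class TinyModel(object):
        def __init__(self):
            self.optimizer_array = np.array([3.0, -2.0])
        def objective(self, x):
            return float(np.sum((x - 1.0) ** 2))   # minimum at [1, 1]
        def gradients(self, x):
            return 2.0 * (x - 1.0)

    m = TinyModel()
    start = m.optimizer_array                      # same role as 'start' in optimize()
    opt = minimize(m.objective, start, jac=m.gradients, method='L-BFGS-B')
    m.optimizer_array = opt.x                      # same role as 'self.optimizer_array = opt.x_opt'
    print(m.optimizer_array)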
@@ -260,7 +263,7 @@ class Model(Parameterized):
         The gradient is considered correct if the ratio of the analytical
         and numerical gradients is within <tolerance> of unity.
         """
-        x = self._get_params_transformed().copy()
+        x = self.optimizer_array.copy()

         if not verbose:
             # make sure only to test the selected parameters
@@ -270,8 +273,8 @@ class Model(Parameterized):
             transformed_index = self._raveled_index_for(target_param)
             if self._has_fixes():
                 indices = np.r_[:self.size]
-                which = (transformed_index[:,None]==indices[self._fixes_][None,:]).nonzero()
-                transformed_index = (indices-(~self._fixes_).cumsum())[transformed_index[which[0]]]
+                which = (transformed_index[:, None] == indices[self._fixes_][None, :]).nonzero()
+                transformed_index = (indices - (~self._fixes_).cumsum())[transformed_index[which[0]]]

             if transformed_index.size == 0:
                 print "No free parameters to check"
@@ -290,7 +293,7 @@ class Model(Parameterized):
             gradient = gradient[transformed_index]

             denominator = (2 * np.dot(dx, gradient))
-            global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
+            global_ratio = (f1 - f2) / np.where(denominator == 0., 1e-32, denominator)
             global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
             if global_ratio is np.nan:
                 global_ratio = 0
@@ -319,10 +322,10 @@ class Model(Parameterized):
             param_index = self._raveled_index_for(target_param)
             if self._has_fixes():
                 indices = np.r_[:self.size]
-                which = (param_index[:,None]==indices[self._fixes_][None,:]).nonzero()
+                which = (param_index[:, None] == indices[self._fixes_][None, :]).nonzero()
                 param_index = param_index[which[0]]
-                transformed_index = (indices-(~self._fixes_).cumsum())[param_index]
-                #print param_index, transformed_index
+                transformed_index = (indices - (~self._fixes_).cumsum())[param_index]
+                # print param_index, transformed_index
             else:
                 transformed_index = param_index
@@ -340,7 +343,7 @@ class Model(Parameterized):
                 xx[xind] -= 2.*step
                 f2 = self._objective(xx)
                 numerical_gradient = (f1 - f2) / (2 * step)
-                if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind]
+                if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
                 else: ratio = (f1 - f2) / (2 * step * gradient[xind])
                 difference = np.abs((f1 - f2) / 2 / step - gradient[xind])
@@ -358,7 +361,7 @@ class Model(Parameterized):
                 grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])

                 print grad_string

-        self._set_params_transformed(x)
+        self.optimizer_array = x
         return ret
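The ratio checkgrad prints is the central finite difference divided by the analytical gradient, so a correct gradient gives values close to 1. A standalone sketch of that check on a toy function (not GPy's checkgrad, which additionally handles fixed and transformed parameters as the hunks above show):

    import numpy as np

    def numerical_gradient(f, x, step=1e-6):
        """Central differences: (f(x+h) - f(x-h)) / (2h), one coordinate at a time."""
        x = np.asarray(x, dtype=float)
        grad = np.empty_like(x)
        for i in range(x.size):
            xp, xm = x.copy(), x.copy()
            xp[i] += step
            xm[i] -= step
            grad[i] = (f(xp) - f(xm)) / (2.0 * step)
        return grad

    f = lambda x: np.sum(x ** 3)          # analytical gradient: 3 * x**2
    x = np.array([0.5, -1.2, 2.0])
    analytic = 3.0 * x ** 2
    numeric = numerical_gradient(f, x)
    ratio = numeric / analytic            # all entries should be close to 1.0
    print(ratio)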

View file

@@ -39,7 +39,7 @@ class ObsAr(np.ndarray, Pickleable, Observable):
         s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy())
         memo[id(self)] = s
         import copy
-        s.__dict__.update(copy.deepcopy(self.__dict__, memo))
+        Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
         return s

     def __reduce__(self):

View file

@@ -4,7 +4,7 @@
 import itertools
 import numpy
 np = numpy
-from parameter_core import Parameterizable, adjust_name_for_printing
+from parameter_core import Parameterizable, adjust_name_for_printing, Pickleable
 from observable_array import ObsAr

 ###### printing
@@ -221,10 +221,9 @@ class Param(Parameterizable, ObsAr):
         s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy())
         memo[id(self)] = s
         import copy
-        s.__dict__.update(copy.deepcopy(self.__dict__, memo))
+        Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
         return s

     #===========================================================================
     # Printing -> done
     #===========================================================================
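Both __deepcopy__ changes (ObsAr above and Param here) route the copied attributes through __getstate__/__setstate__ instead of copying __dict__ wholesale, so whatever pickling chooses to drop (caches, loggers, derived arrays) is also dropped on copy. A minimal standalone sketch of the same idea for an ndarray subclass; the class and attribute names are invented for illustration, and real pickling would still need a __reduce__ like the one ObsAr defines:

    import copy
    import numpy as np

    class TrackedArray(np.ndarray):
        """ndarray subclass carrying extra attributes, some of which should never be copied."""
        def __new__(cls, input_array):
            obj = np.asarray(input_array).view(cls)
            obj.name = 'tracked'
            obj._cache = {'expensive': None}   # volatile: rebuilt on demand, not copied
            return obj

        def __getstate__(self):
            # keep everything except volatile attributes
            state = self.__dict__.copy()
            state.pop('_cache', None)
            return state

        def __setstate__(self, state):
            self.__dict__.update(state)
            self._cache = {}                   # rebuild volatile attributes fresh

        def __deepcopy__(self, memo):
            s = self.__class__(self.view(np.ndarray).copy())
            memo[id(self)] = s
            s.__setstate__(copy.deepcopy(self.__getstate__(), memo))
            return s

    a = TrackedArray([1.0, 2.0, 3.0])
    a._cache['expensive'] = 'stale'
    b = copy.deepcopy(a)
    print(b.name, b._cache)                    # attributes copied, cache reset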

View file

@@ -16,6 +16,7 @@ Observable Pattern for patameterization
 from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
 import numpy as np
 import re
+import logging

 __updated__ = '2014-05-21'
@@ -49,7 +50,6 @@ class Observable(object):
     as an observer. Every time the observable changes, it sends a notification with
     self as only argument to all its observers.
     """
-    _updated = True
     _updates = True
     def __init__(self, *args, **kwargs):
         super(Observable, self).__init__()
@@ -58,13 +58,19 @@ class Observable(object):
     @property
     def updates(self):
-        self._updates = self._highest_parent_._updates
+        p = getattr(self, '_highest_parent_', None)
+        if p is not None:
+            self._updates = p._updates
         return self._updates

     @updates.setter
     def updates(self, ups):
         assert isinstance(ups, bool), "updates are either on (True) or off (False)"
-        self._highest_parent_._updates = ups
+        p = getattr(self, '_highest_parent_', None)
+        if p is not None:
+            p._updates = ups
+        else:
+            self._updates = ups
         if ups:
             self._trigger_params_changed()
@@ -172,6 +178,7 @@ class Pickleable(object):
     """
     def __init__(self, *a, **kw):
         super(Pickleable, self).__init__()

     #===========================================================================
     # Pickling operations
     #===========================================================================
@@ -208,21 +215,25 @@ class Pickleable(object):
         memo[id(p)] = None # set all parents to be None, so they will not be copied
         memo[id(self.gradient)] = None # reset the gradient
         memo[id(self.param_array)] = None # and param_array
+        memo[id(self.optimizer_array)] = None # and param_array
         memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
         c = copy.deepcopy(self, memo) # and start the copy
         c._parent_index_ = None
+        c._trigger_params_changed()
         return c

     def __deepcopy__(self, memo):
         s = self.__new__(self.__class__) # fresh instance
         memo[id(self)] = s # be sure to break all cycles --> self is already done
         import copy
-        s.__dict__.update(copy.deepcopy(self.__dict__, memo)) # standard copy
+        s.__setstate__(copy.deepcopy(self.__getstate__(), memo)) # standard copy
         return s

     def __getstate__(self):
         ignore_list = ['_param_array_', # parameters get set from bottom to top
                        '_gradient_array_', # as well as gradients
+                       '_optimizer_copy_',
+                       'logger',
                        '_fixes_', # and fixes
                        '_Cacher_wrap__cachers', # never pickle cachers
                        ]
@@ -234,7 +245,8 @@ class Pickleable(object):
     def __setstate__(self, state):
         self.__dict__.update(state)
-        return self
+        self._transformed = True

 class Gradcheckable(Pickleable, Parentable):
     """
@@ -324,7 +336,6 @@ class Indexable(Nameable, Observable):
         self._default_constraint_ = default_constraint
         from index_operations import ParameterIndexOperations
         self.constraints = ParameterIndexOperations()
-        self._old_constraints = ParameterIndexOperations()
         self.priors = ParameterIndexOperations()
         if self._default_constraint_ is not None:
             self.constrain(self._default_constraint_)
@@ -617,36 +628,103 @@ class OptimizationHandlable(Indexable):
     """
     def __init__(self, name, default_constraint=None, *a, **kw):
         super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
+        self._optimizer_copy_ = None
+        self._transformed = True

-    def _get_params_transformed(self):
-        # transformed parameters (apply un-transformation rules)
-        p = self.param_array.copy()
-        [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
-        if self.has_parent() and self.constraints[__fixed__].size != 0:
-            fixes = np.ones(self.size).astype(bool)
-            fixes[self.constraints[__fixed__]] = FIXED
-            return p[fixes]
-        elif self._has_fixes():
-            return p[self._fixes_]
-        return p
-
-    def _set_params_transformed(self, p):
-        """
-        Set parameters p, but make sure they get transformed before setting.
-        This means, the optimizer sees p, whereas the model sees transformed(p),
-        such that, the parameters the model sees are in the right domain.
-        """
-        if not(p is self.param_array):
-            if self.has_parent() and self.constraints[__fixed__].size != 0:
-                fixes = np.ones(self.size).astype(bool)
-                fixes[self.constraints[__fixed__]] = FIXED
-                self.param_array.flat[fixes] = p
-            elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
-            else: self.param_array.flat = p
-        [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
-            for c, ind in self.constraints.iteritems() if c != __fixed__]
-        self._trigger_params_changed()
+    #===========================================================================
+    # Optimizer copy
+    #===========================================================================
+    @property
+    def optimizer_array(self):
+        """
+        Array for the optimizer to work on.
+        This array always lives in the space for the optimizer.
+        Thus, it is untransformed, going from Transformations.
+
+        Setting this array, will make sure the transformed parameters for this model
+        will be set accordingly. It has to be set with an array, retrieved from
+        this method, as e.g. fixing will resize the array.
+
+        The optimizer should only interfere with this array, such that transofrmations
+        are secured.
+        """
+        if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
+            self._optimizer_copy_ = np.empty(self.size)
+
+        if self._transformed:
+            self._optimizer_copy_.flat = self.param_array.flat
+            [np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
+            if self.has_parent() and self.constraints[__fixed__].size != 0:
+                fixes = np.ones(self.size).astype(bool)
+                fixes[self.constraints[__fixed__]] = FIXED
+                return self._optimizer_copy_[fixes]
+            elif self._has_fixes():
+                return self._optimizer_copy_[self._fixes_]
+
+        self._transformed = False
+        return self._optimizer_copy_
+
+    @optimizer_array.setter
+    def optimizer_array(self, p):
+        """
+        Make sure the optimizer copy does not get touched, thus, we only want to
+        set the values *inside* not the array itself.
+
+        Also we want to update param_array in here.
+        """
+        if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
+            self._optimizer_copy_ = np.empty(self.size)
+            self._optimizer_copy_.flat = self.param_array.flat
+
+        if self.has_parent() and self.constraints[__fixed__].size != 0:
+            fixes = np.ones(self.size).astype(bool)
+            fixes[self.constraints[__fixed__]] = FIXED
+            self._optimizer_copy_.flat[fixes] = p
+        elif self._has_fixes(): self._optimizer_copy_.flat[self._fixes_] = p
+        else: self._optimizer_copy_.flat = p
+
+        self.param_array.flat = self._optimizer_copy_.flat
+        [np.put(self.param_array, ind, c.f(self._optimizer_copy_.flat[ind]))
+            for c, ind in self.constraints.iteritems() if c != __fixed__]
+
+        self._transformed = True
+        self._trigger_params_changed()
+
+    def _get_params_transformed(self):
+        raise DeprecationWarning, "_get|set_params{_transformed} is deprecated, use self.optimizer array insetad!"
+        # # transformed parameters (apply un-transformation rules)
+        # p = self.param_array.copy()
+        # [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
+        # if self.has_parent() and self.constraints[__fixed__].size != 0:
+        #     fixes = np.ones(self.size).astype(bool)
+        #     fixes[self.constraints[__fixed__]] = FIXED
+        #     return p[fixes]
+        # elif self._has_fixes():
+        #     return p[self._fixes_]
+        # return p
+        #
+    def _set_params_transformed(self, p):
+        raise DeprecationWarning, "_get|set_params{_transformed} is deprecated, use self.optimizer array insetad!"
+        # """
+        # Set parameters p, but make sure they get transformed before setting.
+        # This means, the optimizer sees p, whereas the model sees transformed(p),
+        # such that, the parameters the model sees are in the right domain.
+        # """
+        # if not(p is self.param_array):
+        #     if self.has_parent() and self.constraints[__fixed__].size != 0:
+        #         fixes = np.ones(self.size).astype(bool)
+        #         fixes[self.constraints[__fixed__]] = FIXED
+        #         self.param_array.flat[fixes] = p
+        #     elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
+        #     else: self.param_array.flat = p
+        # [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
+        #     for c, ind in self.constraints.iteritems() if c != __fixed__]
+        # self._trigger_params_changed()

     def _trigger_params_changed(self, trigger_parent=True):
         """
         First tell all children to update,
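A condensed, standalone model of what the property and its setter above do: keep one flat copy for the optimizer, map into the optimizer's space with the constraint's finv on the way out and back with f on the way in, and hide fixed entries from the optimizer entirely. The exp/log pair stands in for a real Transformation; everything here is illustrative, not GPy code.

    import numpy as np

    class TinyHandlable(object):
        """One positively constrained parameter vector with an optional free/fixed mask."""
        def __init__(self, values, free_mask=None):
            self.param_array = np.asarray(values, dtype=float)   # model space (positive values)
            self._free = free_mask                               # True = free, False = fixed
            self._optimizer_copy = self._finv(self.param_array)  # cached optimizer-space copy

        # stand-ins for a Transformation's finv/f (model space <-> optimizer space)
        @staticmethod
        def _finv(x):
            return np.log(x)

        @staticmethod
        def _f(x):
            return np.exp(x)

        @property
        def optimizer_array(self):
            self._optimizer_copy[:] = self._finv(self.param_array)
            if self._free is not None:
                return self._optimizer_copy[self._free]          # the optimizer never sees fixed entries
            return self._optimizer_copy

        @optimizer_array.setter
        def optimizer_array(self, p):
            if self._free is not None:
                self._optimizer_copy[self._free] = p
            else:
                self._optimizer_copy[:] = p
            self.param_array[:] = self._f(self._optimizer_copy)  # map back into the model's space

    m = TinyHandlable([1.0, 2.0, 3.0], free_mask=np.array([True, False, True]))
    x = m.optimizer_array                  # length 2: the fixed middle entry is hidden
    m.optimizer_array = x + 0.5
    print(m.param_array)                   # middle value stays 2.0; free entries scale by exp(0.5)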
@@ -725,7 +803,7 @@ class OptimizationHandlable(Indexable):
         x = rand_gen(loc=loc, scale=scale, size=self._size_transformed(), *args, **kwargs)
         # now draw from prior where possible
         [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
-        self._set_params_transformed(x) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
+        self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)

     #===========================================================================
     # For shared memory arrays. This does nothing in Param, but sets the memory
@@ -784,6 +862,7 @@ class Parameterizable(OptimizationHandlable):
         self.parameters = ArrayList()
         self._param_array_ = None
         self._added_names_ = set()
+        self.logger = logging.getLogger(self.__class__.__name__)
         self.__visited = False # for traversing in reverse order we need to know if we were here already

     @property
@@ -894,6 +973,11 @@ class Parameterizable(OptimizationHandlable):
         self._remove_parameter_name(None, old_name)
         self._add_parameter_name(param)

+    def __setstate__(self, state):
+        super(Parameterizable, self).__setstate__(state)
+        self.logger = logging.getLogger(self.__class__.__name__)
+        return self
+
     #===========================================================================
     # notification system
     #===========================================================================

View file

@@ -26,7 +26,7 @@ class BayesianGPLVM(SparseGP):
     """
     def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
                  Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
-        self.logger = logging.getLogger("Bayesian GPLVM <{}>".format(hex(id(self))))
+        self.logger = logging.getLogger(self.__class__.__name__)
         if X == None:
             from ..util.initialization import initialize_latent
             self.logger.info("initializing latent space X with method {}".format(init))

View file

@@ -59,7 +59,7 @@ class MRD(SparseGP):
                  inference_method=None, likelihoods=None, name='mrd', Ynames=None):
         super(GP, self).__init__(name)
-        self.logger = logging.getLogger("MRD <{}>".format(hex(id(self))))
+        self.logger = logging.getLogger(self.__class__.__name__)
         self.input_dim = input_dim
         self.num_inducing = num_inducing
@@ -107,16 +107,16 @@ class MRD(SparseGP):
         self.logger.info("building kernels")
         if kernel is None:
             from ..kern import RBF
-            self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
+            kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
         elif isinstance(kernel, Kern):
-            self.kernels = []
+            kernels = []
             for i in range(len(Ylist)):
                 k = kernel.copy()
-                self.kernels.append(k)
+                kernels.append(k)
         else:
             assert len(kernel) == len(Ylist), "need one kernel per output"
             assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
-            self.kernels = kernel
+            kernels = kernel

         if X_variance is None:
             X_variance = np.random.uniform(0.1, 0.2, X.shape)
@@ -125,8 +125,8 @@ class MRD(SparseGP):
         self.X = NormalPosterior(X, X_variance)

         if likelihoods is None:
-            self.likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
-        else: self.likelihoods = likelihoods
+            likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
+        else: likelihoods = likelihoods

         self.logger.info("adding X and Z")
         self.add_parameters(self.X, self.Z)
@@ -134,9 +134,8 @@ class MRD(SparseGP):
         self.bgplvms = []
         self.num_data = Ylist[0].shape[0]
-        for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, self.kernels, self.likelihoods, self.Ylist):
+        for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist):
             assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
             p = Parameterized(name=n)
             p.add_parameter(k)
             p.kern = k
@@ -154,19 +153,18 @@ class MRD(SparseGP):
         self.posteriors = []
         self.Z.gradient[:] = 0.
         self.X.gradient[:] = 0.
-        for y, k, l, i in itertools.izip(self.Ylist, self.kernels, self.likelihoods, self.inference_method):
+        for y, b, i in itertools.izip(self.Ylist, self.bgplvms, self.inference_method):
             self.logger.info('working on im <{}>'.format(hex(id(i))))
+            k, l = b.kern, b.likelihood
             posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)
             self.posteriors.append(posterior)
             self._log_marginal_likelihood += lml

             # likelihoods gradients
-            self.logger.info("likelihood gradients")
             l.update_gradients(grad_dict.pop('dL_dthetaL'))

             #gradients wrt kernel
-            self.logger.info("kernel gradients")
             dL_dKmm = grad_dict.pop('dL_dKmm')
             k.update_gradients_full(dL_dKmm, self.Z, None)
             target = k.gradient.copy()
@@ -174,7 +172,6 @@ class MRD(SparseGP):
             k.gradient += target

             #gradients wrt Z
-            self.logger.info("Z gradients")
             self.Z.gradient += k.gradients_X(dL_dKmm, self.Z)
             self.Z.gradient += k.gradients_Z_expectations(
                                grad_dict['dL_dpsi0'],
@@ -182,16 +179,15 @@ class MRD(SparseGP):
                                grad_dict['dL_dpsi2'],
                                Z=self.Z, variational_posterior=self.X)

-            self.logger.info("X gradients")
             dL_dmean, dL_dS = k.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict)
             self.X.mean.gradient += dL_dmean
             self.X.variance.gradient += dL_dS

-        # update for the KL divergence
         self.posterior = self.posteriors[0]
-        self.kern = self.kernels[0]
-        self.likelihood = self.likelihoods[0]
+        self.kern = self.bgplvms[0].kern
+        self.likelihood = self.bgplvms[0].likelihood
+        # update for the KL divergence
         self.variational_prior.update_gradients_KL(self.X)
         self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
@@ -268,8 +264,8 @@ class MRD(SparseGP):
         This predicts the output mean and variance for the dataset given in Ylist[Yindex]
         """
         self.posterior = self.posteriors[Yindex]
-        self.kern = self.kernels[Yindex]
-        self.likelihood = self.likelihoods[Yindex]
+        self.kern = self.bgplvms[0].kern
+        self.likelihood = self.bgplvms[0].likelihood
         return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern)

 #===============================================================================
@@ -311,7 +307,7 @@ class MRD(SparseGP):
         """
         import sys
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
-        import matplotlib.pyplot as plt
+        from matplotlib import pyplot as plt
         from ..plotting.matplot_dep import dim_reduction_plots

         if "Yindex" not in predict_kwargs:
             predict_kwargs['Yindex'] = 0
@@ -333,10 +329,7 @@ class MRD(SparseGP):
         return plot

     def __getstate__(self):
-        # TODO:
-        import copy
-        state = copy.copy(self.__dict__)
-        del state['kernels']
+        state = super(MRD, self).__getstate__()
         del state['kern']
         del state['likelihood']
         return state
@@ -344,7 +337,6 @@ class MRD(SparseGP):
     def __setstate__(self, state):
         # TODO:
         super(MRD, self).__setstate__(state)
-        self.kernels = [p.kern for p in self.bgplvms]
-        self.kern = self.kernels[0]
-        self.likelihood = self.likelihoods[0]
+        self.kern = self.bgplvms[0].kern
+        self.likelihood = self.bgplvms[0].likelihood
         self.parameters_changed()
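The MRD changes all push in one direction: the per-view kernels and likelihoods live only inside the per-view containers (bgplvms), and the top-level kern/likelihood are throwaway references that are re-derived in __setstate__ instead of pickled. A standalone sketch of that ownership pattern (all names are invented for illustration):

    import pickle

    class View(object):
        """One output view: owns its kernel and likelihood."""
        def __init__(self, name):
            self.name = name
            self.kern = 'kern-for-%s' % name          # stand-ins for real kernel/likelihood objects
            self.likelihood = 'lik-for-%s' % name

    class MultiView(object):
        def __init__(self, names):
            self.views = [View(n) for n in names]     # the only owners, like MRD.bgplvms
            self._point_at(0)

        def _point_at(self, index):
            # derived references: convenient to have, cheap to rebuild
            self.kern = self.views[index].kern
            self.likelihood = self.views[index].likelihood

        def __getstate__(self):
            state = self.__dict__.copy()
            del state['kern']                         # never pickle the derived references
            del state['likelihood']
            return state

        def __setstate__(self, state):
            self.__dict__.update(state)
            self._point_at(0)                         # rebuild them from the owning views

    m = MultiView(['a', 'b'])
    m2 = pickle.loads(pickle.dumps(m))
    print(m2.kern, m2.likelihood)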

View file

@@ -16,8 +16,7 @@ from GPy.core.parameterization.priors import Gaussian
 from GPy.kern._src.rbf import RBF
 from GPy.kern._src.linear import Linear
 from GPy.kern._src.static import Bias, White
-from GPy.examples.dimensionality_reduction import mrd_simulation,\
-    bgplvm_simulation
+from GPy.examples.dimensionality_reduction import mrd_simulation
 from GPy.examples.regression import toy_rbf_1d_50
 from GPy.core.parameterization.variational import NormalPosterior
 from GPy.models.gp_regression import GPRegression
@@ -90,6 +89,7 @@ class Test(ListDictTestCase):
         self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops)
         self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops)
         self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
+        pcopy.gradient = 10 # gradient does not get copied anymore
         self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
         self.assertSequenceEqual(str(par), str(pcopy))
         self.assertIsNot(par.param_array, pcopy.param_array)
@@ -151,6 +151,7 @@ class Test(ListDictTestCase):
         par = NormalPosterior(X,Xv)
         par.gradient = 10
         pcopy = par.copy()
+        pcopy.gradient = 10
         self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
         self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
         self.assertSequenceEqual(str(par), str(pcopy))