mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-06 10:32:39 +02:00
[optimizer] one copy for the optimizer in optimizer_array, use this instead of _set|get_params_transformed
This commit is contained in:
parent
43ee8ce614
commit
5a2bc4863b
7 changed files with 158 additions and 79 deletions
|
|
@ -61,7 +61,7 @@ class Model(Parameterized):
|
||||||
on the current machine.
|
on the current machine.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
initial_parameters = self._get_params_transformed()
|
initial_parameters = self.optimizer_array
|
||||||
|
|
||||||
if parallel:
|
if parallel:
|
||||||
try:
|
try:
|
||||||
|
|
@ -124,13 +124,15 @@ class Model(Parameterized):
|
||||||
|
|
||||||
For probabilistic models this is the negative log_likelihood
|
For probabilistic models this is the negative log_likelihood
|
||||||
(including the MAP prior), so we return it here. If your model is not
|
(including the MAP prior), so we return it here. If your model is not
|
||||||
probabilistic, just return your objective here!
|
probabilistic, just return your objective to minimize here!
|
||||||
"""
|
"""
|
||||||
return -float(self.log_likelihood()) - self.log_prior()
|
return -float(self.log_likelihood()) - self.log_prior()
|
||||||
|
|
||||||
def objective_function_gradients(self):
|
def objective_function_gradients(self):
|
||||||
"""
|
"""
|
||||||
The gradients for the objective function for the given algorithm.
|
The gradients for the objective function for the given algorithm.
|
||||||
|
The gradients are w.r.t. the *negative* objective function, as
|
||||||
|
this framework works with *negative* log-likelihoods as a default.
|
||||||
|
|
||||||
You can find the gradient for the parameters in self.gradient at all times.
|
You can find the gradient for the parameters in self.gradient at all times.
|
||||||
This is the place, where gradients get stored for parameters.
|
This is the place, where gradients get stored for parameters.
|
||||||
|
|
@ -141,7 +143,7 @@ class Model(Parameterized):
|
||||||
|
|
||||||
For probabilistic models this is the gradient of the negative log_likelihood
|
For probabilistic models this is the gradient of the negative log_likelihood
|
||||||
(including the MAP prior), so we return it here. If your model is not
|
(including the MAP prior), so we return it here. If your model is not
|
||||||
probabilistic, just return your gradient here!
|
probabilistic, just return your *negative* gradient here!
|
||||||
"""
|
"""
|
||||||
return -(self._log_likelihood_gradients() + self._log_prior_gradients())
|
return -(self._log_likelihood_gradients() + self._log_prior_gradients())
|
||||||
|
|
||||||
|
|
@ -157,7 +159,8 @@ class Model(Parameterized):
|
||||||
:type x: np.array
|
:type x: np.array
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
self._set_params_transformed(x)
|
# self._set_params_transformed(x)
|
||||||
|
self.optimizer_array = x
|
||||||
obj_grads = self._transform_gradients(self.objective_function_gradients())
|
obj_grads = self._transform_gradients(self.objective_function_gradients())
|
||||||
self._fail_count = 0
|
self._fail_count = 0
|
||||||
except (LinAlgError, ZeroDivisionError, ValueError):
|
except (LinAlgError, ZeroDivisionError, ValueError):
|
||||||
|
|
@ -180,7 +183,7 @@ class Model(Parameterized):
|
||||||
:parameter type: np.array
|
:parameter type: np.array
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
self._set_params_transformed(x)
|
self.optimizer_array = x
|
||||||
obj = self.objective_function()
|
obj = self.objective_function()
|
||||||
self._fail_count = 0
|
self._fail_count = 0
|
||||||
except (LinAlgError, ZeroDivisionError, ValueError):
|
except (LinAlgError, ZeroDivisionError, ValueError):
|
||||||
|
|
@ -192,7 +195,7 @@ class Model(Parameterized):
|
||||||
|
|
||||||
def _objective_grads(self, x):
|
def _objective_grads(self, x):
|
||||||
try:
|
try:
|
||||||
self._set_params_transformed(x)
|
self.optimizer_array = x
|
||||||
obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
|
obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
|
||||||
self._fail_count = 0
|
self._fail_count = 0
|
||||||
except (LinAlgError, ZeroDivisionError, ValueError):
|
except (LinAlgError, ZeroDivisionError, ValueError):
|
||||||
|
|
@ -226,7 +229,7 @@ class Model(Parameterized):
|
||||||
optimizer = self.preferred_optimizer
|
optimizer = self.preferred_optimizer
|
||||||
|
|
||||||
if start == None:
|
if start == None:
|
||||||
start = self._get_params_transformed()
|
start = self.optimizer_array
|
||||||
|
|
||||||
optimizer = optimization.get_optimizer(optimizer)
|
optimizer = optimization.get_optimizer(optimizer)
|
||||||
opt = optimizer(start, model=self, **kwargs)
|
opt = optimizer(start, model=self, **kwargs)
|
||||||
|
|
@ -235,7 +238,7 @@ class Model(Parameterized):
|
||||||
|
|
||||||
self.optimization_runs.append(opt)
|
self.optimization_runs.append(opt)
|
||||||
|
|
||||||
self._set_params_transformed(opt.x_opt)
|
self.optimizer_array = opt.x_opt
|
||||||
|
|
||||||
def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
|
def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
|
||||||
# assert self.Y.shape[1] > 1, "SGD only works with D > 1"
|
# assert self.Y.shape[1] > 1, "SGD only works with D > 1"
|
||||||
|
|
@ -260,7 +263,7 @@ class Model(Parameterized):
|
||||||
The gradient is considered correct if the ratio of the analytical
|
The gradient is considered correct if the ratio of the analytical
|
||||||
and numerical gradients is within <tolerance> of unity.
|
and numerical gradients is within <tolerance> of unity.
|
||||||
"""
|
"""
|
||||||
x = self._get_params_transformed().copy()
|
x = self.optimizer_array.copy()
|
||||||
|
|
||||||
if not verbose:
|
if not verbose:
|
||||||
# make sure only to test the selected parameters
|
# make sure only to test the selected parameters
|
||||||
|
|
@ -270,8 +273,8 @@ class Model(Parameterized):
|
||||||
transformed_index = self._raveled_index_for(target_param)
|
transformed_index = self._raveled_index_for(target_param)
|
||||||
if self._has_fixes():
|
if self._has_fixes():
|
||||||
indices = np.r_[:self.size]
|
indices = np.r_[:self.size]
|
||||||
which = (transformed_index[:,None]==indices[self._fixes_][None,:]).nonzero()
|
which = (transformed_index[:, None] == indices[self._fixes_][None, :]).nonzero()
|
||||||
transformed_index = (indices-(~self._fixes_).cumsum())[transformed_index[which[0]]]
|
transformed_index = (indices - (~self._fixes_).cumsum())[transformed_index[which[0]]]
|
||||||
|
|
||||||
if transformed_index.size == 0:
|
if transformed_index.size == 0:
|
||||||
print "No free parameters to check"
|
print "No free parameters to check"
|
||||||
|
|
@ -290,7 +293,7 @@ class Model(Parameterized):
|
||||||
gradient = gradient[transformed_index]
|
gradient = gradient[transformed_index]
|
||||||
|
|
||||||
denominator = (2 * np.dot(dx, gradient))
|
denominator = (2 * np.dot(dx, gradient))
|
||||||
global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
|
global_ratio = (f1 - f2) / np.where(denominator == 0., 1e-32, denominator)
|
||||||
global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
|
global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
|
||||||
if global_ratio is np.nan:
|
if global_ratio is np.nan:
|
||||||
global_ratio = 0
|
global_ratio = 0
|
||||||
|
|
@ -319,10 +322,10 @@ class Model(Parameterized):
|
||||||
param_index = self._raveled_index_for(target_param)
|
param_index = self._raveled_index_for(target_param)
|
||||||
if self._has_fixes():
|
if self._has_fixes():
|
||||||
indices = np.r_[:self.size]
|
indices = np.r_[:self.size]
|
||||||
which = (param_index[:,None]==indices[self._fixes_][None,:]).nonzero()
|
which = (param_index[:, None] == indices[self._fixes_][None, :]).nonzero()
|
||||||
param_index = param_index[which[0]]
|
param_index = param_index[which[0]]
|
||||||
transformed_index = (indices-(~self._fixes_).cumsum())[param_index]
|
transformed_index = (indices - (~self._fixes_).cumsum())[param_index]
|
||||||
#print param_index, transformed_index
|
# print param_index, transformed_index
|
||||||
else:
|
else:
|
||||||
transformed_index = param_index
|
transformed_index = param_index
|
||||||
|
|
||||||
|
|
@ -340,7 +343,7 @@ class Model(Parameterized):
|
||||||
xx[xind] -= 2.*step
|
xx[xind] -= 2.*step
|
||||||
f2 = self._objective(xx)
|
f2 = self._objective(xx)
|
||||||
numerical_gradient = (f1 - f2) / (2 * step)
|
numerical_gradient = (f1 - f2) / (2 * step)
|
||||||
if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind]
|
if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
|
||||||
else: ratio = (f1 - f2) / (2 * step * gradient[xind])
|
else: ratio = (f1 - f2) / (2 * step * gradient[xind])
|
||||||
difference = np.abs((f1 - f2) / 2 / step - gradient[xind])
|
difference = np.abs((f1 - f2) / 2 / step - gradient[xind])
|
||||||
|
|
||||||
|
|
@ -358,7 +361,7 @@ class Model(Parameterized):
|
||||||
grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
|
grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
|
||||||
print grad_string
|
print grad_string
|
||||||
|
|
||||||
self._set_params_transformed(x)
|
self.optimizer_array = x
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ class ObsAr(np.ndarray, Pickleable, Observable):
|
||||||
s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy())
|
s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy())
|
||||||
memo[id(self)] = s
|
memo[id(self)] = s
|
||||||
import copy
|
import copy
|
||||||
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
|
Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def __reduce__(self):
|
def __reduce__(self):
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
import itertools
|
import itertools
|
||||||
import numpy
|
import numpy
|
||||||
np = numpy
|
np = numpy
|
||||||
from parameter_core import Parameterizable, adjust_name_for_printing
|
from parameter_core import Parameterizable, adjust_name_for_printing, Pickleable
|
||||||
from observable_array import ObsAr
|
from observable_array import ObsAr
|
||||||
|
|
||||||
###### printing
|
###### printing
|
||||||
|
|
@ -221,10 +221,9 @@ class Param(Parameterizable, ObsAr):
|
||||||
s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy())
|
s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy())
|
||||||
memo[id(self)] = s
|
memo[id(self)] = s
|
||||||
import copy
|
import copy
|
||||||
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
|
Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# Printing -> done
|
# Printing -> done
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ Observable Pattern for patameterization
|
||||||
from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
|
from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import re
|
import re
|
||||||
|
import logging
|
||||||
|
|
||||||
__updated__ = '2014-05-21'
|
__updated__ = '2014-05-21'
|
||||||
|
|
||||||
|
|
@ -49,7 +50,6 @@ class Observable(object):
|
||||||
as an observer. Every time the observable changes, it sends a notification with
|
as an observer. Every time the observable changes, it sends a notification with
|
||||||
self as only argument to all its observers.
|
self as only argument to all its observers.
|
||||||
"""
|
"""
|
||||||
_updated = True
|
|
||||||
_updates = True
|
_updates = True
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(Observable, self).__init__()
|
super(Observable, self).__init__()
|
||||||
|
|
@ -58,13 +58,19 @@ class Observable(object):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def updates(self):
|
def updates(self):
|
||||||
self._updates = self._highest_parent_._updates
|
p = getattr(self, '_highest_parent_', None)
|
||||||
|
if p is not None:
|
||||||
|
self._updates = p._updates
|
||||||
return self._updates
|
return self._updates
|
||||||
|
|
||||||
@updates.setter
|
@updates.setter
|
||||||
def updates(self, ups):
|
def updates(self, ups):
|
||||||
assert isinstance(ups, bool), "updates are either on (True) or off (False)"
|
assert isinstance(ups, bool), "updates are either on (True) or off (False)"
|
||||||
self._highest_parent_._updates = ups
|
p = getattr(self, '_highest_parent_', None)
|
||||||
|
if p is not None:
|
||||||
|
p._updates = ups
|
||||||
|
else:
|
||||||
|
self._updates = ups
|
||||||
if ups:
|
if ups:
|
||||||
self._trigger_params_changed()
|
self._trigger_params_changed()
|
||||||
|
|
||||||
|
|
@ -172,6 +178,7 @@ class Pickleable(object):
|
||||||
"""
|
"""
|
||||||
def __init__(self, *a, **kw):
|
def __init__(self, *a, **kw):
|
||||||
super(Pickleable, self).__init__()
|
super(Pickleable, self).__init__()
|
||||||
|
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# Pickling operations
|
# Pickling operations
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
|
|
@ -208,21 +215,25 @@ class Pickleable(object):
|
||||||
memo[id(p)] = None # set all parents to be None, so they will not be copied
|
memo[id(p)] = None # set all parents to be None, so they will not be copied
|
||||||
memo[id(self.gradient)] = None # reset the gradient
|
memo[id(self.gradient)] = None # reset the gradient
|
||||||
memo[id(self.param_array)] = None # and param_array
|
memo[id(self.param_array)] = None # and param_array
|
||||||
|
memo[id(self.optimizer_array)] = None # and param_array
|
||||||
memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
|
memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
|
||||||
c = copy.deepcopy(self, memo) # and start the copy
|
c = copy.deepcopy(self, memo) # and start the copy
|
||||||
c._parent_index_ = None
|
c._parent_index_ = None
|
||||||
|
c._trigger_params_changed()
|
||||||
return c
|
return c
|
||||||
|
|
||||||
def __deepcopy__(self, memo):
|
def __deepcopy__(self, memo):
|
||||||
s = self.__new__(self.__class__) # fresh instance
|
s = self.__new__(self.__class__) # fresh instance
|
||||||
memo[id(self)] = s # be sure to break all cycles --> self is already done
|
memo[id(self)] = s # be sure to break all cycles --> self is already done
|
||||||
import copy
|
import copy
|
||||||
s.__dict__.update(copy.deepcopy(self.__dict__, memo)) # standard copy
|
s.__setstate__(copy.deepcopy(self.__getstate__(), memo)) # standard copy
|
||||||
return s
|
return s
|
||||||
|
|
||||||
def __getstate__(self):
|
def __getstate__(self):
|
||||||
ignore_list = ['_param_array_', # parameters get set from bottom to top
|
ignore_list = ['_param_array_', # parameters get set from bottom to top
|
||||||
'_gradient_array_', # as well as gradients
|
'_gradient_array_', # as well as gradients
|
||||||
|
'_optimizer_copy_',
|
||||||
|
'logger',
|
||||||
'_fixes_', # and fixes
|
'_fixes_', # and fixes
|
||||||
'_Cacher_wrap__cachers', # never pickle cachers
|
'_Cacher_wrap__cachers', # never pickle cachers
|
||||||
]
|
]
|
||||||
|
|
@ -234,7 +245,8 @@ class Pickleable(object):
|
||||||
|
|
||||||
def __setstate__(self, state):
|
def __setstate__(self, state):
|
||||||
self.__dict__.update(state)
|
self.__dict__.update(state)
|
||||||
return self
|
self._transformed = True
|
||||||
|
|
||||||
|
|
||||||
class Gradcheckable(Pickleable, Parentable):
|
class Gradcheckable(Pickleable, Parentable):
|
||||||
"""
|
"""
|
||||||
|
|
@ -324,7 +336,6 @@ class Indexable(Nameable, Observable):
|
||||||
self._default_constraint_ = default_constraint
|
self._default_constraint_ = default_constraint
|
||||||
from index_operations import ParameterIndexOperations
|
from index_operations import ParameterIndexOperations
|
||||||
self.constraints = ParameterIndexOperations()
|
self.constraints = ParameterIndexOperations()
|
||||||
self._old_constraints = ParameterIndexOperations()
|
|
||||||
self.priors = ParameterIndexOperations()
|
self.priors = ParameterIndexOperations()
|
||||||
if self._default_constraint_ is not None:
|
if self._default_constraint_ is not None:
|
||||||
self.constrain(self._default_constraint_)
|
self.constrain(self._default_constraint_)
|
||||||
|
|
@ -617,36 +628,103 @@ class OptimizationHandlable(Indexable):
|
||||||
"""
|
"""
|
||||||
def __init__(self, name, default_constraint=None, *a, **kw):
|
def __init__(self, name, default_constraint=None, *a, **kw):
|
||||||
super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
|
super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
|
||||||
|
self._optimizer_copy_ = None
|
||||||
|
self._transformed = True
|
||||||
|
|
||||||
def _get_params_transformed(self):
|
#===========================================================================
|
||||||
# transformed parameters (apply un-transformation rules)
|
# Optimizer copy
|
||||||
p = self.param_array.copy()
|
#===========================================================================
|
||||||
[np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
|
@property
|
||||||
if self.has_parent() and self.constraints[__fixed__].size != 0:
|
def optimizer_array(self):
|
||||||
fixes = np.ones(self.size).astype(bool)
|
|
||||||
fixes[self.constraints[__fixed__]] = FIXED
|
|
||||||
return p[fixes]
|
|
||||||
elif self._has_fixes():
|
|
||||||
return p[self._fixes_]
|
|
||||||
return p
|
|
||||||
|
|
||||||
def _set_params_transformed(self, p):
|
|
||||||
"""
|
"""
|
||||||
Set parameters p, but make sure they get transformed before setting.
|
Array for the optimizer to work on.
|
||||||
This means, the optimizer sees p, whereas the model sees transformed(p),
|
This array always lives in the space for the optimizer.
|
||||||
such that, the parameters the model sees are in the right domain.
|
Thus, it is untransformed, going from Transformations.
|
||||||
|
|
||||||
|
Setting this array, will make sure the transformed parameters for this model
|
||||||
|
will be set accordingly. It has to be set with an array, retrieved from
|
||||||
|
this method, as e.g. fixing will resize the array.
|
||||||
|
|
||||||
|
The optimizer should only interfere with this array, such that transofrmations
|
||||||
|
are secured.
|
||||||
"""
|
"""
|
||||||
if not(p is self.param_array):
|
if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
|
||||||
|
self._optimizer_copy_ = np.empty(self.size)
|
||||||
|
|
||||||
|
if self._transformed:
|
||||||
|
self._optimizer_copy_.flat = self.param_array.flat
|
||||||
|
[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
|
||||||
if self.has_parent() and self.constraints[__fixed__].size != 0:
|
if self.has_parent() and self.constraints[__fixed__].size != 0:
|
||||||
fixes = np.ones(self.size).astype(bool)
|
fixes = np.ones(self.size).astype(bool)
|
||||||
fixes[self.constraints[__fixed__]] = FIXED
|
fixes[self.constraints[__fixed__]] = FIXED
|
||||||
self.param_array.flat[fixes] = p
|
return self._optimizer_copy_[fixes]
|
||||||
elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
|
elif self._has_fixes():
|
||||||
else: self.param_array.flat = p
|
return self._optimizer_copy_[self._fixes_]
|
||||||
[np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
|
self._transformed = False
|
||||||
|
|
||||||
|
return self._optimizer_copy_
|
||||||
|
|
||||||
|
@optimizer_array.setter
|
||||||
|
def optimizer_array(self, p):
|
||||||
|
"""
|
||||||
|
Make sure the optimizer copy does not get touched, thus, we only want to
|
||||||
|
set the values *inside* not the array itself.
|
||||||
|
|
||||||
|
Also we want to update param_array in here.
|
||||||
|
"""
|
||||||
|
if self.__dict__.get('_optimizer_copy_', None) is None or self.size != self._optimizer_copy_.size:
|
||||||
|
self._optimizer_copy_ = np.empty(self.size)
|
||||||
|
|
||||||
|
self._optimizer_copy_.flat = self.param_array.flat
|
||||||
|
|
||||||
|
if self.has_parent() and self.constraints[__fixed__].size != 0:
|
||||||
|
fixes = np.ones(self.size).astype(bool)
|
||||||
|
fixes[self.constraints[__fixed__]] = FIXED
|
||||||
|
self._optimizer_copy_.flat[fixes] = p
|
||||||
|
elif self._has_fixes(): self._optimizer_copy_.flat[self._fixes_] = p
|
||||||
|
else: self._optimizer_copy_.flat = p
|
||||||
|
|
||||||
|
self.param_array.flat = self._optimizer_copy_.flat
|
||||||
|
|
||||||
|
[np.put(self.param_array, ind, c.f(self._optimizer_copy_.flat[ind]))
|
||||||
for c, ind in self.constraints.iteritems() if c != __fixed__]
|
for c, ind in self.constraints.iteritems() if c != __fixed__]
|
||||||
|
|
||||||
|
self._transformed = True
|
||||||
|
|
||||||
self._trigger_params_changed()
|
self._trigger_params_changed()
|
||||||
|
|
||||||
|
def _get_params_transformed(self):
|
||||||
|
raise DeprecationWarning, "_get|set_params{_transformed} is deprecated, use self.optimizer array insetad!"
|
||||||
|
# # transformed parameters (apply un-transformation rules)
|
||||||
|
# p = self.param_array.copy()
|
||||||
|
# [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
|
||||||
|
# if self.has_parent() and self.constraints[__fixed__].size != 0:
|
||||||
|
# fixes = np.ones(self.size).astype(bool)
|
||||||
|
# fixes[self.constraints[__fixed__]] = FIXED
|
||||||
|
# return p[fixes]
|
||||||
|
# elif self._has_fixes():
|
||||||
|
# return p[self._fixes_]
|
||||||
|
# return p
|
||||||
|
#
|
||||||
|
def _set_params_transformed(self, p):
|
||||||
|
raise DeprecationWarning, "_get|set_params{_transformed} is deprecated, use self.optimizer array insetad!"
|
||||||
|
|
||||||
|
# """
|
||||||
|
# Set parameters p, but make sure they get transformed before setting.
|
||||||
|
# This means, the optimizer sees p, whereas the model sees transformed(p),
|
||||||
|
# such that, the parameters the model sees are in the right domain.
|
||||||
|
# """
|
||||||
|
# if not(p is self.param_array):
|
||||||
|
# if self.has_parent() and self.constraints[__fixed__].size != 0:
|
||||||
|
# fixes = np.ones(self.size).astype(bool)
|
||||||
|
# fixes[self.constraints[__fixed__]] = FIXED
|
||||||
|
# self.param_array.flat[fixes] = p
|
||||||
|
# elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
|
||||||
|
# else: self.param_array.flat = p
|
||||||
|
# [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
|
||||||
|
# for c, ind in self.constraints.iteritems() if c != __fixed__]
|
||||||
|
# self._trigger_params_changed()
|
||||||
|
|
||||||
def _trigger_params_changed(self, trigger_parent=True):
|
def _trigger_params_changed(self, trigger_parent=True):
|
||||||
"""
|
"""
|
||||||
First tell all children to update,
|
First tell all children to update,
|
||||||
|
|
@ -725,7 +803,7 @@ class OptimizationHandlable(Indexable):
|
||||||
x = rand_gen(loc=loc, scale=scale, size=self._size_transformed(), *args, **kwargs)
|
x = rand_gen(loc=loc, scale=scale, size=self._size_transformed(), *args, **kwargs)
|
||||||
# now draw from prior where possible
|
# now draw from prior where possible
|
||||||
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
|
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
|
||||||
self._set_params_transformed(x) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
|
self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)
|
||||||
|
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# For shared memory arrays. This does nothing in Param, but sets the memory
|
# For shared memory arrays. This does nothing in Param, but sets the memory
|
||||||
|
|
@ -784,6 +862,7 @@ class Parameterizable(OptimizationHandlable):
|
||||||
self.parameters = ArrayList()
|
self.parameters = ArrayList()
|
||||||
self._param_array_ = None
|
self._param_array_ = None
|
||||||
self._added_names_ = set()
|
self._added_names_ = set()
|
||||||
|
self.logger = logging.getLogger(self.__class__.__name__)
|
||||||
self.__visited = False # for traversing in reverse order we need to know if we were here already
|
self.__visited = False # for traversing in reverse order we need to know if we were here already
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
@ -894,6 +973,11 @@ class Parameterizable(OptimizationHandlable):
|
||||||
self._remove_parameter_name(None, old_name)
|
self._remove_parameter_name(None, old_name)
|
||||||
self._add_parameter_name(param)
|
self._add_parameter_name(param)
|
||||||
|
|
||||||
|
def __setstate__(self, state):
|
||||||
|
super(Parameterizable, self).__setstate__(state)
|
||||||
|
self.logger = logging.getLogger(self.__class__.__name__)
|
||||||
|
return self
|
||||||
|
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# notification system
|
# notification system
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ class BayesianGPLVM(SparseGP):
|
||||||
"""
|
"""
|
||||||
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
|
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
|
||||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
|
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
|
||||||
self.logger = logging.getLogger("Bayesian GPLVM <{}>".format(hex(id(self))))
|
self.logger = logging.getLogger(self.__class__.__name__)
|
||||||
if X == None:
|
if X == None:
|
||||||
from ..util.initialization import initialize_latent
|
from ..util.initialization import initialize_latent
|
||||||
self.logger.info("initializing latent space X with method {}".format(init))
|
self.logger.info("initializing latent space X with method {}".format(init))
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,7 @@ class MRD(SparseGP):
|
||||||
inference_method=None, likelihoods=None, name='mrd', Ynames=None):
|
inference_method=None, likelihoods=None, name='mrd', Ynames=None):
|
||||||
super(GP, self).__init__(name)
|
super(GP, self).__init__(name)
|
||||||
|
|
||||||
self.logger = logging.getLogger("MRD <{}>".format(hex(id(self))))
|
self.logger = logging.getLogger(self.__class__.__name__)
|
||||||
self.input_dim = input_dim
|
self.input_dim = input_dim
|
||||||
self.num_inducing = num_inducing
|
self.num_inducing = num_inducing
|
||||||
|
|
||||||
|
|
@ -107,16 +107,16 @@ class MRD(SparseGP):
|
||||||
self.logger.info("building kernels")
|
self.logger.info("building kernels")
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
from ..kern import RBF
|
from ..kern import RBF
|
||||||
self.kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
|
kernels = [RBF(input_dim, ARD=1, lengthscale=fracs[i]) for i in range(len(Ylist))]
|
||||||
elif isinstance(kernel, Kern):
|
elif isinstance(kernel, Kern):
|
||||||
self.kernels = []
|
kernels = []
|
||||||
for i in range(len(Ylist)):
|
for i in range(len(Ylist)):
|
||||||
k = kernel.copy()
|
k = kernel.copy()
|
||||||
self.kernels.append(k)
|
kernels.append(k)
|
||||||
else:
|
else:
|
||||||
assert len(kernel) == len(Ylist), "need one kernel per output"
|
assert len(kernel) == len(Ylist), "need one kernel per output"
|
||||||
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
|
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
|
||||||
self.kernels = kernel
|
kernels = kernel
|
||||||
|
|
||||||
if X_variance is None:
|
if X_variance is None:
|
||||||
X_variance = np.random.uniform(0.1, 0.2, X.shape)
|
X_variance = np.random.uniform(0.1, 0.2, X.shape)
|
||||||
|
|
@ -125,8 +125,8 @@ class MRD(SparseGP):
|
||||||
self.X = NormalPosterior(X, X_variance)
|
self.X = NormalPosterior(X, X_variance)
|
||||||
|
|
||||||
if likelihoods is None:
|
if likelihoods is None:
|
||||||
self.likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
|
likelihoods = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
|
||||||
else: self.likelihoods = likelihoods
|
else: likelihoods = likelihoods
|
||||||
|
|
||||||
self.logger.info("adding X and Z")
|
self.logger.info("adding X and Z")
|
||||||
self.add_parameters(self.X, self.Z)
|
self.add_parameters(self.X, self.Z)
|
||||||
|
|
@ -134,9 +134,8 @@ class MRD(SparseGP):
|
||||||
self.bgplvms = []
|
self.bgplvms = []
|
||||||
self.num_data = Ylist[0].shape[0]
|
self.num_data = Ylist[0].shape[0]
|
||||||
|
|
||||||
for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, self.kernels, self.likelihoods, self.Ylist):
|
for i, n, k, l, Y in itertools.izip(itertools.count(), Ynames, kernels, likelihoods, Ylist):
|
||||||
assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
|
assert Y.shape[0] == self.num_data, "All datasets need to share the number of datapoints, and those have to correspond to one another"
|
||||||
|
|
||||||
p = Parameterized(name=n)
|
p = Parameterized(name=n)
|
||||||
p.add_parameter(k)
|
p.add_parameter(k)
|
||||||
p.kern = k
|
p.kern = k
|
||||||
|
|
@ -154,19 +153,18 @@ class MRD(SparseGP):
|
||||||
self.posteriors = []
|
self.posteriors = []
|
||||||
self.Z.gradient[:] = 0.
|
self.Z.gradient[:] = 0.
|
||||||
self.X.gradient[:] = 0.
|
self.X.gradient[:] = 0.
|
||||||
for y, k, l, i in itertools.izip(self.Ylist, self.kernels, self.likelihoods, self.inference_method):
|
for y, b, i in itertools.izip(self.Ylist, self.bgplvms, self.inference_method):
|
||||||
self.logger.info('working on im <{}>'.format(hex(id(i))))
|
self.logger.info('working on im <{}>'.format(hex(id(i))))
|
||||||
|
k, l = b.kern, b.likelihood
|
||||||
posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)
|
posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)
|
||||||
|
|
||||||
self.posteriors.append(posterior)
|
self.posteriors.append(posterior)
|
||||||
self._log_marginal_likelihood += lml
|
self._log_marginal_likelihood += lml
|
||||||
|
|
||||||
# likelihoods gradients
|
# likelihoods gradients
|
||||||
self.logger.info("likelihood gradients")
|
|
||||||
l.update_gradients(grad_dict.pop('dL_dthetaL'))
|
l.update_gradients(grad_dict.pop('dL_dthetaL'))
|
||||||
|
|
||||||
#gradients wrt kernel
|
#gradients wrt kernel
|
||||||
self.logger.info("kernel gradients")
|
|
||||||
dL_dKmm = grad_dict.pop('dL_dKmm')
|
dL_dKmm = grad_dict.pop('dL_dKmm')
|
||||||
k.update_gradients_full(dL_dKmm, self.Z, None)
|
k.update_gradients_full(dL_dKmm, self.Z, None)
|
||||||
target = k.gradient.copy()
|
target = k.gradient.copy()
|
||||||
|
|
@ -174,7 +172,6 @@ class MRD(SparseGP):
|
||||||
k.gradient += target
|
k.gradient += target
|
||||||
|
|
||||||
#gradients wrt Z
|
#gradients wrt Z
|
||||||
self.logger.info("Z gradients")
|
|
||||||
self.Z.gradient += k.gradients_X(dL_dKmm, self.Z)
|
self.Z.gradient += k.gradients_X(dL_dKmm, self.Z)
|
||||||
self.Z.gradient += k.gradients_Z_expectations(
|
self.Z.gradient += k.gradients_Z_expectations(
|
||||||
grad_dict['dL_dpsi0'],
|
grad_dict['dL_dpsi0'],
|
||||||
|
|
@ -182,16 +179,15 @@ class MRD(SparseGP):
|
||||||
grad_dict['dL_dpsi2'],
|
grad_dict['dL_dpsi2'],
|
||||||
Z=self.Z, variational_posterior=self.X)
|
Z=self.Z, variational_posterior=self.X)
|
||||||
|
|
||||||
self.logger.info("X gradients")
|
|
||||||
dL_dmean, dL_dS = k.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict)
|
dL_dmean, dL_dS = k.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict)
|
||||||
self.X.mean.gradient += dL_dmean
|
self.X.mean.gradient += dL_dmean
|
||||||
self.X.variance.gradient += dL_dS
|
self.X.variance.gradient += dL_dS
|
||||||
|
|
||||||
# update for the KL divergence
|
|
||||||
self.posterior = self.posteriors[0]
|
self.posterior = self.posteriors[0]
|
||||||
self.kern = self.kernels[0]
|
self.kern = self.bgplvms[0].kern
|
||||||
self.likelihood = self.likelihoods[0]
|
self.likelihood = self.bgplvms[0].likelihood
|
||||||
|
|
||||||
|
# update for the KL divergence
|
||||||
self.variational_prior.update_gradients_KL(self.X)
|
self.variational_prior.update_gradients_KL(self.X)
|
||||||
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
|
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
|
||||||
|
|
||||||
|
|
@ -268,8 +264,8 @@ class MRD(SparseGP):
|
||||||
This predicts the output mean and variance for the dataset given in Ylist[Yindex]
|
This predicts the output mean and variance for the dataset given in Ylist[Yindex]
|
||||||
"""
|
"""
|
||||||
self.posterior = self.posteriors[Yindex]
|
self.posterior = self.posteriors[Yindex]
|
||||||
self.kern = self.kernels[Yindex]
|
self.kern = self.bgplvms[0].kern
|
||||||
self.likelihood = self.likelihoods[Yindex]
|
self.likelihood = self.bgplvms[0].likelihood
|
||||||
return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern)
|
return super(MRD, self).predict(Xnew, full_cov, Y_metadata, kern)
|
||||||
|
|
||||||
#===============================================================================
|
#===============================================================================
|
||||||
|
|
@ -311,7 +307,7 @@ class MRD(SparseGP):
|
||||||
"""
|
"""
|
||||||
import sys
|
import sys
|
||||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||||
import matplotlib.pyplot as plt
|
from matplotlib import pyplot as plt
|
||||||
from ..plotting.matplot_dep import dim_reduction_plots
|
from ..plotting.matplot_dep import dim_reduction_plots
|
||||||
if "Yindex" not in predict_kwargs:
|
if "Yindex" not in predict_kwargs:
|
||||||
predict_kwargs['Yindex'] = 0
|
predict_kwargs['Yindex'] = 0
|
||||||
|
|
@ -333,10 +329,7 @@ class MRD(SparseGP):
|
||||||
return plot
|
return plot
|
||||||
|
|
||||||
def __getstate__(self):
|
def __getstate__(self):
|
||||||
# TODO:
|
state = super(MRD, self).__getstate__()
|
||||||
import copy
|
|
||||||
state = copy.copy(self.__dict__)
|
|
||||||
del state['kernels']
|
|
||||||
del state['kern']
|
del state['kern']
|
||||||
del state['likelihood']
|
del state['likelihood']
|
||||||
return state
|
return state
|
||||||
|
|
@ -344,7 +337,6 @@ class MRD(SparseGP):
|
||||||
def __setstate__(self, state):
|
def __setstate__(self, state):
|
||||||
# TODO:
|
# TODO:
|
||||||
super(MRD, self).__setstate__(state)
|
super(MRD, self).__setstate__(state)
|
||||||
self.kernels = [p.kern for p in self.bgplvms]
|
self.kern = self.bgplvms[0].kern
|
||||||
self.kern = self.kernels[0]
|
self.likelihood = self.bgplvms[0].likelihood
|
||||||
self.likelihood = self.likelihoods[0]
|
|
||||||
self.parameters_changed()
|
self.parameters_changed()
|
||||||
|
|
@ -16,8 +16,7 @@ from GPy.core.parameterization.priors import Gaussian
|
||||||
from GPy.kern._src.rbf import RBF
|
from GPy.kern._src.rbf import RBF
|
||||||
from GPy.kern._src.linear import Linear
|
from GPy.kern._src.linear import Linear
|
||||||
from GPy.kern._src.static import Bias, White
|
from GPy.kern._src.static import Bias, White
|
||||||
from GPy.examples.dimensionality_reduction import mrd_simulation,\
|
from GPy.examples.dimensionality_reduction import mrd_simulation
|
||||||
bgplvm_simulation
|
|
||||||
from GPy.examples.regression import toy_rbf_1d_50
|
from GPy.examples.regression import toy_rbf_1d_50
|
||||||
from GPy.core.parameterization.variational import NormalPosterior
|
from GPy.core.parameterization.variational import NormalPosterior
|
||||||
from GPy.models.gp_regression import GPRegression
|
from GPy.models.gp_regression import GPRegression
|
||||||
|
|
@ -90,6 +89,7 @@ class Test(ListDictTestCase):
|
||||||
self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops)
|
self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops)
|
||||||
self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops)
|
self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops)
|
||||||
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||||
|
pcopy.gradient = 10 # gradient does not get copied anymore
|
||||||
self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
|
self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
|
||||||
self.assertSequenceEqual(str(par), str(pcopy))
|
self.assertSequenceEqual(str(par), str(pcopy))
|
||||||
self.assertIsNot(par.param_array, pcopy.param_array)
|
self.assertIsNot(par.param_array, pcopy.param_array)
|
||||||
|
|
@ -151,6 +151,7 @@ class Test(ListDictTestCase):
|
||||||
par = NormalPosterior(X,Xv)
|
par = NormalPosterior(X,Xv)
|
||||||
par.gradient = 10
|
par.gradient = 10
|
||||||
pcopy = par.copy()
|
pcopy = par.copy()
|
||||||
|
pcopy.gradient = 10
|
||||||
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
|
||||||
self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
|
self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
|
||||||
self.assertSequenceEqual(str(par), str(pcopy))
|
self.assertSequenceEqual(str(par), str(pcopy))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue