Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
mzwiessele 2014-05-15 18:47:26 +01:00
commit 382645ff37
35 changed files with 943 additions and 508 deletions

View file

@ -10,7 +10,7 @@ from model import Model
from parameterization import ObsAr from parameterization import ObsAr
from .. import likelihoods from .. import likelihoods
from ..likelihoods.gaussian import Gaussian from ..likelihoods.gaussian import Gaussian
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference
from parameterization.variational import VariationalPosterior from parameterization.variational import VariationalPosterior
class GP(Model): class GP(Model):
@ -21,6 +21,7 @@ class GP(Model):
:param Y: output observations :param Y: output observations
:param kernel: a GPy kernel, defaults to rbf+white :param kernel: a GPy kernel, defaults to rbf+white
:param likelihood: a GPy likelihood :param likelihood: a GPy likelihood
:param :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference_method: The inference method to use for this GP
:rtype: model object :rtype: model object
.. Note:: Multiple independent outputs are allowed using columns of Y .. Note:: Multiple independent outputs are allowed using columns of Y
@ -220,3 +221,20 @@ class GP(Model):
""" """
return self.kern.input_sensitivity() return self.kern.input_sensitivity()
def optimize(self, optimizer=None, start=None, **kwargs):
"""
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
kwargs are passed to the optimizer. They can be:
:param max_f_eval: maximum number of function evaluations
:type max_f_eval: int
:messages: whether to display during optimisation
:type messages: bool
:param optimizer: which optimizer to use (defaults to self.preferred optimizer)
:type optimizer: string
TODO: valid args
"""
self.inference_method.on_optimization_start()
super(GP, self).optimize(optimizer, start, **kwargs)
self.inference_method.on_optimization_end()

View file

@ -220,7 +220,7 @@ class Model(Parameterized):
if self.is_fixed: if self.is_fixed:
raise RuntimeError, "Cannot optimize, when everything is fixed" raise RuntimeError, "Cannot optimize, when everything is fixed"
if self.size == 0: if self.size == 0:
raise RuntimeError, "Model without parameters cannot be minimized" raise RuntimeError, "Model without parameters cannot be optimized"
if optimizer is None: if optimizer is None:
optimizer = self.preferred_optimizer optimizer = self.preferred_optimizer

View file

@ -38,7 +38,12 @@ class ArrayList(list):
raise ValueError, "{} is not in list".format(item) raise ValueError, "{} is not in list".format(item)
pass pass
class ObservablesList(object): class ObserverList(object):
"""
A list which containts the observables.
It only holds weak references to observers, such that unbound
observers dont dangle in memory.
"""
def __init__(self): def __init__(self):
self._poc = [] self._poc = []
@ -46,27 +51,30 @@ class ObservablesList(object):
p,o,c = self._poc[ind] p,o,c = self._poc[ind]
return p, o(), c return p, o(), c
def remove(self, priority, observable, callble): def remove(self, priority, observer, callble):
""" """
Remove one observer, which had priority and callble.
""" """
self.flush() self.flush()
for i in range(len(self) - 1, -1, -1): for i in range(len(self) - 1, -1, -1):
p,o,c = self[i] p,o,c = self[i]
if priority==p and observable==o and callble==c: if priority==p and observer==o and callble==c:
del self._poc[i] del self._poc[i]
def __repr__(self): def __repr__(self):
return self._poc.__repr__() return self._poc.__repr__()
def add(self, priority, observable, callble): def add(self, priority, observer, callble):
if observable is not None: """
Add an observer with priority and callble
"""
if observer is not None:
ins = 0 ins = 0
for pr, _, _ in self: for pr, _, _ in self:
if priority > pr: if priority > pr:
break break
ins += 1 ins += 1
self._poc.insert(ins, (priority, weakref.ref(observable), callble)) self._poc.insert(ins, (priority, weakref.ref(observer), callble))
def __str__(self): def __str__(self):
ret = [] ret = []
@ -83,6 +91,9 @@ class ObservablesList(object):
return '\n'.join(ret) return '\n'.join(ret)
def flush(self): def flush(self):
"""
Make sure all weak references, which point to nothing are flushed (deleted)
"""
self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None] self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None]
def __iter__(self): def __iter__(self):
@ -95,7 +106,7 @@ class ObservablesList(object):
return self._poc.__len__() return self._poc.__len__()
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
s = ObservablesList() s = ObserverList()
for p,o,c in self: for p,o,c in self:
import copy import copy
s.add(p, copy.deepcopy(o, memo), copy.deepcopy(c, memo)) s.add(p, copy.deepcopy(o, memo), copy.deepcopy(c, memo))

View file

@ -4,7 +4,7 @@
import itertools import itertools
import numpy import numpy
np = numpy np = numpy
from parameter_core import OptimizationHandlable, adjust_name_for_printing from parameter_core import Parameterizable, adjust_name_for_printing
from observable_array import ObsAr from observable_array import ObsAr
###### printing ###### printing
@ -16,7 +16,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
__print_threshold__ = 5 __print_threshold__ = 5
###### ######
class Param(OptimizationHandlable, ObsAr): class Param(Parameterizable, ObsAr):
""" """
Parameter object for GPy models. Parameter object for GPy models.
@ -42,7 +42,7 @@ class Param(OptimizationHandlable, ObsAr):
""" """
__array_priority__ = -1 # Never give back Param __array_priority__ = -1 # Never give back Param
_fixes_ = None _fixes_ = None
_parameters_ = [] parameters = []
def __new__(cls, name, input_array, default_constraint=None): def __new__(cls, name, input_array, default_constraint=None):
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array)) obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
obj._current_slice_ = (slice(obj.shape[0]),) obj._current_slice_ = (slice(obj.shape[0]),)
@ -87,6 +87,9 @@ class Param(OptimizationHandlable, ObsAr):
@property @property
def param_array(self): def param_array(self):
"""
As we are a leaf, this just returns self
"""
return self return self
@property @property
@ -139,6 +142,9 @@ class Param(OptimizationHandlable, ObsAr):
def _raveled_index_for(self, obj): def _raveled_index_for(self, obj):
return self._raveled_index() return self._raveled_index()
#===========================================================================
# Index recreation
#===========================================================================
def _expand_index(self, slice_index=None): def _expand_index(self, slice_index=None):
# this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes # this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes
# it basically translates slices to their respective index arrays and turns negative indices around # it basically translates slices to their respective index arrays and turns negative indices around
@ -147,6 +153,8 @@ class Param(OptimizationHandlable, ObsAr):
slice_index = self._current_slice_ slice_index = self._current_slice_
def f(a): def f(a):
a, b = a a, b = a
if isinstance(a, numpy.ndarray) and a.dtype == bool:
raise ValueError, "Boolean indexing not implemented, use Param[np.where(index)] to index by boolean arrays!"
if a not in (slice(None), Ellipsis): if a not in (slice(None), Ellipsis):
if isinstance(a, slice): if isinstance(a, slice):
start, stop, step = a.indices(b) start, stop, step = a.indices(b)
@ -175,15 +183,17 @@ class Param(OptimizationHandlable, ObsAr):
This will function will just call visit on self, as Param are leaf nodes. This will function will just call visit on self, as Param are leaf nodes.
""" """
self.__visited = True
visit(self, *args, **kwargs) visit(self, *args, **kwargs)
self.__visited = False
def traverse_parents(self, visit, *args, **kwargs): def traverse_parents(self, visit, *args, **kwargs):
""" """
Traverse the hierarchy upwards, visiting all parents and their children, except self. Traverse the hierarchy upwards, visiting all parents and their children, except self.
See "visitor pattern" in literature. This is implemented in pre-order fashion. See "visitor pattern" in literature. This is implemented in pre-order fashion.
Example: Example:
parents = [] parents = []
self.traverse_parents(parents.append) self.traverse_parents(parents.append)
print parents print parents

View file

@ -17,7 +17,7 @@ from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED,
import numpy as np import numpy as np
import re import re
__updated__ = '2014-05-12' __updated__ = '2014-05-15'
class HierarchyError(Exception): class HierarchyError(Exception):
""" """
@ -52,13 +52,23 @@ class Observable(object):
_updated = True _updated = True
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(Observable, self).__init__() super(Observable, self).__init__()
from lists_and_dicts import ObservablesList from lists_and_dicts import ObserverList
self.observers = ObservablesList() self.observers = ObserverList()
def add_observer(self, observer, callble, priority=0): def add_observer(self, observer, callble, priority=0):
"""
Add an observer `observer` with the callback `callble`
and priority `priority` to this observers list.
"""
self.observers.add(priority, observer, callble) self.observers.add(priority, observer, callble)
def remove_observer(self, observer, callble=None): def remove_observer(self, observer, callble=None):
"""
Either (if callble is None) remove all callables,
which were added alongside observer,
or remove callable `callble` which was added alongside
the observer `observer`.
"""
to_remove = [] to_remove = []
for poc in self.observers: for poc in self.observers:
_, obs, clble = poc _, obs, clble = poc
@ -91,10 +101,6 @@ class Observable(object):
break break
callble(self, which=which) callble(self, which=which)
#===============================================================================
# Foundation framework for parameterized and param objects:
#===============================================================================
class Parentable(object): class Parentable(object):
""" """
Enable an Object to have a parent. Enable an Object to have a parent.
@ -172,7 +178,11 @@ class Pickleable(object):
# copy and pickling # copy and pickling
#=========================================================================== #===========================================================================
def copy(self): def copy(self):
"""Returns a (deep) copy of the current model""" """
Returns a (deep) copy of the current parameter handle.
All connections to parents of the copy will be cut.
"""
#raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy" #raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy"
import copy import copy
memo = {} memo = {}
@ -196,12 +206,11 @@ class Pickleable(object):
return s return s
def __getstate__(self): def __getstate__(self):
ignore_list = ([#'_parent_', '_parent_index_', ignore_list = ['_param_array_', # parameters get set from bottom to top
#'observers', '_gradient_array_', # as well as gradients
'_param_array_', '_gradient_array_', '_fixes_', '_fixes_', # and fixes
'_Cacher_wrap__cachers'] '_Cacher_wrap__cachers', # never pickle cachers
#+ self.parameter_names(recursive=False) ]
)
dc = dict() dc = dict()
for k,v in self.__dict__.iteritems(): for k,v in self.__dict__.iteritems():
if k not in ignore_list: if k not in ignore_list:
@ -246,7 +255,6 @@ class Gradcheckable(Pickleable, Parentable):
""" """
raise HierarchyError, "This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!" raise HierarchyError, "This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!"
class Nameable(Gradcheckable): class Nameable(Gradcheckable):
""" """
Make an object nameable inside the hierarchy. Make an object nameable inside the hierarchy.
@ -285,41 +293,8 @@ class Nameable(Gradcheckable):
return self._parent_.hierarchy_name() + "." + adjust(self.name) return self._parent_.hierarchy_name() + "." + adjust(self.name)
return adjust(self.name) return adjust(self.name)
class Indexable(object):
"""
Enable enraveled indexes and offsets for this object.
The raveled index of an object is the index for its parameters in a flattened int array.
"""
def __init__(self, *a, **kw):
super(Indexable, self).__init__()
def _raveled_index(self): class Indexable(Nameable, Observable):
"""
Flattened array of ints, specifying the index of this object.
This has to account for shaped parameters!
"""
raise NotImplementedError, "Need to be able to get the raveled Index"
def _offset_for(self, param):
"""
Return the offset of the param inside this parameterized object.
This does not need to account for shaped parameters, as it
basically just sums up the parameter sizes which come before param.
"""
return 0
#raise NotImplementedError, "shouldnt happen, offset required from non parameterization object?"
def _raveled_index_for(self, param):
"""
get the raveled index for a param
that is an int array, containing the indexes for the flattened
param inside this parameterized logic.
"""
return param._raveled_index()
#raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"
class Constrainable(Nameable, Indexable, Observable):
""" """
Make an object constrainable with Priors and Transformations. Make an object constrainable with Priors and Transformations.
TODO: Mappings!! TODO: Mappings!!
@ -330,7 +305,7 @@ class Constrainable(Nameable, Indexable, Observable):
:func:`constrain()` and :func:`unconstrain()` are main methods here :func:`constrain()` and :func:`unconstrain()` are main methods here
""" """
def __init__(self, name, default_constraint=None, *a, **kw): def __init__(self, name, default_constraint=None, *a, **kw):
super(Constrainable, self).__init__(name=name, *a, **kw) super(Indexable, self).__init__(name=name, *a, **kw)
self._default_constraint_ = default_constraint self._default_constraint_ = default_constraint
from index_operations import ParameterIndexOperations from index_operations import ParameterIndexOperations
self.constraints = ParameterIndexOperations() self.constraints = ParameterIndexOperations()
@ -352,6 +327,39 @@ class Constrainable(Nameable, Indexable, Observable):
self._connect_fixes() self._connect_fixes()
self._notify_parent_change() self._notify_parent_change()
#===========================================================================
# Indexable
#===========================================================================
def _offset_for(self, param):
"""
Return the offset of the param inside this parameterized object.
This does not need to account for shaped parameters, as it
basically just sums up the parameter sizes which come before param.
"""
if param.has_parent():
if param._parent_._get_original(param) in self.parameters:
return self._param_slices_[param._parent_._get_original(param)._parent_index_].start
return self._offset_for(param._parent_) + param._parent_._offset_for(param)
return 0
def _raveled_index_for(self, param):
"""
get the raveled index for a param
that is an int array, containing the indexes for the flattened
param inside this parameterized logic.
"""
from param import ParamConcatenation
if isinstance(param, ParamConcatenation):
return np.hstack((self._raveled_index_for(p) for p in param.params))
return param._raveled_index() + self._offset_for(param)
def _raveled_index(self):
"""
Flattened array of ints, specifying the index of this object.
This has to account for shaped parameters!
"""
return np.r_[:self.size]
#=========================================================================== #===========================================================================
# Fixing Parameters: # Fixing Parameters:
#=========================================================================== #===========================================================================
@ -406,9 +414,24 @@ class Constrainable(Nameable, Indexable, Observable):
self._fixes_ = None self._fixes_ = None
del self.constraints[__fixed__] del self.constraints[__fixed__]
#===========================================================================
# Convenience for fixed
#===========================================================================
def _has_fixes(self): def _has_fixes(self):
return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size
@property
def is_fixed(self):
for p in self.parameters:
if not p.is_fixed: return False
return True
def _get_original(self, param):
# if advanced indexing is activated it happens that the array is a copy
# you can retrieve the original param through this method, by passing
# the copy here
return self.parameters[param._parent_index_]
#=========================================================================== #===========================================================================
# Prior Operations # Prior Operations
#=========================================================================== #===========================================================================
@ -432,8 +455,7 @@ class Constrainable(Nameable, Indexable, Observable):
def unset_priors(self, *priors): def unset_priors(self, *priors):
""" """
Un-set all priors given from this parameter handle. Un-set all priors given (in *priors) from this parameter handle.
""" """
return self._remove_from_index_operations(self.priors, priors) return self._remove_from_index_operations(self.priors, priors)
@ -535,7 +557,7 @@ class Constrainable(Nameable, Indexable, Observable):
self.constraints = ParameterIndexOperationsView(parent.constraints, parent._offset_for(self), self.size) self.constraints = ParameterIndexOperationsView(parent.constraints, parent._offset_for(self), self.size)
self.priors = ParameterIndexOperationsView(parent.priors, parent._offset_for(self), self.size) self.priors = ParameterIndexOperationsView(parent.priors, parent._offset_for(self), self.size)
self._fixes_ = None self._fixes_ = None
for p in self._parameters_: for p in self.parameters:
p._parent_changed(parent) p._parent_changed(parent)
def _add_to_index_operations(self, which, reconstrained, what, warning): def _add_to_index_operations(self, which, reconstrained, what, warning):
@ -563,14 +585,13 @@ class Constrainable(Nameable, Indexable, Observable):
removed = np.empty((0,), dtype=int) removed = np.empty((0,), dtype=int)
for t in transforms: for t in transforms:
unconstrained = which.remove(t, self._raveled_index()) unconstrained = which.remove(t, self._raveled_index())
print unconstrained
removed = np.union1d(removed, unconstrained) removed = np.union1d(removed, unconstrained)
if t is __fixed__: if t is __fixed__:
self._highest_parent_._set_unfixed(self, unconstrained) self._highest_parent_._set_unfixed(self, unconstrained)
return removed return removed
class OptimizationHandlable(Constrainable): class OptimizationHandlable(Indexable):
""" """
This enables optimization handles on an Object as done in GPy 0.4. This enables optimization handles on an Object as done in GPy 0.4.
@ -580,7 +601,7 @@ class OptimizationHandlable(Constrainable):
super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw) super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
def _get_params_transformed(self): def _get_params_transformed(self):
# transformed parameters (apply transformation rules) # transformed parameters (apply un-transformation rules)
p = self.param_array.copy() p = self.param_array.copy()
[np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
if self.has_parent() and self.constraints[__fixed__].size != 0: if self.has_parent() and self.constraints[__fixed__].size != 0:
@ -592,6 +613,11 @@ class OptimizationHandlable(Constrainable):
return p return p
def _set_params_transformed(self, p): def _set_params_transformed(self, p):
"""
Set parameters p, but make sure they get transformed before setting.
This means, the optimizer sees p, whereas the model sees transformed(p),
such that, the parameters the model sees are in the right domain.
"""
if not(p is self.param_array): if not(p is self.param_array):
if self.has_parent() and self.constraints[__fixed__].size != 0: if self.has_parent() and self.constraints[__fixed__].size != 0:
fixes = np.ones(self.size).astype(bool) fixes = np.ones(self.size).astype(bool)
@ -604,12 +630,33 @@ class OptimizationHandlable(Constrainable):
self._trigger_params_changed() self._trigger_params_changed()
def _trigger_params_changed(self, trigger_parent=True): def _trigger_params_changed(self, trigger_parent=True):
[p._trigger_params_changed(trigger_parent=False) for p in self._parameters_] """
First tell all children to update,
then update yourself.
If trigger_parent is True, we will tell the parent, otherwise not.
"""
[p._trigger_params_changed(trigger_parent=False) for p in self.parameters]
self.notify_observers(None, None if trigger_parent else -np.inf) self.notify_observers(None, None if trigger_parent else -np.inf)
def _size_transformed(self): def _size_transformed(self):
"""
As fixes are not passed to the optimiser, the size of the model for the optimiser
is the size of all parameters minus the size of the fixes.
"""
return self.size - self.constraints[__fixed__].size return self.size - self.constraints[__fixed__].size
def _transform_gradients(self, g):
"""
Transform the gradients by multiplying the gradient factor for each
constraint to it.
"""
if self.has_parent():
return g
[np.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes(): return g[self._fixes_]
return g
@property @property
def num_params(self): def num_params(self):
""" """
@ -628,8 +675,8 @@ class OptimizationHandlable(Constrainable):
""" """
if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
else: adjust = lambda x: x else: adjust = lambda x: x
if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)] if recursive: names = [xi for x in self.parameters for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)]
else: names = [adjust(x.name) for x in self._parameters_] else: names = [adjust(x.name) for x in self.parameters]
if add_self: names = map(lambda x: adjust(self.name) + "." + x, names) if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
return names return names
@ -651,7 +698,7 @@ class OptimizationHandlable(Constrainable):
Randomize the model. Randomize the model.
Make this draw from the prior if one exists, else draw from given random generator Make this draw from the prior if one exists, else draw from given random generator
:param rand_gen: numpy random number generator which takes args and kwargs :param rand_gen: np random number generator which takes args and kwargs
:param flaot loc: loc parameter for random number generator :param flaot loc: loc parameter for random number generator
:param float scale: scale parameter for random number generator :param float scale: scale parameter for random number generator
:param args, kwargs: will be passed through to random number generator :param args, kwargs: will be passed through to random number generator
@ -667,7 +714,7 @@ class OptimizationHandlable(Constrainable):
# for all parameterized objects # for all parameterized objects
#=========================================================================== #===========================================================================
@property @property
def full_gradient(self): def gradient_full(self):
""" """
Note to users: Note to users:
This does not return the gradient in the right shape! Use self.gradient This does not return the gradient in the right shape! Use self.gradient
@ -681,26 +728,43 @@ class OptimizationHandlable(Constrainable):
return self._gradient_array_ return self._gradient_array_
def _propagate_param_grad(self, parray, garray): def _propagate_param_grad(self, parray, garray):
"""
For propagating the param_array and gradient_array.
This ensures the in memory view of each subsequent array.
1.) connect param_array of children to self.param_array
2.) tell all children to propagate further
"""
pi_old_size = 0 pi_old_size = 0
for pi in self._parameters_: for pi in self.parameters:
pislice = slice(pi_old_size, pi_old_size + pi.size) pislice = slice(pi_old_size, pi_old_size + pi.size)
self.param_array[pislice] = pi.param_array.flat # , requirements=['C', 'W']).flat self.param_array[pislice] = pi.param_array.flat # , requirements=['C', 'W']).flat
self.full_gradient[pislice] = pi.full_gradient.flat # , requirements=['C', 'W']).flat self.gradient_full[pislice] = pi.gradient_full.flat # , requirements=['C', 'W']).flat
pi.param_array.data = parray[pislice].data pi.param_array.data = parray[pislice].data
pi.full_gradient.data = garray[pislice].data pi.gradient_full.data = garray[pislice].data
pi._propagate_param_grad(parray[pislice], garray[pislice]) pi._propagate_param_grad(parray[pislice], garray[pislice])
pi_old_size += pi.size pi_old_size += pi.size
class Parameterizable(OptimizationHandlable): class Parameterizable(OptimizationHandlable):
"""
A parameterisable class.
This class provides the parameters list (ArrayList) and standard parameter handling,
such as {add|remove}_parameter(), traverse hierarchy and param_array, gradient_array
and the empty parameters_changed().
This class is abstract and should not be instantiated.
Use GPy.core.Parameterized() as node (or leaf) in the parameterized hierarchy.
Use GPy.core.Param() for a leaf in the parameterized hierarchy.
"""
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(Parameterizable, self).__init__(*args, **kwargs) super(Parameterizable, self).__init__(*args, **kwargs)
from GPy.core.parameterization.lists_and_dicts import ArrayList from GPy.core.parameterization.lists_and_dicts import ArrayList
self._parameters_ = ArrayList() self.parameters = ArrayList()
self._param_array_ = None self._param_array_ = None
self.size = 0
self._added_names_ = set() self._added_names_ = set()
self.__visited = False # for traversing in reverse order we need to know if we were here already self.__visited = False # for traversing in reverse order we need to know if we were here already
@ -735,7 +799,7 @@ class Parameterizable(OptimizationHandlable):
if not self.__visited: if not self.__visited:
visit(self, *args, **kwargs) visit(self, *args, **kwargs)
self.__visited = True self.__visited = True
for c in self._parameters_: for c in self.parameters:
c.traverse(visit, *args, **kwargs) c.traverse(visit, *args, **kwargs)
self.__visited = False self.__visited = False
@ -743,9 +807,9 @@ class Parameterizable(OptimizationHandlable):
""" """
Traverse the hierarchy upwards, visiting all parents and their children except self. Traverse the hierarchy upwards, visiting all parents and their children except self.
See "visitor pattern" in literature. This is implemented in pre-order fashion. See "visitor pattern" in literature. This is implemented in pre-order fashion.
Example: Example:
parents = [] parents = []
self.traverse_parents(parents.append) self.traverse_parents(parents.append)
print parents print parents
@ -754,7 +818,7 @@ class Parameterizable(OptimizationHandlable):
self.__visited = True self.__visited = True
self._parent_._traverse_parents(visit, *args, **kwargs) self._parent_._traverse_parents(visit, *args, **kwargs)
self.__visited = False self.__visited = False
def _traverse_parents(self, visit, *args, **kwargs): def _traverse_parents(self, visit, *args, **kwargs):
if not self.__visited: if not self.__visited:
self.__visited = True self.__visited = True
@ -779,7 +843,7 @@ class Parameterizable(OptimizationHandlable):
@property @property
def num_params(self): def num_params(self):
return len(self._parameters_) return len(self.parameters)
def _add_parameter_name(self, param, ignore_added_names=False): def _add_parameter_name(self, param, ignore_added_names=False):
pname = adjust_name_for_printing(param.name) pname = adjust_name_for_printing(param.name)
@ -812,132 +876,6 @@ class Parameterizable(OptimizationHandlable):
self._remove_parameter_name(None, old_name) self._remove_parameter_name(None, old_name)
self._add_parameter_name(param) self._add_parameter_name(param)
def add_parameter(self, param, index=None, _ignore_added_names=False):
"""
:param parameters: the parameters to add
:type parameters: list of or one :py:class:`GPy.core.param.Param`
:param [index]: index of where to put parameters
:param bool _ignore_added_names: whether the name of the parameter overrides a possibly existing field
Add all parameters to this param class, you can insert parameters
at any given index using the :func:`list.insert` syntax
"""
if param in self._parameters_ and index is not None:
self.remove_parameter(param)
self.add_parameter(param, index)
# elif param.has_parent():
# raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short())
elif param not in self._parameters_:
if param.has_parent():
def visit(parent, self):
if parent is self:
raise HierarchyError, "You cannot add a parameter twice into the hierarchy"
param.traverse_parents(visit, self)
param._parent_.remove_parameter(param)
# make sure the size is set
if index is None:
self.constraints.update(param.constraints, self.size)
self.priors.update(param.priors, self.size)
self._parameters_.append(param)
else:
start = sum(p.size for p in self._parameters_[:index])
self.constraints.shift_right(start, param.size)
self.priors.shift_right(start, param.size)
self.constraints.update(param.constraints, start)
self.priors.update(param.priors, start)
self._parameters_.insert(index, param)
param.add_observer(self, self._pass_through_notify_observers, -np.inf)
parent = self
while parent is not None:
parent.size += param.size
parent = parent._parent_
self._connect_parameters()
self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
self._highest_parent_._notify_parent_change()
self._highest_parent_._connect_fixes()
else:
raise HierarchyError, """Parameter exists already and no copy made"""
def add_parameters(self, *parameters):
"""
convenience method for adding several
parameters without gradient specification
"""
[self.add_parameter(p) for p in parameters]
def remove_parameter(self, param):
"""
:param param: param object to remove from being a parameter of this parameterized object.
"""
if not param in self._parameters_:
raise RuntimeError, "Parameter {} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name)
start = sum([p.size for p in self._parameters_[:param._parent_index_]])
self._remove_parameter_name(param)
self.size -= param.size
del self._parameters_[param._parent_index_]
param._disconnect_parent()
param.remove_observer(self, self._pass_through_notify_observers)
self.constraints.shift_left(start, param.size)
self._connect_parameters()
self._notify_parent_change()
parent = self._parent_
while parent is not None:
parent.size -= param.size
parent = parent._parent_
self._highest_parent_._connect_parameters()
self._highest_parent_._connect_fixes()
self._highest_parent_._notify_parent_change()
def _connect_parameters(self, ignore_added_names=False):
# connect parameterlist to this parameterized object
# This just sets up the right connection for the params objects
# to be used as parameters
# it also sets the constraints for each parameter to the constraints
# of their respective parents
if not hasattr(self, "_parameters_") or len(self._parameters_) < 1:
# no parameters for this class
return
if self.param_array.size != self.size:
self.param_array = np.empty(self.size, dtype=np.float64)
if self.gradient.size != self.size:
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
old_size = 0
self._param_slices_ = []
for i, p in enumerate(self._parameters_):
p._parent_ = self
p._parent_index_ = i
pslice = slice(old_size, old_size + p.size)
# first connect all children
p._propagate_param_grad(self.param_array[pslice], self.full_gradient[pslice])
# then connect children to self
self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C')
self.full_gradient[pslice] = p.full_gradient.flat # , requirements=['C', 'W']).ravel(order='C')
if not p.param_array.flags['C_CONTIGUOUS']:
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
p.param_array.data = self.param_array[pslice].data
p.full_gradient.data = self.full_gradient[pslice].data
self._param_slices_.append(pslice)
self._add_parameter_name(p, ignore_added_names=ignore_added_names)
old_size += p.size
#=========================================================================== #===========================================================================
# notification system # notification system
#=========================================================================== #===========================================================================
@ -947,30 +885,13 @@ class Parameterizable(OptimizationHandlable):
self.notify_observers(which=which) self.notify_observers(which=which)
#=========================================================================== #===========================================================================
# Pickling
#===========================================================================
def __setstate__(self, state):
super(Parameterizable, self).__setstate__(state)
self._connect_parameters()
self._connect_fixes()
self._notify_parent_change()
self.parameters_changed()
def copy(self):
c = super(Parameterizable, self).copy()
c._connect_parameters()
c._connect_fixes()
c._notify_parent_change()
return c
#===========================================================================
# From being parentable, we have to define the parent_change notification # From being parentable, we have to define the parent_change notification
#=========================================================================== #===========================================================================
def _notify_parent_change(self): def _notify_parent_change(self):
""" """
Notify all parameters that the parent has changed Notify all parameters that the parent has changed
""" """
for p in self._parameters_: for p in self.parameters:
p._parent_changed(self) p._parent_changed(self)
def parameters_changed(self): def parameters_changed(self):

View file

@ -3,13 +3,10 @@
import numpy; np = numpy import numpy; np = numpy
import cPickle
import itertools import itertools
from re import compile, _pattern_type from re import compile, _pattern_type
from param import ParamConcatenation from param import ParamConcatenation
from parameter_core import Pickleable, Parameterizable, adjust_name_for_printing from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
from transformations import __fixed__
from lists_and_dicts import ArrayList
class ParametersChangedMeta(type): class ParametersChangedMeta(type):
def __call__(self, *args, **kw): def __call__(self, *args, **kw):
@ -68,8 +65,7 @@ class Parameterized(Parameterizable):
def __init__(self, name=None, parameters=[], *a, **kw): def __init__(self, name=None, parameters=[], *a, **kw):
super(Parameterized, self).__init__(name=name, *a, **kw) super(Parameterized, self).__init__(name=name, *a, **kw)
self._in_init_ = True self._in_init_ = True
self._parameters_ = ArrayList() self.size = sum(p.size for p in self.parameters)
self.size = sum(p.size for p in self._parameters_)
self.add_observer(self, self._parameters_changed_notification, -100) self.add_observer(self, self._parameters_changed_notification, -100)
if not self._has_fixes(): if not self._has_fixes():
self._fixes_ = None self._fixes_ = None
@ -86,7 +82,7 @@ class Parameterized(Parameterizable):
iamroot=True iamroot=True
node = pydot.Node(id(self), shape='box', label=self.name)#, color='white') node = pydot.Node(id(self), shape='box', label=self.name)#, color='white')
G.add_node(node) G.add_node(node)
for child in self._parameters_: for child in self.parameters:
child_node = child.build_pydot(G) child_node = child.build_pydot(G)
G.add_edge(pydot.Edge(node, child_node))#, color='white')) G.add_edge(pydot.Edge(node, child_node))#, color='white'))
@ -102,58 +98,133 @@ class Parameterized(Parameterizable):
return node return node
#=========================================================================== #===========================================================================
# Gradient control # Add remove parameters:
#=========================================================================== #===========================================================================
def _transform_gradients(self, g): def add_parameter(self, param, index=None, _ignore_added_names=False):
if self.has_parent():
return g
[numpy.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes(): return g[self._fixes_]
return g
#===========================================================================
# Indexable
#===========================================================================
def _offset_for(self, param):
# get the offset in the parameterized index array for param
if param.has_parent():
if param._parent_._get_original(param) in self._parameters_:
return self._param_slices_[param._parent_._get_original(param)._parent_index_].start
return self._offset_for(param._parent_) + param._parent_._offset_for(param)
return 0
def _raveled_index_for(self, param):
""" """
get the raveled index for a param :param parameters: the parameters to add
that is an int array, containing the indexes for the flattened :type parameters: list of or one :py:class:`GPy.core.param.Param`
param inside this parameterized logic. :param [index]: index of where to put parameters
"""
if isinstance(param, ParamConcatenation):
return numpy.hstack((self._raveled_index_for(p) for p in param.params))
return param._raveled_index() + self._offset_for(param)
def _raveled_index(self): :param bool _ignore_added_names: whether the name of the parameter overrides a possibly existing field
"""
get the raveled index for this object,
this is not in the global view of things!
"""
return numpy.r_[:self.size]
#=========================================================================== Add all parameters to this param class, you can insert parameters
# Convenience for fixed, tied checking of param: at any given index using the :func:`list.insert` syntax
#=========================================================================== """
@property if param in self.parameters and index is not None:
def is_fixed(self): self.remove_parameter(param)
for p in self._parameters_: self.add_parameter(param, index)
if not p.is_fixed: return False # elif param.has_parent():
return True # raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short())
elif param not in self.parameters:
if param.has_parent():
def visit(parent, self):
if parent is self:
raise HierarchyError, "You cannot add a parameter twice into the hierarchy"
param.traverse_parents(visit, self)
param._parent_.remove_parameter(param)
# make sure the size is set
if index is None:
self.constraints.update(param.constraints, self.size)
self.priors.update(param.priors, self.size)
self.parameters.append(param)
else:
start = sum(p.size for p in self.parameters[:index])
self.constraints.shift_right(start, param.size)
self.priors.shift_right(start, param.size)
self.constraints.update(param.constraints, start)
self.priors.update(param.priors, start)
self.parameters.insert(index, param)
def _get_original(self, param): param.add_observer(self, self._pass_through_notify_observers, -np.inf)
# if advanced indexing is activated it happens that the array is a copy
# you can retrieve the original param through this method, by passing parent = self
# the copy here while parent is not None:
return self._parameters_[param._parent_index_] parent.size += param.size
parent = parent._parent_
self._connect_parameters()
self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
self._highest_parent_._notify_parent_change()
self._highest_parent_._connect_fixes()
else:
raise HierarchyError, """Parameter exists already and no copy made"""
def add_parameters(self, *parameters):
"""
convenience method for adding several
parameters without gradient specification
"""
[self.add_parameter(p) for p in parameters]
def remove_parameter(self, param):
"""
:param param: param object to remove from being a parameter of this parameterized object.
"""
if not param in self.parameters:
raise RuntimeError, "Parameter {} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name)
start = sum([p.size for p in self.parameters[:param._parent_index_]])
self._remove_parameter_name(param)
self.size -= param.size
del self.parameters[param._parent_index_]
param._disconnect_parent()
param.remove_observer(self, self._pass_through_notify_observers)
self.constraints.shift_left(start, param.size)
self._connect_parameters()
self._notify_parent_change()
parent = self._parent_
while parent is not None:
parent.size -= param.size
parent = parent._parent_
self._highest_parent_._connect_parameters()
self._highest_parent_._connect_fixes()
self._highest_parent_._notify_parent_change()
def _connect_parameters(self, ignore_added_names=False):
# connect parameterlist to this parameterized object
# This just sets up the right connection for the params objects
# to be used as parameters
# it also sets the constraints for each parameter to the constraints
# of their respective parents
if not hasattr(self, "parameters") or len(self.parameters) < 1:
# no parameters for this class
return
if self.param_array.size != self.size:
self.param_array = np.empty(self.size, dtype=np.float64)
if self.gradient.size != self.size:
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
old_size = 0
self._param_slices_ = []
for i, p in enumerate(self.parameters):
p._parent_ = self
p._parent_index_ = i
pslice = slice(old_size, old_size + p.size)
# first connect all children
p._propagate_param_grad(self.param_array[pslice], self.gradient_full[pslice])
# then connect children to self
self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C')
self.gradient_full[pslice] = p.gradient_full.flat # , requirements=['C', 'W']).ravel(order='C')
if not p.param_array.flags['C_CONTIGUOUS']:
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
p.param_array.data = self.param_array[pslice].data
p.gradient_full.data = self.gradient_full[pslice].data
self._param_slices_.append(pslice)
self._add_parameter_name(p, ignore_added_names=ignore_added_names)
old_size += p.size
#=========================================================================== #===========================================================================
# Get/set parameters: # Get/set parameters:
@ -200,10 +271,28 @@ class Parameterized(Parameterizable):
def __setattr__(self, name, val): def __setattr__(self, name, val):
# override the default behaviour, if setting a param, so broadcasting can by used # override the default behaviour, if setting a param, so broadcasting can by used
if hasattr(self, "_parameters_"): if hasattr(self, "parameters"):
pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False) pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False)
if name in pnames: self._parameters_[pnames.index(name)][:] = val; return if name in pnames: self.parameters[pnames.index(name)][:] = val; return
object.__setattr__(self, name, val); object.__setattr__(self, name, val);
#===========================================================================
# Pickling
#===========================================================================
def __setstate__(self, state):
super(Parameterized, self).__setstate__(state)
self._connect_parameters()
self._connect_fixes()
self._notify_parent_change()
self.parameters_changed()
def copy(self):
c = super(Parameterized, self).copy()
c._connect_parameters()
c._connect_fixes()
c._notify_parent_change()
return c
#=========================================================================== #===========================================================================
# Printing: # Printing:
#=========================================================================== #===========================================================================
@ -211,22 +300,22 @@ class Parameterized(Parameterizable):
return self.hierarchy_name() return self.hierarchy_name()
@property @property
def flattened_parameters(self): def flattened_parameters(self):
return [xi for x in self._parameters_ for xi in x.flattened_parameters] return [xi for x in self.parameters for xi in x.flattened_parameters]
@property @property
def _parameter_sizes_(self): def _parameter_sizes_(self):
return [x.size for x in self._parameters_] return [x.size for x in self.parameters]
@property @property
def parameter_shapes(self): def parameter_shapes(self):
return [xi for x in self._parameters_ for xi in x.parameter_shapes] return [xi for x in self.parameters for xi in x.parameter_shapes]
@property @property
def _constraints_str(self): def _constraints_str(self):
return [cs for p in self._parameters_ for cs in p._constraints_str] return [cs for p in self.parameters for cs in p._constraints_str]
@property @property
def _priors_str(self): def _priors_str(self):
return [cs for p in self._parameters_ for cs in p._priors_str] return [cs for p in self.parameters for cs in p._priors_str]
@property @property
def _description_str(self): def _description_str(self):
return [xi for x in self._parameters_ for xi in x._description_str] return [xi for x in self.parameters for xi in x._description_str]
@property @property
def _ties_str(self): def _ties_str(self):
return [','.join(x._ties_str) for x in self.flattened_parameters] return [','.join(x._ties_str) for x in self.flattened_parameters]
@ -246,7 +335,7 @@ class Parameterized(Parameterizable):
to_print = [] to_print = []
for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs): for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs):
to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p)) to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))
# to_print = [format_spec.format(p=p, const=c, t=t) if isinstance(p, Param) else p.__str__(header=False) for p, c, t in itertools.izip(self._parameters_, constrs, ts)] # to_print = [format_spec.format(p=p, const=c, t=t) if isinstance(p, Param) else p.__str__(header=False) for p, c, t in itertools.izip(self.parameters, constrs, ts)]
sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3) sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3)
if header: if header:
header = " {{0:<{0}s}} | {{1:^{1}s}} | {{2:^{2}s}} | {{3:^{3}s}} | {{4:^{4}s}}".format(nl, sl, cl, pl, tl).format(name, "Value", "Constraint", "Prior", "Tied to") header = " {{0:<{0}s}} | {{1:^{1}s}} | {{2:^{2}s}} | {{3:^{3}s}} | {{4:^{4}s}}".format(nl, sl, cl, pl, tl).format(name, "Value", "Constraint", "Prior", "Tied to")

View file

@ -81,7 +81,7 @@ class VariationalPosterior(Parameterized):
def _raveled_index(self): def _raveled_index(self):
index = np.empty(dtype=int, shape=0) index = np.empty(dtype=int, shape=0)
size = 0 size = 0
for p in self._parameters_: for p in self.parameters:
index = np.hstack((index, p._raveled_index()+size)) index = np.hstack((index, p._raveled_index()+size))
size += p._realsize_ if hasattr(p, '_realsize_') else p.size size += p._realsize_ if hasattr(p, '_realsize_') else p.size
return index return index
@ -96,10 +96,10 @@ class VariationalPosterior(Parameterized):
dc = self.__dict__.copy() dc = self.__dict__.copy()
dc['mean'] = self.mean[s] dc['mean'] = self.mean[s]
dc['variance'] = self.variance[s] dc['variance'] = self.variance[s]
dc['_parameters_'] = copy.copy(self._parameters_) dc['parameters'] = copy.copy(self.parameters)
n.__dict__.update(dc) n.__dict__.update(dc)
n._parameters_[dc['mean']._parent_index_] = dc['mean'] n.parameters[dc['mean']._parent_index_] = dc['mean']
n._parameters_[dc['variance']._parent_index_] = dc['variance'] n.parameters[dc['variance']._parent_index_] = dc['variance']
n._gradient_array_ = None n._gradient_array_ = None
oversize = self.size - self.mean.size - self.variance.size oversize = self.size - self.mean.size - self.variance.size
n.size = n.mean.size + n.variance.size + oversize n.size = n.mean.size + n.variance.size + oversize
@ -150,11 +150,11 @@ class SpikeAndSlabPosterior(VariationalPosterior):
dc['mean'] = self.mean[s] dc['mean'] = self.mean[s]
dc['variance'] = self.variance[s] dc['variance'] = self.variance[s]
dc['binary_prob'] = self.binary_prob[s] dc['binary_prob'] = self.binary_prob[s]
dc['_parameters_'] = copy.copy(self._parameters_) dc['parameters'] = copy.copy(self.parameters)
n.__dict__.update(dc) n.__dict__.update(dc)
n._parameters_[dc['mean']._parent_index_] = dc['mean'] n.parameters[dc['mean']._parent_index_] = dc['mean']
n._parameters_[dc['variance']._parent_index_] = dc['variance'] n.parameters[dc['variance']._parent_index_] = dc['variance']
n._parameters_[dc['binary_prob']._parent_index_] = dc['binary_prob'] n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
n.ndim = n.mean.ndim n.ndim = n.mean.ndim
n.shape = n.mean.shape n.shape = n.mean.shape
n.num_data = n.mean.shape[0] n.num_data = n.mean.shape[0]

View file

@ -66,7 +66,7 @@ class SparseGP(GP):
#gradients wrt Z #gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations( self.Z.gradient += self.kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) self.grad_dict['dL_dpsi0'], self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
else: else:
#gradients wrt kernel #gradients wrt kernel
self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X) self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)

View file

@ -96,15 +96,11 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
# Optimize # Optimize
if optimize: if optimize:
#m.update_likelihood_approximation()
# Parameters optimization:
try: try:
m.optimize('scg', messages=1) m.optimize('scg', messages=1)
except Exception as e: except Exception as e:
return m return m
#m.pseudo_EM()
# Plot # Plot
if plot: if plot:
fig, axes = pb.subplots(2, 1) fig, axes = pb.subplots(2, 1)
@ -133,10 +129,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
# Optimize # Optimize
if optimize: if optimize:
#m.update_likelihood_approximation() m.optimize()
# Parameters optimization:
#m.optimize()
m.pseudo_EM()
# Plot # Plot
if plot: if plot:

View file

@ -414,9 +414,10 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
ax = m.plot_latent() ax = m.plot_latent()
y = m.Y[0, :] y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
vis = GPy.plotting.matplot_dep.visualize.lvm(m.X[:1, :].copy(), m, data_show, latent_axes=ax) lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[:1, :].copy(), m, data_show, latent_axes=ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
lvm_visualizer.close()
data_show.close()
return m return m
def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True): def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
@ -516,9 +517,10 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose
ax = m.plot_latent() ax = m.plot_latent()
y = m.Y[0, :] y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel']) data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel'])
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0].copy(), m, data_show, latent_axes=ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
lvm_visualizer.close() lvm_visualizer.close()
data_show.close()
return m return m

View file

@ -25,6 +25,20 @@ etc.
""" """
class LatentFunctionInference(object):
def on_optimization_start(self):
"""
This function gets called, just before the optimization loop to start.
"""
pass
def on_optimization_end(self):
"""
This function gets called, just after the optimization loop ended.
"""
pass
from exact_gaussian_inference import ExactGaussianInference from exact_gaussian_inference import ExactGaussianInference
from laplace import Laplace from laplace import Laplace
from GPy.inference.latent_function_inference.var_dtc import VarDTC from GPy.inference.latent_function_inference.var_dtc import VarDTC
@ -38,11 +52,26 @@ from var_dtc_gpu import VarDTC_GPU
# class FullLatentFunctionData(object): # class FullLatentFunctionData(object):
# #
# #
# class LatentFunctionInference(object):
# def inference(self, kern, X, likelihood, Y, Y_metadata=None): # class EMLikeLatentFunctionInference(LatentFunctionInference):
# def update_approximation(self):
# """
# This function gets called when the
# """
#
# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
# """ # """
# Do inference on the latent functions given a covariance function `kern`, # Do inference on the latent functions given a covariance function `kern`,
# inputs and outputs `X` and `Y`, and a likelihood `likelihood`. # inputs and outputs `X` and `Y`, inducing_inputs `Z`, and a likelihood `likelihood`.
# Additional metadata for the outputs `Y` can be given in `Y_metadata`.
# """
# raise NotImplementedError, "Abstract base class for full inference"
#
# class VariationalLatentFunctionInference(LatentFunctionInference):
# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
# """
# Do inference on the latent functions given a covariance function `kern`,
# inputs and outputs `X` and `Y`, inducing_inputs `Z`, and a likelihood `likelihood`.
# Additional metadata for the outputs `Y` can be given in `Y_metadata`. # Additional metadata for the outputs `Y` can be given in `Y_metadata`.
# """ # """
# raise NotImplementedError, "Abstract base class for full inference" # raise NotImplementedError, "Abstract base class for full inference"

View file

@ -4,9 +4,10 @@
from posterior import Posterior from posterior import Posterior
from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
import numpy as np import numpy as np
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
class DTC(object): class DTC(LatentFunctionInference):
""" """
An object for inference when the likelihood is Gaussian, but we want to do sparse inference. An object for inference when the likelihood is Gaussian, but we want to do sparse inference.

View file

@ -5,10 +5,11 @@ from posterior import Posterior
from ...util.linalg import pdinv, dpotrs, tdot from ...util.linalg import pdinv, dpotrs, tdot
from ...util import diag from ...util import diag
import numpy as np import numpy as np
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
class ExactGaussianInference(object): class ExactGaussianInference(LatentFunctionInference):
""" """
An object for inference when the likelihood is Gaussian. An object for inference when the likelihood is Gaussian.

View file

@ -1,9 +1,10 @@
import numpy as np import numpy as np
from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs
from posterior import Posterior from posterior import Posterior
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
class EP(object): class EP(LatentFunctionInference):
def __init__(self, epsilon=1e-6, eta=1., delta=1.): def __init__(self, epsilon=1e-6, eta=1., delta=1.):
""" """
The expectation-propagation algorithm. The expectation-propagation algorithm.
@ -21,6 +22,14 @@ class EP(object):
def reset(self): def reset(self):
self.old_mutilde, self.old_vtilde = None, None self.old_mutilde, self.old_vtilde = None, None
self._ep_approximation = None
def on_optimization_start(self):
self._ep_approximation = None
def on_optimization_end(self):
# TODO: update approximation in the end as well? Maybe even with a switch?
pass
def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None): def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None):
num_data, output_dim = X.shape num_data, output_dim = X.shape
@ -28,7 +37,10 @@ class EP(object):
K = kern.K(X) K = kern.K(X)
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self.expectation_propagation(K, Y, likelihood, Y_metadata) if self._ep_approximation is None:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
else:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde)) Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde))
@ -42,8 +54,6 @@ class EP(object):
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL} return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
def expectation_propagation(self, K, Y, likelihood, Y_metadata): def expectation_propagation(self, K, Y, likelihood, Y_metadata):
num_data, data_dim = Y.shape num_data, data_dim = Y.shape
@ -108,4 +118,3 @@ class EP(object):
mu_tilde = v_tilde/tau_tilde mu_tilde = v_tilde/tau_tilde
return mu, Sigma, mu_tilde, tau_tilde, Z_hat return mu, Sigma, mu_tilde, tau_tilde, Z_hat

View file

@ -1,11 +1,59 @@
import numpy as np import numpy as np
from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs from ...util import diag
from expectation_propagation import EP from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR
from ...util.misc import param_to_array
from ...core.parameterization.variational import VariationalPosterior
from . import LatentFunctionInference
from posterior import Posterior from posterior import Posterior
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
class EPDTC(EP): class EPDTC(LatentFunctionInference):
#def __init__(self, epsilon=1e-6, eta=1., delta=1.): const_jitter = 1e-6
def __init__(self, epsilon=1e-6, eta=1., delta=1., limit=1):
from ...util.caching import Cacher
self.limit = limit
self.get_trYYT = Cacher(self._get_trYYT, limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
self.epsilon, self.eta, self.delta = epsilon, eta, delta
self.reset()
def set_limit(self, limit):
self.get_trYYT.limit = limit
self.get_YYTfactor.limit = limit
def _get_trYYT(self, Y):
return param_to_array(np.sum(np.square(Y)))
def __getstate__(self):
# has to be overridden, as Cacher objects cannot be pickled.
return self.limit
def __setstate__(self, state):
# has to be overridden, as Cacher objects cannot be pickled.
self.limit = state
from ...util.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
def _get_YYTfactor(self, Y):
"""
find a matrix L which satisfies LLT = YYT.
Note that L may have fewer columns than Y.
"""
N, D = Y.shape
if (N>=D):
return param_to_array(Y)
else:
return jitchol(tdot(Y))
def get_VVTfactor(self, Y, prec):
return Y * prec # TODO chache this, and make it effective
def reset(self):
self.old_mutilde, self.old_vtilde = None, None
self._ep_approximation = None
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
num_data, output_dim = X.shape num_data, output_dim = X.shape
@ -14,26 +62,131 @@ class EPDTC(EP):
Kmm = kern.K(Z) Kmm = kern.K(Z)
Kmn = kern.K(Z,X) Kmn = kern.K(Z,X)
if self._ep_approximation is None:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
else:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
if isinstance(X, VariationalPosterior):
uncertain_inputs = True
psi0 = kern.psi0(Z, X)
psi1 = Kmn.T#kern.psi1(Z, X)
psi2 = kern.psi2(Z, X)
else:
uncertain_inputs = False
psi0 = kern.Kdiag(X)
psi1 = Kmn.T#kern.K(X, Z)
psi2 = None
#see whether we're using variational uncertain inputs
_, output_dim = Y.shape
#see whether we've got a different noise variance for each datum
#beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
beta = tau_tilde
VVT_factor = beta[:,None]*mu_tilde[:,None]
trYYT = self.get_trYYT(mu_tilde[:,None])
# do the inference:
het_noise = beta.size > 1
num_inducing = Z.shape[0]
num_data = Y.shape[0]
# kernel computations, using BGPLVM notation
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm) Lm = jitchol(Kmm)
Lmi = dtrtrs(Lm,np.eye(Lm.shape[0]))[0]
Kmmi = np.dot(Lmi.T,Lmi) # The rather complex computations of A
KmmiKmn = np.dot(Kmmi,Kmn) if uncertain_inputs:
K = np.dot(Kmn.T,KmmiKmn) if het_noise:
psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
else:
psi2_beta = psi2.sum(0) * beta
LmInv = dtrtri(Lm)
A = LmInv.dot(psi2_beta.dot(LmInv.T))
else:
if het_noise:
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
else:
tmp = psi1 * (np.sqrt(beta))
tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
A = tdot(tmp) #print A.sum()
# factor B
B = np.eye(num_inducing) + A
LB = jitchol(B)
psi1Vf = np.dot(psi1.T, VVT_factor)
# back substutue C into psi1Vf
tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
# data fit and derivative of L w.r.t. Kmm
delit = tdot(_LBi_Lmi_psi1Vf)
data_fit = np.trace(delit)
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
delit = -0.5 * DBi_plus_BiPBi
delit += -0.5 * B * output_dim
delit += output_dim * np.eye(num_inducing)
# Compute dL_dKmm
dL_dKmm = backsub_both_sides(Lm, delit)
# derivatives of L w.r.t. psi
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
psi1, het_noise, uncertain_inputs)
# log marginal likelihood
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
psi0, A, LB, trYYT, data_fit, VVT_factor)
#put the gradients in the right places
dL_dR = _compute_dL_dR(likelihood,
het_noise, uncertain_inputs, LB,
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT, mu_tilde[:,None])
dL_dthetaL = 0#likelihood.exact_inference_gradients(dL_dR,Y_metadata)
if uncertain_inputs:
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dpsi0':dL_dpsi0,
'dL_dpsi1':dL_dpsi1,
'dL_dpsi2':dL_dpsi2,
'dL_dthetaL':dL_dthetaL}
else:
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1,
'dL_dthetaL':dL_dthetaL}
#get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop?
if VVT_factor.shape[1] == Y.shape[1]:
woodbury_vector = Cpsi1Vf # == Cpsi1V
else:
print 'foobar'
psi1V = np.dot(mu_tilde[:,None].T*beta, psi1).T
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
tmp, _ = dpotrs(LB, tmp, lower=1)
woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0]
diag.add(Bi, 1)
woodbury_inv = backsub_both_sides(Lm, Bi)
#construct a posterior object
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde))
alpha, _ = dpotrs(LW, mu_tilde, lower=1)
log_marginal = 0.5*(-num_data * log_2_pi - W_logdet - np.sum(alpha * mu_tilde)) # TODO: add log Z_hat??
dL_dK = 0.5 * (tdot(alpha[:,None]) - Wi)
dL_dthetaL = np.zeros(likelihood.size)#TODO: derivatives of the likelihood parameters
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
@ -121,3 +274,69 @@ class EPDTC(EP):
mu_tilde = v_tilde/tau_tilde mu_tilde = v_tilde/tau_tilde
return mu, Sigma, mu_tilde, tau_tilde, Z_hat return mu, Sigma, mu_tilde, tau_tilde, Z_hat
def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
dL_dpsi0 = -0.5 * output_dim * (beta[:,None] * np.ones([num_data, 1])).flatten()
dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
if het_noise:
if uncertain_inputs:
dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :]
else:
dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * beta.reshape(num_data, 1)).T).T
dL_dpsi2 = None
else:
dL_dpsi2 = beta * dL_dpsi2_beta
if uncertain_inputs:
# repeat for each of the N psi_2 matrices
dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0)
else:
# subsume back into psi1 (==Kmn)
dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
dL_dpsi2 = None
return dL_dpsi0, dL_dpsi1, dL_dpsi2
def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT, Y):
# the partial derivative vector for the likelihood
if likelihood.size == 0:
# save computation here.
dL_dR = None
elif het_noise:
if uncertain_inputs:
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
else:
#from ...util.linalg import chol_inv
#LBi = chol_inv(LB)
LBi, _ = dtrtrs(LB,np.eye(LB.shape[0]))
Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0)
_LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0)
dL_dR = -0.5 * beta + 0.5 * (beta*Y)**2
dL_dR += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2
dL_dR += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2
dL_dR += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * Y * beta**2
dL_dR += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2
else:
# likelihood is not heteroscedatic
dL_dR = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
dL_dR += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
dL_dR += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
return dL_dR
def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit,Y):
#compute log marginal likelihood
if het_noise:
lik_1 = -0.5 * num_data * output_dim * np.log(2. * np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(beta * np.square(Y).sum(axis=-1))
lik_2 = -0.5 * output_dim * (np.sum(beta.flatten() * psi0) - np.trace(A))
else:
lik_1 = -0.5 * num_data * output_dim * (np.log(2. * np.pi) - np.log(beta)) - 0.5 * beta * trYYT
lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
lik_3 = -output_dim * (np.sum(np.log(np.diag(LB))))
lik_4 = 0.5 * data_fit
log_marginal = lik_1 + lik_2 + lik_3 + lik_4
return log_marginal

View file

@ -5,9 +5,10 @@ from posterior import Posterior
from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
from ...util import diag from ...util import diag
import numpy as np import numpy as np
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
class FITC(object): class FITC(LatentFunctionInference):
""" """
An object for inference when the likelihood is Gaussian, but we want to do sparse inference. An object for inference when the likelihood is Gaussian, but we want to do sparse inference.

View file

@ -16,8 +16,9 @@ from ...util.misc import param_to_array
from posterior import Posterior from posterior import Posterior
import warnings import warnings
from scipy import optimize from scipy import optimize
from . import LatentFunctionInference
class Laplace(object): class Laplace(LatentFunctionInference):
def __init__(self): def __init__(self):
""" """

View file

@ -7,9 +7,10 @@ from ...util import diag
from ...core.parameterization.variational import VariationalPosterior from ...core.parameterization.variational import VariationalPosterior
import numpy as np import numpy as np
from ...util.misc import param_to_array from ...util.misc import param_to_array
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
class VarDTC(object): class VarDTC(LatentFunctionInference):
""" """
An object for inference when the likelihood is Gaussian, but we want to do sparse inference. An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
@ -190,7 +191,7 @@ class VarDTC(object):
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict return post, log_marginal, grad_dict
class VarDTCMissingData(object): class VarDTCMissingData(LatentFunctionInference):
const_jitter = 1e-6 const_jitter = 1e-6
def __init__(self, limit=1, inan=None): def __init__(self, limit=1, inan=None):
from ...util.caching import Cacher from ...util.caching import Cacher

View file

@ -7,6 +7,7 @@ from ...util import diag
from ...core.parameterization.variational import VariationalPosterior from ...core.parameterization.variational import VariationalPosterior
import numpy as np import numpy as np
from ...util.misc import param_to_array from ...util.misc import param_to_array
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
from ...util import gpu_init from ...util import gpu_init
@ -19,7 +20,7 @@ try:
except: except:
pass pass
class VarDTC_GPU(object): class VarDTC_GPU(LatentFunctionInference):
""" """
An object for inference when the likelihood is Gaussian, but we want to do sparse inference. An object for inference when the likelihood is Gaussian, but we want to do sparse inference.

View file

@ -7,9 +7,10 @@ from ...util import diag
from ...core.parameterization.variational import VariationalPosterior from ...core.parameterization.variational import VariationalPosterior
import numpy as np import numpy as np
from ...util.misc import param_to_array from ...util.misc import param_to_array
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
class VarDTC_minibatch(object): class VarDTC_minibatch(LatentFunctionInference):
""" """
An object for inference when the likelihood is Gaussian, but we want to do sparse inference. An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
@ -70,12 +71,13 @@ class VarDTC_minibatch(object):
#see whether we've got a different noise variance for each datum #see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6) beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1 het_noise = beta.size > 1
if het_noise:
self.batchsize = 1
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = beta*self.get_YYTfactor(Y) #self.YYTfactor = beta*self.get_YYTfactor(Y)
YYT_factor = Y YYT_factor = Y
trYYT = self.get_trYYT(Y) trYYT = self.get_trYYT(Y)
psi2_full = np.zeros((num_inducing,num_inducing)) psi2_full = np.zeros((num_inducing,num_inducing))
psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM
psi0_full = 0 psi0_full = 0
@ -104,19 +106,18 @@ class VarDTC_minibatch(object):
YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum() YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
else: else:
psi0_full += psi0.sum() psi0_full += psi0.sum()
psi1Y_full += np.dot(Y_slice.T,psi1) # DxM psi1Y_full += np.dot(Y_slice.T,psi1) # DxM
if uncertain_inputs: if uncertain_inputs:
if het_noise: if het_noise:
psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2) psi2_full += beta_slice*psi2
else: else:
psi2_full += psi2.sum(axis=0) psi2_full += psi2
else: else:
if het_noise: if het_noise:
psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1) psi2_full += beta_slice*np.outer(psi1,psi1)
else: else:
psi2_full += tdot(psi1.T) psi2_full += np.outer(psi1,psi1)
if not het_noise: if not het_noise:
psi0_full *= beta psi0_full *= beta
@ -223,7 +224,7 @@ class VarDTC_minibatch(object):
psi2 = None psi2 = None
if het_noise: if het_noise:
beta = beta[n_start:n_end] beta = beta[n_start] # assuming batchsize==1
betaY = beta*Y_slice betaY = beta*Y_slice
betapsi1 = np.einsum('n,nm->nm',beta,psi1) betapsi1 = np.einsum('n,nm->nm',beta,psi1)
@ -244,7 +245,7 @@ class VarDTC_minibatch(object):
dL_dpsi1 = np.dot(betaY,v.T) dL_dpsi1 = np.dot(betaY,v.T)
if uncertain_inputs: if uncertain_inputs:
dL_dpsi2 = np.einsum('n,mo->nmo',beta * np.ones((n_end-n_start,)),dL_dpsi2R) dL_dpsi2 = beta* dL_dpsi2R
else: else:
dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2. dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2.
dL_dpsi2 = None dL_dpsi2 = None
@ -262,11 +263,11 @@ class VarDTC_minibatch(object):
dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1) dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1)
else: else:
if uncertain_inputs: if uncertain_inputs:
psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2) psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2)
else: else:
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R) psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum() + np.square(beta)*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum() dL_dthetaL = ((np.square(betaY)).sum() + beta*beta*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - beta*beta*psiR- (betaY*np.dot(betapsi1,v)).sum()
if uncertain_inputs: if uncertain_inputs:
grad_dict = {'dL_dpsi0':dL_dpsi0, grad_dict = {'dL_dpsi0':dL_dpsi0,
@ -296,7 +297,7 @@ def update_gradients(model):
kern_grad = model.kern.gradient.copy() kern_grad = model.kern.gradient.copy()
#gradients w.r.t. Z #gradients w.r.t. Z
model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z) model.Z.gradient = model.kern.gradients_X(dL_dKmm, model.Z)
isEnd = False isEnd = False
while not isEnd: while not isEnd:
@ -309,8 +310,8 @@ def update_gradients(model):
kern_grad += model.kern.gradient kern_grad += model.kern.gradient
#gradients w.r.t. Z #gradients w.r.t. Z
model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations( model.Z.gradient += model.kern.gradients_Z_expectations(
grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice) dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
#gradients w.r.t. posterior parameters of X #gradients w.r.t. posterior parameters of X
X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])

View file

@ -119,7 +119,7 @@ class Add(CombinationKernel):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias from static import White, Bias
target = np.zeros(Z.shape) target = np.zeros(Z.shape)
for p1 in self.parts: for p1 in self.parts:
@ -141,10 +141,10 @@ class Add(CombinationKernel):
from static import White, Bias from static import White, Bias
target_mu = np.zeros(variational_posterior.shape) target_mu = np.zeros(variational_posterior.shape)
target_S = np.zeros(variational_posterior.shape) target_S = np.zeros(variational_posterior.shape)
for p1 in self._parameters_: for p1 in self.parameters:
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2! #compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy() eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self._parameters_: for p2 in self.parameters:
if p2 is p1: if p2 is p1:
continue continue
if isinstance(p2, White): if isinstance(p2, White):
@ -160,7 +160,7 @@ class Add(CombinationKernel):
def add(self, other, name='sum'): def add(self, other, name='sum'):
if isinstance(other, Add): if isinstance(other, Add):
other_params = other._parameters_[:] other_params = other.parameters[:]
for p in other_params: for p in other_params:
other.remove_parameter(p) other.remove_parameter(p)
self.add_parameters(*other_params) self.add_parameters(*other_params)
@ -170,4 +170,4 @@ class Add(CombinationKernel):
return self return self
def input_sensitivity(self): def input_sensitivity(self):
return reduce(np.add, [k.input_sensitivity() for k in self.parts]) return reduce(np.add, [k.input_sensitivity() for k in self.parts])

View file

@ -103,7 +103,7 @@ class Kern(Parameterized):
""" """
raise NotImplementedError raise NotImplementedError
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
""" """
Returns the derivative of the objective wrt Z, using the chain rule Returns the derivative of the objective wrt Z, using the chain rule
through the expectation variables. through the expectation variables.
@ -183,9 +183,9 @@ class Kern(Parameterized):
assert isinstance(other, Kern), "only kernels can be added to kernels..." assert isinstance(other, Kern), "only kernels can be added to kernels..."
from prod import Prod from prod import Prod
#kernels = [] #kernels = []
#if isinstance(self, Prod): kernels.extend(self._parameters_) #if isinstance(self, Prod): kernels.extend(self.parameters)
#else: kernels.append(self) #else: kernels.append(self)
#if isinstance(other, Prod): kernels.extend(other._parameters_) #if isinstance(other, Prod): kernels.extend(other.parameters)
#else: kernels.append(other) #else: kernels.append(other)
return Prod([self, other], name) return Prod([self, other], name)
@ -222,7 +222,7 @@ class CombinationKernel(Kern):
@property @property
def parts(self): def parts(self):
return self._parameters_ return self.parameters
def get_input_dim_active_dims(self, kernels, extra_dims = None): def get_input_dim_active_dims(self, kernels, extra_dims = None):
#active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int)) #active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))

View file

@ -124,9 +124,9 @@ def _slice_update_gradients_expectations(f):
def _slice_gradients_Z_expectations(f): def _slice_gradients_Z_expectations(f):
@wraps(f) @wraps(f)
def wrap(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
with _Slice_wrap(self, Z, variational_posterior) as s: with _Slice_wrap(self, Z, variational_posterior) as s:
ret = s.handle_return_array(f(self, dL_dpsi1, dL_dpsi2, s.X, s.X2)) ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2))
return ret return ret
return wrap return wrap

View file

@ -169,7 +169,7 @@ class Linear(Kern):
else: else:
self.variances.gradient += 2.*np.sum(dL_dpsi2 * self.psi2(Z, variational_posterior))/self.variances self.variances.gradient += 2.*np.sum(dL_dpsi2 * self.psi2(Z, variational_posterior))/self.variances
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
gamma = variational_posterior.binary_prob gamma = variational_posterior.binary_prob
mu = variational_posterior.mean mu = variational_posterior.mean

View file

@ -9,12 +9,23 @@ import numpy as np
from GPy.util.caching import Cache_this from GPy.util.caching import Cache_this
@Cache_this(limit=1) @Cache_this(limit=1)
def _Z_distances(Z): def psicomputations(variance, lengthscale, Z, mu, S, gamma):
Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q """
Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q Z - MxQ
return Zhat, Zdist mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
return psi0, psi1, psi2
@Cache_this(limit=1)
def _psi1computations(variance, lengthscale, Z, mu, S, gamma): def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
""" """
Z - MxQ Z - MxQ
@ -22,15 +33,10 @@ def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
S - NxQ S - NxQ
gamma - NxQ gamma - NxQ
""" """
# here are the "statistics" for psi1 and psi2 # here are the "statistics" for psi1
# Produced intermediate results: # Produced intermediate results:
# _psi1 NxM # _psi1 NxM
# _dpsi1_dvariance NxM
# _dpsi1_dlengthscale NxMxQ
# _dpsi1_dZ NxMxQ
# _dpsi1_dgamma NxMxQ
# _dpsi1_dmu NxMxQ
# _dpsi1_dS NxMxQ
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
@ -40,25 +46,15 @@ def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ _psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ _psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ _psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM _psi1 = variance * np.exp(_psi1_exp_sum) # NxM
_dpsi1_dvariance = _psi1 / variance # NxM
_dpsi1_dgamma = _psi1_q * (_psi1_exp_dist_sq/_psi1_denom_sqrt-_psi1_exp_Z) # NxMxQ
_dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
_dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
_dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
_dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
return _psi1, _dpsi1_dvariance, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _dpsi1_dZ, _dpsi1_dlengthscale return _psi1
@Cache_this(limit=1)
def _psi2computations(variance, lengthscale, Z, mu, S, gamma): def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
""" """
Z - MxQ Z - MxQ
@ -66,19 +62,14 @@ def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
S - NxQ S - NxQ
gamma - NxQ gamma - NxQ
""" """
# here are the "statistics" for psi1 and psi2 # here are the "statistics" for psi2
# Produced intermediate results: # Produced intermediate results:
# _psi2 NxMxM # _psi2 MxM
# _psi2_dvariance NxMxM
# _psi2_dlengthscale NxMxMxQ
# _psi2_dZ NxMxMxQ
# _psi2_dgamma NxMxMxQ
# _psi2_dmu NxMxMxQ
# _psi2_dS NxMxMxQ
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
_psi2_Zhat, _psi2_Zdist = _Z_distances(Z) _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
@ -93,15 +84,130 @@ def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = np.square(variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
dL_dlengscale = dl_psi1 + dl_psi2
if not ARD:
dL_dlengscale = dL_dlengscale.sum()
dL_dgamma = dgamma_psi1 + dgamma_psi2
dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
"""
dL_dpsi1 - NxM
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results: dL_dparams w.r.t. psi1
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_denom = S / lengthscale2 + 1. # NxQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ
_psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
_dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1
_dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ
_dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ
_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))
# _dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
# _dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
# _dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
# _dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
dL_dpsi2 - MxM
"""
# here are the "statistics" for psi2
# Produced the derivatives w.r.t. psi2:
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2
_psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:])
_psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
_psi2 = np.square(variance) * np.exp(_psi2_exp_sum) # N,M,M _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
_dpsi2_dvariance = 2. * _psi2/variance # NxMxM _dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance
_dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ _dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z))
_dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ _dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq)
_dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ _dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
_dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ _dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
_dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ # print _psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq #+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))
# _dpsi2_dvariance = 2. * _psi2/variance # NxMxM
# _dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ
# _dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ
# _dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ
# _dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ
# _dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ
return _psi2, _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ, _dpsi2_dlengthscale return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma

View file

@ -42,9 +42,11 @@ class RBF(Stationary):
#---------------------------------------# #---------------------------------------#
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
if self.useGPU: if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if self.useGPU:
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0] return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
else:
return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
else: else:
return self.Kdiag(variational_posterior.mean) return self.Kdiag(variational_posterior.mean)
@ -53,7 +55,7 @@ class RBF(Stationary):
if self.useGPU: if self.useGPU:
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1] return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
else: else:
psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
else: else:
_, _, _, psi1 = self._psi1computations(Z, variational_posterior) _, _, _, psi1 = self._psi1computations(Z, variational_posterior)
return psi1 return psi1
@ -63,7 +65,7 @@ class RBF(Stationary):
if self.useGPU: if self.useGPU:
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2] return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
else: else:
psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
else: else:
_, _, _, _, psi2 = self._psi2computations(Z, variational_posterior) _, _, _, _, psi2 = self._psi2computations(Z, variational_posterior)
return psi2 return psi2
@ -74,26 +76,30 @@ class RBF(Stationary):
if self.useGPU: if self.useGPU:
self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else: else:
# dL_dvar, dL_dlengscale, dL_dZ, dL_dgamma, dL_dmu, dL_dS = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
dL_dvar, dL_dlengscale, _, _, _, _ = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
self.variance.gradient = dL_dvar
self.lengthscale.gradient = dL_dlengscale
_, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) # _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) # _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#
#contributions from psi0: # #contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0) # self.variance.gradient = np.sum(dL_dpsi0)
#
#from psi1 # #from psi1
self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance) # self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
if self.ARD: # if self.ARD:
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) # self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
else: # else:
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum() # self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
#
#from psi2 # #from psi2
self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum() # self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
if self.ARD: # if self.ARD:
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) # self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
else: # else:
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum() # self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
elif isinstance(variational_posterior, variational.NormalPosterior): elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale**2 l2 = self.lengthscale**2
@ -126,22 +132,25 @@ class RBF(Stationary):
else: else:
raise ValueError, "unknown distriubtion received for psi-statistics" raise ValueError, "unknown distriubtion received for psi-statistics"
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# Spike-and-Slab GPLVM # Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU: if self.useGPU:
return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else: else:
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) _, _, dL_dZ, _, _, _ = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
_, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) return dL_dZ
#psi1 # _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0) # _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#
#psi2 # #psi1
grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1) # grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
#
return grad # #psi2
# grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
#
# return grad
elif isinstance(variational_posterior, variational.NormalPosterior): elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale **2 l2 = self.lengthscale **2
@ -167,26 +176,29 @@ class RBF(Stationary):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU: if self.useGPU:
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else: else:
ndata = variational_posterior.mean.shape[0] _, _, _, dL_dmu, dL_dS, dL_dgamma = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
return dL_dmu, dL_dS, dL_dgamma
_, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) # ndata = variational_posterior.mean.shape[0]
#
#psi1 # _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1) # _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1) #
grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1) # #psi1
# grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
#psi2 # grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1) # grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1) #
grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1) # #psi2
# grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
if self.group_spike_prob: # grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_gamma[:] = grad_gamma.mean(axis=0) # grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
#
return grad_mu, grad_S, grad_gamma # if self.group_spike_prob:
# grad_gamma[:] = grad_gamma.mean(axis=0)
#
# return grad_mu, grad_S, grad_gamma
elif isinstance(variational_posterior, variational.NormalPosterior): elif isinstance(variational_posterior, variational.NormalPosterior):

View file

@ -25,7 +25,7 @@ class Static(Kern):
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape) return np.zeros(X.shape)
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(Z.shape) return np.zeros(Z.shape)
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):

View file

@ -227,3 +227,6 @@ class Bernoulli(Likelihood):
ns = np.ones_like(gp, dtype=int) ns = np.ones_like(gp, dtype=int)
Ysim = np.random.binomial(ns, self.gp_link.transf(gp)) Ysim = np.random.binomial(ns, self.gp_link.transf(gp))
return Ysim.reshape(orig_shape) return Ysim.reshape(orig_shape)
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
pass

View file

@ -11,7 +11,7 @@ from ..likelihoods import Gaussian
from ..inference.optimization import SCG from ..inference.optimization import SCG
from ..util import linalg from ..util import linalg
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
@ -41,7 +41,7 @@ class SSGPLVM(SparseGP):
if X_variance is None: # The variance of the variational approximation (S) if X_variance is None: # The variance of the variational approximation (S)
X_variance = np.random.uniform(0,.1,X.shape) X_variance = np.random.uniform(0,.1,X.shape)
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation gamma = np.empty_like(X, order='F') # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim) gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim)
if group_spike: if group_spike:
@ -60,12 +60,15 @@ class SSGPLVM(SparseGP):
pi = np.empty((input_dim)) pi = np.empty((input_dim))
pi[:] = 0.5 pi[:] = 0.5
self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b
X = np.asfortranarray(X)
X_variance = np.asfortranarray(X_variance)
gamma = np.asfortranarray(gamma)
X = SpikeAndSlabPosterior(X, X_variance, gamma) X = SpikeAndSlabPosterior(X, X_variance, gamma)
if group_spike: if group_spike:
kernel.group_spike_prob = True kernel.group_spike_prob = True
self.variational_prior.group_spike_prob = True self.variational_prior.group_spike_prob = True
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.add_parameter(self.X, index=0) self.add_parameter(self.X, index=0)
@ -76,7 +79,7 @@ class SSGPLVM(SparseGP):
X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad
def parameters_changed(self): def parameters_changed(self):
if isinstance(self.inference_method, VarDTC_GPU): if isinstance(self.inference_method, VarDTC_GPU) or isinstance(self.inference_method, VarDTC_minibatch):
update_gradients(self) update_gradients(self)
return return

View file

@ -68,7 +68,7 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
for i in range(ard_params.shape[0]): for i in range(ard_params.shape[0]):
c = Tango.nextMedium() c = Tango.nextMedium()
bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel._parameters_[i].name, bottom=bottom)) bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel.parameters[i].name, bottom=bottom))
bottom += ard_params[i,:] bottom += ard_params[i,:]
ax.set_xlim(-.5, kernel.input_dim - .5) ax.set_xlim(-.5, kernel.input_dim - .5)

View file

@ -102,7 +102,8 @@ class lvm(matplotlib_show):
vals = param_to_array(model.X.mean) vals = param_to_array(model.X.mean)
else: else:
vals = param_to_array(model.X) vals = param_to_array(model.X)
if len(vals.shape)==1:
vals = vals[None,:]
matplotlib_show.__init__(self, vals, axes=latent_axes) matplotlib_show.__init__(self, vals, axes=latent_axes)
if isinstance(latent_axes,mpl.axes.Axes): if isinstance(latent_axes,mpl.axes.Axes):
@ -393,14 +394,13 @@ class mocap_data_show_vpython(vpython_show):
def process_values(self): def process_values(self):
raise NotImplementedError, "this needs to be implemented to use the data_show class" raise NotImplementedError, "this needs to be implemented to use the data_show class"
class mocap_data_show(matplotlib_show): class mocap_data_show(matplotlib_show):
"""Base class for visualizing motion capture data.""" """Base class for visualizing motion capture data."""
def __init__(self, vals, axes=None, connect=None): def __init__(self, vals, axes=None, connect=None):
if axes==None: if axes==None:
fig = plt.figure() fig = plt.figure()
axes = fig.add_subplot(111, projection='3d') axes = fig.add_subplot(111, projection='3d',aspect='equal')
matplotlib_show.__init__(self, vals, axes) matplotlib_show.__init__(self, vals, axes)
self.connect = connect self.connect = connect
@ -445,11 +445,12 @@ class mocap_data_show(matplotlib_show):
def process_values(self): def process_values(self):
raise NotImplementedError, "this needs to be implemented to use the data_show class" raise NotImplementedError, "this needs to be implemented to use the data_show class"
def initialize_axes(self): def initialize_axes(self, boundary=0.05):
"""Set up the axes with the right limits and scaling.""" """Set up the axes with the right limits and scaling."""
self.x_lim = np.array([self.vals[:, 0].min(), self.vals[:, 0].max()]) bs = [(self.vals[:, i].max()-self.vals[:, i].min())*boundary for i in xrange(3)]
self.y_lim = np.array([self.vals[:, 1].min(), self.vals[:, 1].max()]) self.x_lim = np.array([self.vals[:, 0].min()-bs[0], self.vals[:, 0].max()+bs[0]])
self.z_lim = np.array([self.vals[:, 2].min(), self.vals[:, 2].max()]) self.y_lim = np.array([self.vals[:, 1].min()-bs[1], self.vals[:, 1].max()+bs[1]])
self.z_lim = np.array([self.vals[:, 2].min()-bs[2], self.vals[:, 2].max()+bs[2]])
def initialize_axes_modify(self): def initialize_axes_modify(self):
self.points_handle.remove() self.points_handle.remove()
@ -472,6 +473,8 @@ class mocap_data_show(matplotlib_show):
class stick_show(mocap_data_show): class stick_show(mocap_data_show):
"""Show a three dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect.""" """Show a three dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect."""
def __init__(self, vals, connect=None, axes=None): def __init__(self, vals, connect=None, axes=None):
if len(vals.shape)==1:
vals = vals[None,:]
mocap_data_show.__init__(self, vals, axes=axes, connect=connect) mocap_data_show.__init__(self, vals, axes=axes, connect=connect)
def process_values(self): def process_values(self):

View file

@ -95,7 +95,7 @@ class ParameterizedTest(unittest.TestCase):
self.assertListEqual(self.test1.kern.param_array.tolist(), val[:2].tolist()) self.assertListEqual(self.test1.kern.param_array.tolist(), val[:2].tolist())
def test_add_parameter_already_in_hirarchy(self): def test_add_parameter_already_in_hirarchy(self):
self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0]) self.assertRaises(HierarchyError, self.test1.add_parameter, self.white.parameters[0])
def test_default_constraints(self): def test_default_constraints(self):
self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops) self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)

View file

@ -89,28 +89,28 @@ class Test(ListDictTestCase):
self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops) self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops)
self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops) self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops)
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
self.assertSequenceEqual(str(par), str(pcopy)) self.assertSequenceEqual(str(par), str(pcopy))
self.assertIsNot(par.param_array, pcopy.param_array) self.assertIsNot(par.param_array, pcopy.param_array)
self.assertIsNot(par.full_gradient, pcopy.full_gradient) self.assertIsNot(par.gradient_full, pcopy.gradient_full)
with tempfile.TemporaryFile('w+b') as f: with tempfile.TemporaryFile('w+b') as f:
par.pickle(f) par.pickle(f)
f.seek(0) f.seek(0)
pcopy = pickle.load(f) pcopy = pickle.load(f)
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
pcopy.gradient = 10 pcopy.gradient = 10
np.testing.assert_allclose(par.linear.full_gradient, pcopy.linear.full_gradient) np.testing.assert_allclose(par.linear.gradient_full, pcopy.linear.gradient_full)
np.testing.assert_allclose(pcopy.linear.full_gradient, 10) np.testing.assert_allclose(pcopy.linear.gradient_full, 10)
self.assertSequenceEqual(str(par), str(pcopy)) self.assertSequenceEqual(str(par), str(pcopy))
def test_model(self): def test_model(self):
par = toy_rbf_1d_50(optimize=0, plot=0) par = toy_rbf_1d_50(optimize=0, plot=0)
pcopy = par.copy() pcopy = par.copy()
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
self.assertSequenceEqual(str(par), str(pcopy)) self.assertSequenceEqual(str(par), str(pcopy))
self.assertIsNot(par.param_array, pcopy.param_array) self.assertIsNot(par.param_array, pcopy.param_array)
self.assertIsNot(par.full_gradient, pcopy.full_gradient) self.assertIsNot(par.gradient_full, pcopy.gradient_full)
self.assertTrue(pcopy.checkgrad()) self.assertTrue(pcopy.checkgrad())
self.assert_(np.any(pcopy.gradient!=0.0)) self.assert_(np.any(pcopy.gradient!=0.0))
with tempfile.TemporaryFile('w+b') as f: with tempfile.TemporaryFile('w+b') as f:
@ -118,7 +118,7 @@ class Test(ListDictTestCase):
f.seek(0) f.seek(0)
pcopy = pickle.load(f) pcopy = pickle.load(f)
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient) np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
self.assertSequenceEqual(str(par), str(pcopy)) self.assertSequenceEqual(str(par), str(pcopy))
self.assert_(pcopy.checkgrad()) self.assert_(pcopy.checkgrad())
@ -126,10 +126,10 @@ class Test(ListDictTestCase):
par = toy_rbf_1d_50(optimize=0, plot=0) par = toy_rbf_1d_50(optimize=0, plot=0)
pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy()) pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
self.assertSequenceEqual(str(par), str(pcopy)) self.assertSequenceEqual(str(par), str(pcopy))
self.assertIsNot(par.param_array, pcopy.param_array) self.assertIsNot(par.param_array, pcopy.param_array)
self.assertIsNot(par.full_gradient, pcopy.full_gradient) self.assertIsNot(par.gradient_full, pcopy.gradient_full)
self.assertTrue(pcopy.checkgrad()) self.assertTrue(pcopy.checkgrad())
self.assert_(np.any(pcopy.gradient!=0.0)) self.assert_(np.any(pcopy.gradient!=0.0))
pcopy.optimize('bfgs') pcopy.optimize('bfgs')
@ -140,7 +140,7 @@ class Test(ListDictTestCase):
f.seek(0) f.seek(0)
pcopy = pickle.load(f) pcopy = pickle.load(f)
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient) np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
self.assertSequenceEqual(str(par), str(pcopy)) self.assertSequenceEqual(str(par), str(pcopy))
self.assert_(pcopy.checkgrad()) self.assert_(pcopy.checkgrad())
@ -151,18 +151,18 @@ class Test(ListDictTestCase):
par.gradient = 10 par.gradient = 10
pcopy = par.copy() pcopy = par.copy()
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
self.assertSequenceEqual(str(par), str(pcopy)) self.assertSequenceEqual(str(par), str(pcopy))
self.assertIsNot(par.param_array, pcopy.param_array) self.assertIsNot(par.param_array, pcopy.param_array)
self.assertIsNot(par.full_gradient, pcopy.full_gradient) self.assertIsNot(par.gradient_full, pcopy.gradient_full)
with tempfile.TemporaryFile('w+b') as f: with tempfile.TemporaryFile('w+b') as f:
par.pickle(f) par.pickle(f)
f.seek(0) f.seek(0)
pcopy = pickle.load(f) pcopy = pickle.load(f)
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
pcopy.gradient = 10 pcopy.gradient = 10
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient) np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
np.testing.assert_allclose(pcopy.mean.full_gradient, 10) np.testing.assert_allclose(pcopy.mean.gradient_full, 10)
self.assertSequenceEqual(str(par), str(pcopy)) self.assertSequenceEqual(str(par), str(pcopy))
def test_model_concat(self): def test_model_concat(self):
@ -170,10 +170,10 @@ class Test(ListDictTestCase):
par.randomize() par.randomize()
pcopy = par.copy() pcopy = par.copy()
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) self.assertListEqual(par.gradient_full.tolist(), pcopy.gradient_full.tolist())
self.assertSequenceEqual(str(par), str(pcopy)) self.assertSequenceEqual(str(par), str(pcopy))
self.assertIsNot(par.param_array, pcopy.param_array) self.assertIsNot(par.param_array, pcopy.param_array)
self.assertIsNot(par.full_gradient, pcopy.full_gradient) self.assertIsNot(par.gradient_full, pcopy.gradient_full)
self.assertTrue(pcopy.checkgrad()) self.assertTrue(pcopy.checkgrad())
self.assert_(np.any(pcopy.gradient!=0.0)) self.assert_(np.any(pcopy.gradient!=0.0))
with tempfile.TemporaryFile('w+b') as f: with tempfile.TemporaryFile('w+b') as f:
@ -181,7 +181,7 @@ class Test(ListDictTestCase):
f.seek(0) f.seek(0)
pcopy = pickle.load(f) pcopy = pickle.load(f)
self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient) np.testing.assert_allclose(par.gradient_full, pcopy.gradient_full)
self.assertSequenceEqual(str(par), str(pcopy)) self.assertSequenceEqual(str(par), str(pcopy))
self.assert_(pcopy.checkgrad()) self.assert_(pcopy.checkgrad())

View file

@ -8,7 +8,7 @@ import numpy as np
from GPy.util.pca import pca from GPy.util.pca import pca
def initialize_latent(init, input_dim, Y): def initialize_latent(init, input_dim, Y):
Xr = np.random.randn(Y.shape[0], input_dim) Xr = np.asfortranarray(np.random.randn(Y.shape[0], input_dim))
if init == 'PCA': if init == 'PCA':
p = pca(Y) p = pca(Y)
PC = p.project(Y, min(input_dim, Y.shape[1])) PC = p.project(Y, min(input_dim, Y.shape[1]))
@ -20,4 +20,4 @@ def initialize_latent(init, input_dim, Y):
Xr -= Xr.mean(0) Xr -= Xr.mean(0)
Xr /= Xr.var(0) Xr /= Xr.var(0)
return Xr, var/var.max() return Xr, var/var.max()

View file

@ -123,7 +123,7 @@ def dtrtrs(A, B, lower=1, trans=0, unitdiag=0):
:returns: :returns:
""" """
A = force_F_ordered(A) A = np.asfortranarray(A)
#Note: B does not seem to need to be F ordered! #Note: B does not seem to need to be F ordered!
return lapack.dtrtrs(A, B, lower=lower, trans=trans, unitdiag=unitdiag) return lapack.dtrtrs(A, B, lower=lower, trans=trans, unitdiag=unitdiag)