Merge branch 'params' of https://github.com/SheffieldML/GPy into params

This commit is contained in:
Neil Lawrence 2014-03-03 15:03:00 +00:00
commit d26f35247d
40 changed files with 1347 additions and 1102 deletions

View file

@ -70,7 +70,7 @@ class GP(Model):
def parameters_changed(self): def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, Y_metadata=self.Y_metadata) self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, Y_metadata=self.Y_metadata)
self._dL_dK = grad_dict['dL_dK'] self.kern.update_gradients_full(grad_dict['dL_dK'], self.X)
def log_likelihood(self): def log_likelihood(self):
return self._log_marginal_likelihood return self._log_marginal_likelihood

View file

@ -20,14 +20,13 @@ class Model(Parameterized):
self.optimization_runs = [] self.optimization_runs = []
self.sampling_runs = [] self.sampling_runs = []
self.preferred_optimizer = 'scg' self.preferred_optimizer = 'scg'
# self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes
def log_likelihood(self): def log_likelihood(self):
raise NotImplementedError, "this needs to be implemented to use the model class" raise NotImplementedError, "this needs to be implemented to use the model class"
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
# def dK_d(self, param, dL_dK, X, X2)
g = np.zeros(self.size) g = np.zeros(self.size)
try: try:
# [g.__setitem__(s, self.gradient_mapping[p]().flat) for p, s in itertools.izip(self._parameters_, self._param_slices_) if not p.is_fixed]
[p._collect_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_) if not p.is_fixed] [p._collect_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_) if not p.is_fixed]
except ValueError: except ValueError:
raise ValueError, 'Gradient for {} not defined, please specify gradients for parameters to optimize'.format(p.name) raise ValueError, 'Gradient for {} not defined, please specify gradients for parameters to optimize'.format(p.name)
@ -61,99 +60,6 @@ class Model(Parameterized):
self.priors = state.pop() self.priors = state.pop()
Parameterized._setstate(self, state) Parameterized._setstate(self, state)
# def set_prior(self, regexp, what):
# """
#
# Sets priors on the model parameters.
#
# **Notes**
#
# Asserts that the prior is suitable for the constraint. If the
# wrong constraint is in place, an error is raised. If no
# constraint is in place, one is added (warning printed).
#
# For tied parameters, the prior will only be "counted" once, thus
# a prior object is only inserted on the first tied index
#
# :param regexp: regular expression of parameters on which priors need to be set.
# :type param: string, regexp, or integer array
# :param what: prior to set on parameter.
# :type what: GPy.core.Prior type
#
# """
# if self.priors is None:
# self.priors = [None for i in range(self._get_params().size)]
#
# which = self.grep_param_names(regexp)
#
# # check tied situation
# tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
# if len(tie_partial_matches):
# raise ValueError, "cannot place prior across partial ties"
# tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
# if len(tie_matches) > 1:
# raise ValueError, "cannot place prior across multiple ties"
# elif len(tie_matches) == 1:
# which = which[:1] # just place a prior object on the first parameter
#
#
# # check constraints are okay
#
# if what.domain is _POSITIVE:
# constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain is _POSITIVE]
# if len(constrained_positive_indices):
# constrained_positive_indices = np.hstack(constrained_positive_indices)
# else:
# constrained_positive_indices = np.zeros(shape=(0,))
# bad_constraints = np.setdiff1d(self.all_constrained_indices(), constrained_positive_indices)
# assert not np.any(which[:, None] == bad_constraints), "constraint and prior incompatible"
# unconst = np.setdiff1d(which, constrained_positive_indices)
# if len(unconst):
# print "Warning: constraining parameters to be positive:"
# print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
# print '\n'
# self.constrain_positive(unconst)
# elif what.domain is _REAL:
# assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
# else:
# raise ValueError, "prior not recognised"
#
# # store the prior in a local list
# for w in which:
# self.priors[w] = what
def get_gradient(self, name, return_names=False):
"""
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
:param name: the name of parameters required (as a regular expression).
:type name: regular expression
:param return_names: whether or not to return the names matched (default False)
:type return_names: bool
"""
matches = self.grep_param_names(name)
if len(matches):
if return_names:
return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
else:
return self._log_likelihood_gradients()[matches]
else:
raise AttributeError, "no parameter matches %s" % name
def randomize(self):
"""
Randomize the model.
Make this draw from the prior if one exists, else draw from N(0,1)
"""
# first take care of all parameters (from N(0,1))
# x = self._get_params_transformed()
x = np.random.randn(self.size_transformed)
x = self._untransform_params(x)
# now draw from prior where possible
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
self._set_params(x)
# self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs): def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
""" """
Perform random restarts of the model, and set the model to the best Perform random restarts of the model, and set the model to the best
@ -183,7 +89,9 @@ class Model(Parameterized):
:param messages: whether to display during optimisation :param messages: whether to display during optimisation
:type messages: bool :type messages: bool
.. note:: If num_processes is None, the number of workes in the multiprocessing pool is automatically set to the number of processors on the current machine. .. note:: If num_processes is None, the number of workes in the
multiprocessing pool is automatically set to the number of processors
on the current machine.
""" """
initial_parameters = self._get_params_transformed() initial_parameters = self._get_params_transformed()
@ -227,45 +135,23 @@ class Model(Parameterized):
self._set_params_transformed(initial_parameters) self._set_params_transformed(initial_parameters)
def ensure_default_constraints(self, warning=True): def ensure_default_constraints(self, warning=True):
""" """
Ensure that any variables which should clearly be positive Ensure that any variables which should clearly be positive
have been constrained somehow. The method performs a regular have been constrained somehow. The method performs a regular
expression search on parameter names looking for the terms expression search on parameter names looking for the terms
'variance', 'lengthscale', 'precision' and 'kappa'. If any of 'variance', 'lengthscale', 'precision' and 'kappa'. If any of
these terms are present in the name the parameter is these terms are present in the name the parameter is
constrained positive. constrained positive.
DEPRECATED.
""" """
raise DeprecationWarning, 'parameters now have default constraints' raise DeprecationWarning, 'parameters now have default constraints'
#positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
# param_names = self._get_param_names()
# for s in positive_strings:
# paramlist = self.grep_param_names(".*"+s)
# for param in paramlist:
# for p in param.flattened_parameters:
# rav_i = set(self._raveled_index_for(p))
# for constraint in self.constraints.iter_properties():
# rav_i -= set(self._constraint_indices(p, constraint))
# rav_i -= set(np.nonzero(self._fixes_for(p)!=UNFIXED)[0])
# ind = self._backtranslate_index(p, np.array(list(rav_i), dtype=int))
# if ind.size != 0:
# p[np.unravel_index(ind, p.shape)].constrain_positive(warning=warning)
# if paramlist:
# self.__getitem__(None, paramlist).constrain_positive(warning=warning)
# currently_constrained = self.all_constrained_indices()
# to_make_positive = []
# for s in positive_strings:
# for i in self.grep_param_names(".*" + s):
# if not (i in currently_constrained):
# to_make_positive.append(i)
# if len(to_make_positive):
# self.constrain_positive(np.asarray(to_make_positive), warning=warning)
def objective_function(self, x): def objective_function(self, x):
""" """
The objective function passed to the optimizer. It combines The objective function passed to the optimizer. It combines
the likelihood and the priors. the likelihood and the priors.
Failures are handled robustly. The algorithm will try several times to Failures are handled robustly. The algorithm will try several times to
return the objective, and will raise the original exception if return the objective, and will raise the original exception if
the objective cannot be computed. the objective cannot be computed.
@ -336,8 +222,15 @@ class Model(Parameterized):
:messages: whether to display during optimisation :messages: whether to display during optimisation
:type messages: bool :type messages: bool
:param optimizer: which optimizer to use (defaults to self.preferred optimizer) :param optimizer: which optimizer to use (defaults to self.preferred optimizer)
:type optimizer: string TODO: valid strings? :type optimizer: string
TODO: valid args
""" """
if self.is_fixed:
raise RuntimeError, "Cannot optimize, when everything is fixed"
if self.size == 0:
raise RuntimeError, "Model without parameters cannot be minimized"
if optimizer is None: if optimizer is None:
optimizer = self.preferred_optimizer optimizer = self.preferred_optimizer
@ -377,7 +270,7 @@ class Model(Parameterized):
and numerical gradients is within <tolerance> of unity. and numerical gradients is within <tolerance> of unity.
""" """
x = self._get_params_transformed().copy() x = self._get_params_transformed()
if not verbose: if not verbose:
# make sure only to test the selected parameters # make sure only to test the selected parameters
@ -389,15 +282,15 @@ class Model(Parameterized):
indices = np.r_[:self.size] indices = np.r_[:self.size]
which = (transformed_index[:,None]==indices[self._fixes_][None,:]).nonzero() which = (transformed_index[:,None]==indices[self._fixes_][None,:]).nonzero()
transformed_index = (indices-(~self._fixes_).cumsum())[transformed_index[which[0]]] transformed_index = (indices-(~self._fixes_).cumsum())[transformed_index[which[0]]]
if transformed_index.size == 0: if transformed_index.size == 0:
print "No free parameters to check" print "No free parameters to check"
return return
# just check the global ratio # just check the global ratio
dx = np.zeros_like(x) dx = np.zeros(x.shape)
dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size)) dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size))
# evaulate around the point x # evaulate around the point x
f1 = self.objective_function(x + dx) f1 = self.objective_function(x + dx)
f2 = self.objective_function(x - dx) f2 = self.objective_function(x - dx)
@ -405,10 +298,9 @@ class Model(Parameterized):
dx = dx[transformed_index] dx = dx[transformed_index]
gradient = gradient[transformed_index] gradient = gradient[transformed_index]
numerical_gradient = (f1 - f2) / (2 * dx)
global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient == 0, 1e-32, gradient))) global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient == 0, 1e-32, gradient)))
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance) return (np.abs(1. - global_ratio) < tolerance)
else: else:
# check the gradient of each parameter individually, and do some pretty printing # check the gradient of each parameter individually, and do some pretty printing
try: try:
@ -439,7 +331,7 @@ class Model(Parameterized):
#print param_index, transformed_index #print param_index, transformed_index
else: else:
transformed_index = param_index transformed_index = param_index
if param_index.size == 0: if param_index.size == 0:
print "No free parameters to check" print "No free parameters to check"
return return
@ -472,82 +364,5 @@ class Model(Parameterized):
print grad_string print grad_string
self._set_params_transformed(x) self._set_params_transformed(x)
return ret return ret
def input_sensitivity(self):
"""
return an array describing the sesitivity of the model to each input
NB. Right now, we're basing this on the lengthscales (or
variances) of the kernel. TODO: proper sensitivity analysis
where we integrate across the model inputs and evaluate the
effect on the variance of the model output. """
if not hasattr(self, 'kern'):
raise ValueError, "this model has no kernel"
k = self.kern#[p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD]
from ..kern import RBF, Linear#, RBFInv
if isinstance(k, RBF):
return 1. / k.lengthscale
#elif isinstance(k, RBFInv):
# return k.inv_lengthscale
elif isinstance(k, Linear):
return k.variances
else:
raise ValueError, "cannot determine sensitivity for this kernel"
def pseudo_EM(self, stop_crit=.1, **kwargs):
"""
EM - like algorithm for Expectation Propagation and Laplace approximation
:param stop_crit: convergence criterion
:type stop_crit: float
.. Note: kwargs are passed to update_likelihood and optimize functions.
"""
assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods"
ll_change = stop_crit + 1.
iteration = 0
last_ll = -np.inf
convergence = False
alpha = 0
stop = False
# Handle **kwargs
ep_args = {}
for arg in kwargs.keys():
if arg in ('epsilon', 'power_ep'):
ep_args[arg] = kwargs[arg]
del kwargs[arg]
while not stop:
last_approximation = self.likelihood.copy()
last_params = self._get_params()
if len(ep_args) == 2:
self.update_likelihood_approximation(epsilon=ep_args['epsilon'], power_ep=ep_args['power_ep'])
elif len(ep_args) == 1:
if ep_args.keys()[0] == 'epsilon':
self.update_likelihood_approximation(epsilon=ep_args['epsilon'])
elif ep_args.keys()[0] == 'power_ep':
self.update_likelihood_approximation(power_ep=ep_args['power_ep'])
else:
self.update_likelihood_approximation()
new_ll = self.log_likelihood()
ll_change = new_ll - last_ll
if ll_change < 0:
self.likelihood = last_approximation # restore previous likelihood approximation
self._set_params(last_params) # restore model parameters
print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
stop = True
else:
self.optimize(**kwargs)
last_ll = self.log_likelihood()
if ll_change < stop_crit:
stop = True
iteration += 1
if stop:
print "%s iterations." % iteration
self.update_likelihood_approximation()

View file

@ -4,20 +4,7 @@
__updated__ = '2013-12-16' __updated__ = '2013-12-16'
import numpy as np import numpy as np
from parameter_core import Observable, Parameterizable from parameter_core import Observable
class ParamList(list):
"""
List to store ndarray-likes in.
It will look for 'is' instead of calling __eq__ on each element.
"""
def __contains__(self, other):
for el in self:
if el is other:
return True
return False
pass
class ObservableArray(np.ndarray, Observable): class ObservableArray(np.ndarray, Observable):
""" """
@ -59,14 +46,14 @@ class ObservableArray(np.ndarray, Observable):
else: else:
return s.size != 0 return s.size != 0
def __setitem__(self, s, val, update=True): def __setitem__(self, s, val):
if self._s_not_empty(s): if self._s_not_empty(s):
super(ObservableArray, self).__setitem__(s, val) super(ObservableArray, self).__setitem__(s, val)
if update: self._notify_observers(self[s])
self._notify_observers()
def __getslice__(self, start, stop): def __getslice__(self, start, stop):
return self.__getitem__(slice(start, stop)) return self.__getitem__(slice(start, stop))
def __setslice__(self, start, stop, val): def __setslice__(self, start, stop, val):
return self.__setitem__(slice(start, stop), val) return self.__setitem__(slice(start, stop), val)

View file

@ -5,47 +5,7 @@ Created on Oct 2, 2013
''' '''
import numpy import numpy
from numpy.lib.function_base import vectorize from numpy.lib.function_base import vectorize
from param import Param from lists_and_dicts import IntArrayDict
from collections import defaultdict
class ParamDict(defaultdict):
def __init__(self):
"""
Default will be self._default, if not set otherwise
"""
defaultdict.__init__(self, self.default_factory)
def __getitem__(self, key):
try:
return defaultdict.__getitem__(self, key)
except KeyError:
for a in self.iterkeys():
if numpy.all(a==key) and a._parent_index_==key._parent_index_:
return defaultdict.__getitem__(self, a)
raise
def __contains__(self, key):
if defaultdict.__contains__(self, key):
return True
for a in self.iterkeys():
if numpy.all(a==key) and a._parent_index_==key._parent_index_:
return True
return False
def __setitem__(self, key, value):
if isinstance(key, Param):
for a in self.iterkeys():
if numpy.all(a==key) and a._parent_index_==key._parent_index_:
return super(ParamDict, self).__setitem__(a, value)
defaultdict.__setitem__(self, key, value)
class SetDict(ParamDict):
def default_factory(self):
return set()
class IntArrayDict(ParamDict):
def default_factory(self):
return numpy.int_([])
class ParameterIndexOperations(object): class ParameterIndexOperations(object):
''' '''
@ -194,9 +154,13 @@ class ParameterIndexOperationsView(object):
def shift_right(self, start, size): def shift_right(self, start, size):
raise NotImplementedError, 'Shifting only supported in original ParamIndexOperations' self._param_index_ops.shift_right(start+self._offset, size)
def shift_left(self, start, size):
self._param_index_ops.shift_left(start+self._offset, size)
self._offset -= size
self._size -= size
def clear(self): def clear(self):
for i, ind in self.items(): for i, ind in self.items():
self._param_index_ops.remove(i, ind+self._offset) self._param_index_ops.remove(i, ind+self._offset)
@ -232,9 +196,7 @@ class ParameterIndexOperationsView(object):
def __getitem__(self, prop): def __getitem__(self, prop):
ind = self._filter_index(self._param_index_ops[prop]) ind = self._filter_index(self._param_index_ops[prop])
if ind.size > 0: return ind
return ind
raise KeyError, prop
def __str__(self, *args, **kwargs): def __str__(self, *args, **kwargs):
import pprint import pprint

View file

@ -0,0 +1,35 @@
'''
Created on 27 Feb 2014
@author: maxz
'''
from collections import defaultdict
class DefaultArrayDict(defaultdict):
def __init__(self):
"""
Default will be self._default, if not set otherwise
"""
defaultdict.__init__(self, self.default_factory)
class SetDict(DefaultArrayDict):
def default_factory(self):
return set()
class IntArrayDict(DefaultArrayDict):
def default_factory(self):
import numpy as np
return np.int_([])
class ArrayList(list):
"""
List to store ndarray-likes in.
It will look for 'is' instead of calling __eq__ on each element.
"""
def __contains__(self, other):
for el in self:
if el is other:
return True
return False
pass

View file

@ -3,8 +3,8 @@
import itertools import itertools
import numpy import numpy
from parameter_core import Constrainable, Gradcheckable, Indexable, Parentable, adjust_name_for_printing from parameter_core import OptimizationHandlable, Gradcheckable, adjust_name_for_printing
from array_core import ObservableArray, ParamList from array_core import ObservableArray
###### printing ###### printing
__constraints_name__ = "Constraint" __constraints_name__ = "Constraint"
@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
__print_threshold__ = 5 __print_threshold__ = 5
###### ######
class Param(Constrainable, ObservableArray, Gradcheckable, Indexable): class Param(OptimizationHandlable, ObservableArray, Gradcheckable):
""" """
Parameter object for GPy models. Parameter object for GPy models.
@ -50,7 +50,7 @@ class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
obj._realsize_ = obj.size obj._realsize_ = obj.size
obj._realndim_ = obj.ndim obj._realndim_ = obj.ndim
obj._updated_ = False obj._updated_ = False
from index_operations import SetDict from lists_and_dicts import SetDict
obj._tied_to_me_ = SetDict() obj._tied_to_me_ = SetDict()
obj._tied_to_ = [] obj._tied_to_ = []
obj._original_ = True obj._original_ = True
@ -60,6 +60,19 @@ class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
def __init__(self, name, input_array, default_constraint=None, *a, **kw): def __init__(self, name, input_array, default_constraint=None, *a, **kw):
super(Param, self).__init__(name=name, default_constraint=default_constraint, *a, **kw) super(Param, self).__init__(name=name, default_constraint=default_constraint, *a, **kw)
def build_pydot(self,G):
import pydot
node = pydot.Node(id(self), shape='record', label=self.name)
G.add_node(node)
for o in self._observer_callables_.keys():
label = o.name if hasattr(o, 'name') else str(o)
observed_node = pydot.Node(id(o), label=label)
G.add_node(observed_node)
edge = pydot.Edge(str(id(self)), str(id(o)), color='darkorange2', arrowhead='vee')
G.add_edge(edge)
return node
def __array_finalize__(self, obj): def __array_finalize__(self, obj):
# see InfoArray.__array_finalize__ for comments # see InfoArray.__array_finalize__ for comments
if obj is None: return if obj is None: return
@ -135,10 +148,11 @@ class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
#=========================================================================== #===========================================================================
# get/set parameters # get/set parameters
#=========================================================================== #===========================================================================
def _set_params(self, param, update=True): def _set_params(self, param, trigger_parent=True):
self.flat = param self.flat = param
#self._notify_tied_parameters() if trigger_parent: min_priority = None
self._notify_observers() else: min_priority = -numpy.inf
self._notify_observers(None, min_priority)
def _get_params(self): def _get_params(self):
return self.flat return self.flat
@ -161,12 +175,10 @@ class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base
except AttributeError: pass # returning 0d array or float, double etc except AttributeError: pass # returning 0d array or float, double etc
return new_arr return new_arr
def __setitem__(self, s, val, update=True):
super(Param, self).__setitem__(s, val, update=update) def __setitem__(self, s, val):
#self._notify_tied_parameters() super(Param, self).__setitem__(s, val)
if update and self._s_not_empty(s):
self._notify_parameters_changed()
#=========================================================================== #===========================================================================
# Index Operations: # Index Operations:
#=========================================================================== #===========================================================================
@ -185,6 +197,7 @@ class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
a = self._realshape_[i] + a a = self._realshape_[i] + a
internal_offset += a * extended_realshape[i] internal_offset += a * extended_realshape[i]
return internal_offset return internal_offset
def _raveled_index(self, slice_index=None): def _raveled_index(self, slice_index=None):
# return an index array on the raveled array, which is formed by the current_slice # return an index array on the raveled array, which is formed by the current_slice
# of this object # of this object
@ -192,6 +205,7 @@ class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
ind = self._indices(slice_index) ind = self._indices(slice_index)
if ind.ndim < 2: ind = ind[:, None] if ind.ndim < 2: ind = ind[:, None]
return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int) return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int)
def _expand_index(self, slice_index=None): def _expand_index(self, slice_index=None):
# this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes # this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes
# it basically translates slices to their respective index arrays and turns negative indices around # it basically translates slices to their respective index arrays and turns negative indices around
@ -218,7 +232,8 @@ class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
#=========================================================================== #===========================================================================
@property @property
def is_fixed(self): def is_fixed(self):
return self._highest_parent_._is_fixed(self) from transformations import __fixed__
return self.constraints[__fixed__].size == self.size
#def round(self, decimals=0, out=None): #def round(self, decimals=0, out=None):
# view = super(Param, self).round(decimals, out).view(Param) # view = super(Param, self).round(decimals, out).view(Param)
# view.__array_finalize__(self) # view.__array_finalize__(self)
@ -255,7 +270,7 @@ class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
return [t._short() for t in self._tied_to_] or [''] return [t._short() for t in self._tied_to_] or ['']
def __repr__(self, *args, **kwargs): def __repr__(self, *args, **kwargs):
name = "\033[1m{x:s}\033[0;0m:\n".format( name = "\033[1m{x:s}\033[0;0m:\n".format(
x=self.hirarchy_name()) x=self.hierarchy_name())
return name + super(Param, self).__repr__(*args, **kwargs) return name + super(Param, self).__repr__(*args, **kwargs)
def _ties_for(self, rav_index): def _ties_for(self, rav_index):
# size = sum(p.size for p in self._tied_to_) # size = sum(p.size for p in self._tied_to_)
@ -289,12 +304,12 @@ class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
gen = map(lambda x: " ".join(map(str, x)), gen) gen = map(lambda x: " ".join(map(str, x)), gen)
return reduce(lambda a, b:max(a, len(b)), gen, len(header)) return reduce(lambda a, b:max(a, len(b)), gen, len(header))
def _max_len_values(self): def _max_len_values(self):
return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.hirarchy_name())) return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.hierarchy_name()))
def _max_len_index(self, ind): def _max_len_index(self, ind):
return reduce(lambda a, b:max(a, len(str(b))), ind, len(__index_name__)) return reduce(lambda a, b:max(a, len(str(b))), ind, len(__index_name__))
def _short(self): def _short(self):
# short string to print # short string to print
name = self.hirarchy_name() name = self.hierarchy_name()
if self._realsize_ < 2: if self._realsize_ < 2:
return name return name
ind = self._indices() ind = self._indices()
@ -317,8 +332,8 @@ class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
if lp is None: lp = self._max_len_names(prirs, __tie_name__) if lp is None: lp = self._max_len_names(prirs, __tie_name__)
sep = '-' sep = '-'
header_format = " {i:{5}^{2}s} | \033[1m{x:{5}^{1}s}\033[0;0m | {c:{5}^{0}s} | {p:{5}^{4}s} | {t:{5}^{3}s}" header_format = " {i:{5}^{2}s} | \033[1m{x:{5}^{1}s}\033[0;0m | {c:{5}^{0}s} | {p:{5}^{4}s} | {t:{5}^{3}s}"
if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing
else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
if not ties: ties = itertools.cycle(['']) if not ties: ties = itertools.cycle([''])
return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices
# except: return super(Param, self).__str__() # except: return super(Param, self).__str__()
@ -333,7 +348,8 @@ class ParamConcatenation(object):
See :py:class:`GPy.core.parameter.Param` for more details on constraining. See :py:class:`GPy.core.parameter.Param` for more details on constraining.
""" """
# self.params = params # self.params = params
self.params = ParamList([]) from lists_and_dicts import ArrayList
self.params = ArrayList([])
for p in params: for p in params:
for p in p.flattened_parameters: for p in p.flattened_parameters:
if p not in self.params: if p not in self.params:
@ -341,6 +357,21 @@ class ParamConcatenation(object):
self._param_sizes = [p.size for p in self.params] self._param_sizes = [p.size for p in self.params]
startstops = numpy.cumsum([0] + self._param_sizes) startstops = numpy.cumsum([0] + self._param_sizes)
self._param_slices_ = [slice(start, stop) for start,stop in zip(startstops, startstops[1:])] self._param_slices_ = [slice(start, stop) for start,stop in zip(startstops, startstops[1:])]
parents = dict()
for p in self.params:
if p.has_parent():
parent = p._direct_parent_
level = 0
while parent is not None:
if parent in parents:
parents[parent] = max(level, parents[parent])
else:
parents[parent] = level
level += 1
parent = parent._direct_parent_
import operator
self.parents = map(lambda x: x[0], sorted(parents.iteritems(), key=operator.itemgetter(1)))
#=========================================================================== #===========================================================================
# Get/set items, enable broadcasting # Get/set items, enable broadcasting
#=========================================================================== #===========================================================================
@ -354,24 +385,26 @@ class ParamConcatenation(object):
val = val._vals() val = val._vals()
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True; ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
vals = self._vals(); vals[s] = val; del val vals = self._vals(); vals[s] = val; del val
[numpy.place(p, ind[ps], vals[ps]) and update and p._notify_parameters_changed() [numpy.place(p, ind[ps], vals[ps])
for p, ps in zip(self.params, self._param_slices_)] for p, ps in zip(self.params, self._param_slices_)]
if update:
self.update_all_params()
def _vals(self): def _vals(self):
return numpy.hstack([p._get_params() for p in self.params]) return numpy.hstack([p._get_params() for p in self.params])
#=========================================================================== #===========================================================================
# parameter operations: # parameter operations:
#=========================================================================== #===========================================================================
def update_all_params(self): def update_all_params(self):
for p in self.params: for par in self.parents:
p._notify_parameters_changed() par._notify_observers(-numpy.inf)
def constrain(self, constraint, warning=True): def constrain(self, constraint, warning=True):
[param.constrain(constraint, update=False) for param in self.params] [param.constrain(constraint, trigger_parent=False) for param in self.params]
self.update_all_params() self.update_all_params()
constrain.__doc__ = Param.constrain.__doc__ constrain.__doc__ = Param.constrain.__doc__
def constrain_positive(self, warning=True): def constrain_positive(self, warning=True):
[param.constrain_positive(warning, update=False) for param in self.params] [param.constrain_positive(warning, trigger_parent=False) for param in self.params]
self.update_all_params() self.update_all_params()
constrain_positive.__doc__ = Param.constrain_positive.__doc__ constrain_positive.__doc__ = Param.constrain_positive.__doc__
@ -381,12 +414,12 @@ class ParamConcatenation(object):
fix = constrain_fixed fix = constrain_fixed
def constrain_negative(self, warning=True): def constrain_negative(self, warning=True):
[param.constrain_negative(warning, update=False) for param in self.params] [param.constrain_negative(warning, trigger_parent=False) for param in self.params]
self.update_all_params() self.update_all_params()
constrain_negative.__doc__ = Param.constrain_negative.__doc__ constrain_negative.__doc__ = Param.constrain_negative.__doc__
def constrain_bounded(self, lower, upper, warning=True): def constrain_bounded(self, lower, upper, warning=True):
[param.constrain_bounded(lower, upper, warning, update=False) for param in self.params] [param.constrain_bounded(lower, upper, warning, trigger_parent=False) for param in self.params]
self.update_all_params() self.update_all_params()
constrain_bounded.__doc__ = Param.constrain_bounded.__doc__ constrain_bounded.__doc__ = Param.constrain_bounded.__doc__

View file

@ -2,28 +2,58 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
import heapq
__updated__ = '2013-12-16' __updated__ = '2013-12-16'
class HierarchyError(Exception):
"""
Gets thrown when something is wrong with the parameter hierarchy
"""
def adjust_name_for_printing(name): def adjust_name_for_printing(name):
if name is not None: if name is not None:
return name.replace(" ", "_").replace(".", "_").replace("-", "").replace("+", "").replace("!", "").replace("*", "").replace("/", "") return name.replace(" ", "_").replace(".", "_").replace("-", "").replace("+", "").replace("!", "").replace("*", "").replace("/", "")
return '' return ''
class Observable(object): class Observable(object):
_updated = True
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
from collections import defaultdict self._observer_callables_ = []
self._observer_callables_ = defaultdict(list)
def add_observer(self, observer, callble): def add_observer(self, observer, callble, priority=0):
self._observer_callables_[observer].append(callble) heapq.heappush(self._observer_callables_, (priority, observer, callble))
def remove_observer(self, observer, callble): def remove_observer(self, observer, callble=None):
del self._observer_callables_[observer][callble] to_remove = []
for p, obs, clble in self._observer_callables_:
def _notify_observers(self): if callble is not None:
[[callble(self) for callble in callables] if (obs == observer) and (callble == clble):
for callables in self._observer_callables_.itervalues()] to_remove.append((p, obs, clble))
else:
if obs is observer:
to_remove.append((p, obs, clble))
for r in to_remove:
self._observer_callables_.remove(r)
def _notify_observers(self, which=None, min_priority=None):
"""
Notifies all observers. Which is the element, which kicked off this
notification loop.
NOTE: notifies only observers with priority p > min_priority!
^^^^^^^^^^^^^^^^
:param which: object, which started this notification loop
:param min_priority: only notify observers with priority > min_priority
if min_priority is None, notify all observers in order
"""
if which is None:
which = self
if min_priority is None:
[callble(which) for _, _, callble in heapq.nlargest(len(self._observer_callables_), self._observer_callables_)]
else:
[callble(which) for p, _, callble in heapq.nlargest(len(self._observer_callables_), self._observer_callables_) if p > min_priority]
class Pickleable(object): class Pickleable(object):
def _getstate(self): def _getstate(self):
@ -72,9 +102,8 @@ class Parentable(object):
return self._direct_parent_._highest_parent_ return self._direct_parent_._highest_parent_
def _notify_parameters_changed(self): def _notify_parameters_changed(self):
if self.has_parent(): raise NotImplementedError, "shouldnt happen, abstract superclass"
self._direct_parent_._notify_parameters_changed()
class Nameable(Parentable): class Nameable(Parentable):
def __init__(self, name, *a, **kw): def __init__(self, name, *a, **kw):
super(Nameable, self).__init__(*a, **kw) super(Nameable, self).__init__(*a, **kw)
@ -90,11 +119,11 @@ class Nameable(Parentable):
self._name = name self._name = name
if self.has_parent(): if self.has_parent():
self._direct_parent_._name_changed(self, from_name) self._direct_parent_._name_changed(self, from_name)
def hirarchy_name(self, adjust_for_printing=True): def hierarchy_name(self, adjust_for_printing=True):
if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
else: adjust = lambda x: x else: adjust = lambda x: x
if self.has_parent(): if self.has_parent():
return self._direct_parent_.hirarchy_name() + "." + adjust(self.name) return self._direct_parent_.hierarchy_name() + "." + adjust(self.name)
return adjust(self.name) return adjust(self.name)
@ -151,7 +180,7 @@ class Constrainable(Nameable, Indexable):
#=========================================================================== #===========================================================================
# Fixing Parameters: # Fixing Parameters:
#=========================================================================== #===========================================================================
def constrain_fixed(self, value=None, warning=True): def constrain_fixed(self, value=None, warning=True, trigger_parent=True):
""" """
Constrain this paramter to be fixed to the current value it carries. Constrain this paramter to be fixed to the current value it carries.
@ -159,7 +188,7 @@ class Constrainable(Nameable, Indexable):
""" """
if value is not None: if value is not None:
self[:] = value self[:] = value
self.constrain(__fixed__, warning=warning) self.constrain(__fixed__, warning=warning, trigger_parent=trigger_parent)
rav_i = self._highest_parent_._raveled_index_for(self) rav_i = self._highest_parent_._raveled_index_for(self)
self._highest_parent_._set_fixed(rav_i) self._highest_parent_._set_fixed(rav_i)
fix = constrain_fixed fix = constrain_fixed
@ -200,9 +229,9 @@ class Constrainable(Nameable, Indexable):
#=========================================================================== #===========================================================================
# Prior Operations # Prior Operations
#=========================================================================== #===========================================================================
def set_prior(self, prior, warning=True, update=True): def set_prior(self, prior, warning=True, trigger_parent=True):
repriorized = self.unset_priors() repriorized = self.unset_priors()
self._add_to_index_operations(self.priors, repriorized, prior, warning, update) self._add_to_index_operations(self.priors, repriorized, prior, warning)
def unset_priors(self, *priors): def unset_priors(self, *priors):
return self._remove_from_index_operations(self.priors, priors) return self._remove_from_index_operations(self.priors, priors)
@ -228,7 +257,7 @@ class Constrainable(Nameable, Indexable):
# Constrain operations -> done # Constrain operations -> done
#=========================================================================== #===========================================================================
def constrain(self, transform, warning=True, update=True): def constrain(self, transform, warning=True, trigger_parent=True):
""" """
:param transform: the :py:class:`GPy.core.transformations.Transformation` :param transform: the :py:class:`GPy.core.transformations.Transformation`
to constrain the this parameter to. to constrain the this parameter to.
@ -238,9 +267,9 @@ class Constrainable(Nameable, Indexable):
:py:class:`GPy.core.transformations.Transformation`. :py:class:`GPy.core.transformations.Transformation`.
""" """
if isinstance(transform, Transformation): if isinstance(transform, Transformation):
self._set_params(transform.initialize(self._get_params()), update=False) self._set_params(transform.initialize(self._get_params()), trigger_parent=trigger_parent)
reconstrained = self.unconstrain() reconstrained = self.unconstrain()
self._add_to_index_operations(self.constraints, reconstrained, transform, warning, update) self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
def unconstrain(self, *transforms): def unconstrain(self, *transforms):
""" """
@ -251,30 +280,30 @@ class Constrainable(Nameable, Indexable):
""" """
return self._remove_from_index_operations(self.constraints, transforms) return self._remove_from_index_operations(self.constraints, transforms)
def constrain_positive(self, warning=True, update=True): def constrain_positive(self, warning=True, trigger_parent=True):
""" """
:param warning: print a warning if re-constraining parameters. :param warning: print a warning if re-constraining parameters.
Constrain this parameter to the default positive constraint. Constrain this parameter to the default positive constraint.
""" """
self.constrain(Logexp(), warning=warning, update=update) self.constrain(Logexp(), warning=warning, trigger_parent=trigger_parent)
def constrain_negative(self, warning=True, update=True): def constrain_negative(self, warning=True, trigger_parent=True):
""" """
:param warning: print a warning if re-constraining parameters. :param warning: print a warning if re-constraining parameters.
Constrain this parameter to the default negative constraint. Constrain this parameter to the default negative constraint.
""" """
self.constrain(NegativeLogexp(), warning=warning, update=update) self.constrain(NegativeLogexp(), warning=warning, trigger_parent=trigger_parent)
def constrain_bounded(self, lower, upper, warning=True, update=True): def constrain_bounded(self, lower, upper, warning=True, trigger_parent=True):
""" """
:param lower, upper: the limits to bound this parameter to :param lower, upper: the limits to bound this parameter to
:param warning: print a warning if re-constraining parameters. :param warning: print a warning if re-constraining parameters.
Constrain this parameter to lie within the given range. Constrain this parameter to lie within the given range.
""" """
self.constrain(Logistic(lower, upper), warning=warning, update=update) self.constrain(Logistic(lower, upper), warning=warning, trigger_parent=trigger_parent)
def unconstrain_positive(self): def unconstrain_positive(self):
""" """
@ -304,12 +333,11 @@ class Constrainable(Nameable, Indexable):
for p in self._parameters_: for p in self._parameters_:
p._parent_changed(parent) p._parent_changed(parent)
def _add_to_index_operations(self, which, reconstrained, transform, warning, update): def _add_to_index_operations(self, which, reconstrained, transform, warning):
if warning and reconstrained.size > 0: if warning and reconstrained.size > 0:
# TODO: figure out which parameters have changed and only print those
print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name) print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name)
which.add(transform, self._raveled_index()) which.add(transform, self._raveled_index())
if update:
self._notify_parameters_changed()
def _remove_from_index_operations(self, which, transforms): def _remove_from_index_operations(self, which, transforms):
if len(transforms) == 0: if len(transforms) == 0:
@ -324,12 +352,76 @@ class Constrainable(Nameable, Indexable):
return removed return removed
class OptimizationHandlable(Constrainable, Observable):
def _get_params_transformed(self):
# transformed parameters (apply transformation rules)
p = self._get_params()
[np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes():
return p[self._fixes_]
return p
def _set_params_transformed(self, p):
# inverse apply transformations for parameters and set the resulting parameters
self._set_params(self._untransform_params(p))
def _size_transformed(self):
return self.size - self.constraints[__fixed__].size
def _untransform_params(self, p):
p = p.copy()
if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
[np.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
return p
def _get_params(self):
# don't overwrite this anymore!
if not self.size:
return np.empty(shape=(0,), dtype=np.float64)
return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0])
class Parameterizable(Constrainable): def _set_params(self, params, trigger_parent=True):
# don't overwrite this anymore!
raise NotImplementedError, "This needs to be implemented in Param and Parametrizable"
#===========================================================================
# Optimization handles:
#===========================================================================
def _get_param_names(self):
n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
return n
def _get_param_names_transformed(self):
n = self._get_param_names()
if self._has_fixes():
return n[self._fixes_]
return n
#===========================================================================
# Randomizeable
#===========================================================================
def randomize(self):
"""
Randomize the model.
Make this draw from the prior if one exists, else draw from N(0,1)
"""
import numpy as np
# first take care of all parameters (from N(0,1))
# x = self._get_params_transformed()
x = np.random.randn(self._size_transformed())
x = self._untransform_params(x)
# now draw from prior where possible
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
self._set_params(x)
# self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
import numpy as np
class Parameterizable(OptimizationHandlable):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(Parameterizable, self).__init__(*args, **kwargs) super(Parameterizable, self).__init__(*args, **kwargs)
from GPy.core.parameterization.array_core import ParamList from GPy.core.parameterization.lists_and_dicts import ArrayList
_parameters_ = ParamList() _parameters_ = ArrayList()
self._added_names_ = set() self._added_names_ = set()
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
@ -352,7 +444,7 @@ class Parameterizable(Constrainable):
if pname in self._added_names_: if pname in self._added_names_:
del self.__dict__[pname] del self.__dict__[pname]
self._add_parameter_name(param) self._add_parameter_name(param)
else: elif pname not in dir(self):
self.__dict__[pname] = param self.__dict__[pname] = param
self._added_names_.add(pname) self._added_names_.add(pname)
@ -372,28 +464,26 @@ class Parameterizable(Constrainable):
import itertools import itertools
[p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
def _set_params(self, params, trigger_parent=True):
import itertools
[p._set_params(params[s], trigger_parent=False) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
if trigger_parent: min_priority = None
else: min_priority = -np.inf
self._notify_observers(None, min_priority)
def _set_gradient(self, g): def _set_gradient(self, g):
import itertools import itertools
[p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
def _get_params(self):
import numpy as np #===========================================================================
# don't overwrite this anymore! # TODO: not working yet
if not self.size: #===========================================================================
return np.empty(shape=(0,), dtype=np.float64)
return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0])
def _set_params(self, params, update=True):
# don't overwrite this anymore!
import itertools
[p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
self.parameters_changed()
def copy(self): def copy(self):
"""Returns a (deep) copy of the current model""" """Returns a (deep) copy of the current model"""
import copy import copy
from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView
from .array_core import ParamList from .lists_and_dicts import ArrayList
dc = dict() dc = dict()
for k, v in self.__dict__.iteritems(): for k, v in self.__dict__.iteritems():
@ -407,7 +497,7 @@ class Parameterizable(Constrainable):
dc['_direct_parent_'] = None dc['_direct_parent_'] = None
dc['_parent_index_'] = None dc['_parent_index_'] = None
dc['_parameters_'] = ParamList() dc['_parameters_'] = ArrayList()
dc['constraints'].clear() dc['constraints'].clear()
dc['priors'].clear() dc['priors'].clear()
dc['size'] = 0 dc['size'] = 0
@ -419,12 +509,7 @@ class Parameterizable(Constrainable):
s.add_parameter(p) s.add_parameter(p)
return s return s
def _notify_parameters_changed(self):
self.parameters_changed()
if self.has_parent():
self._direct_parent_._notify_parameters_changed()
def parameters_changed(self): def parameters_changed(self):
""" """
This method gets called when parameters have changed. This method gets called when parameters have changed.

View file

@ -7,11 +7,11 @@ import cPickle
import itertools import itertools
from re import compile, _pattern_type from re import compile, _pattern_type
from param import ParamConcatenation from param import ParamConcatenation
from parameter_core import Constrainable, Pickleable, Parentable, Observable, Parameterizable, adjust_name_for_printing, Gradcheckable from parameter_core import Pickleable, Parameterizable, adjust_name_for_printing, Gradcheckable
from transformations import __fixed__ from transformations import __fixed__
from array_core import ParamList from lists_and_dicts import ArrayList
class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable): class Parameterized(Parameterizable, Pickleable, Gradcheckable):
""" """
Parameterized class Parameterized class
@ -56,14 +56,39 @@ class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable):
def __init__(self, name=None, *a, **kw): def __init__(self, name=None, *a, **kw):
super(Parameterized, self).__init__(name=name, parent=None, parent_index=None, *a, **kw) super(Parameterized, self).__init__(name=name, parent=None, parent_index=None, *a, **kw)
self._in_init_ = True self._in_init_ = True
self._parameters_ = ParamList() self._parameters_ = ArrayList()
self.size = sum(p.size for p in self._parameters_) self.size = sum(p.size for p in self._parameters_)
self.add_observer(self, self._parameters_changed_notification, -100)
if not self._has_fixes(): if not self._has_fixes():
self._fixes_ = None self._fixes_ = None
self._param_slices_ = [] self._param_slices_ = []
self._connect_parameters() self._connect_parameters()
del self._in_init_ del self._in_init_
def build_pydot(self, G=None):
import pydot # @UnresolvedImport
iamroot = False
if G is None:
G = pydot.Dot(graph_type='digraph')
iamroot=True
node = pydot.Node(id(self), shape='record', label=self.name)
G.add_node(node)
for child in self._parameters_:
child_node = child.build_pydot(G)
G.add_edge(pydot.Edge(node, child_node))
for o in self._observer_callables_.keys():
label = o.name if hasattr(o, 'name') else str(o)
observed_node = pydot.Node(id(o), label=label)
G.add_node(observed_node)
edge = pydot.Edge(str(id(self)), str(id(o)), color='darkorange2', arrowhead='vee')
G.add_edge(edge)
if iamroot:
return G
return node
def add_parameter(self, param, index=None): def add_parameter(self, param, index=None):
""" """
:param parameters: the parameters to add :param parameters: the parameters to add
@ -80,6 +105,14 @@ class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable):
self.remove_parameter(param) self.remove_parameter(param)
self.add_parameter(param, index) self.add_parameter(param, index)
elif param not in self._parameters_: elif param not in self._parameters_:
if param.has_parent():
parent = param._direct_parent_
while parent is not None:
if parent is self:
from GPy.core.parameterization.parameter_core import HierarchyError
raise HierarchyError, "You cannot add a parameter twice into the hirarchy"
parent = parent._direct_parent_
param._direct_parent_.remove_parameter(param)
# make sure the size is set # make sure the size is set
if index is None: if index is None:
self.constraints.update(param.constraints, self.size) self.constraints.update(param.constraints, self.size)
@ -92,12 +125,16 @@ class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable):
self.constraints.update(param.constraints, start) self.constraints.update(param.constraints, start)
self.priors.update(param.priors, start) self.priors.update(param.priors, start)
self._parameters_.insert(index, param) self._parameters_.insert(index, param)
param.add_observer(self, self._pass_through_notify_observers, -np.inf)
self.size += param.size self.size += param.size
self._connect_parameters()
self._notify_parent_change()
self._connect_fixes()
else: else:
raise RuntimeError, """Parameter exists already added and no copy made""" raise RuntimeError, """Parameter exists already added and no copy made"""
self._connect_parameters()
self._notify_parent_change()
self._connect_fixes()
def add_parameters(self, *parameters): def add_parameters(self, *parameters):
@ -120,11 +157,19 @@ class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable):
del self._parameters_[param._parent_index_] del self._parameters_[param._parent_index_]
param._disconnect_parent() param._disconnect_parent()
param.remove_observer(self, self._pass_through_notify_observers)
self.constraints.shift_left(start, param.size) self.constraints.shift_left(start, param.size)
self._connect_fixes() self._connect_fixes()
self._connect_parameters() self._connect_parameters()
self._notify_parent_change() self._notify_parent_change()
parent = self._direct_parent_
while parent is not None:
parent._connect_fixes()
parent._connect_parameters()
parent._notify_parent_change()
parent = parent._direct_parent_
def _connect_parameters(self): def _connect_parameters(self):
# connect parameterlist to this parameterized object # connect parameterlist to this parameterized object
@ -145,6 +190,13 @@ class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable):
self._add_parameter_name(p) self._add_parameter_name(p)
#=========================================================================== #===========================================================================
# notification system
#===========================================================================
def _parameters_changed_notification(self, which):
self.parameters_changed()
def _pass_through_notify_observers(self, which):
self._notify_observers(which)
#===========================================================================
# Pickling operations # Pickling operations
#=========================================================================== #===========================================================================
def pickle(self, f, protocol=-1): def pickle(self, f, protocol=-1):
@ -212,42 +264,7 @@ class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable):
g[self._offset_for(p) + numpy.array(list(i))] += g[self._raveled_index_for(t)] g[self._offset_for(p) + numpy.array(list(i))] += g[self._raveled_index_for(t)]
if self._has_fixes(): return g[self._fixes_] if self._has_fixes(): return g[self._fixes_]
return g return g
#===========================================================================
# Optimization handles:
#===========================================================================
def _get_param_names(self):
n = numpy.array([p.hirarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
return n
def _get_param_names_transformed(self):
n = self._get_param_names()
if self._has_fixes():
return n[self._fixes_]
return n
def _get_params_transformed(self):
# transformed parameters (apply transformation rules)
p = self._get_params()
[numpy.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes():
return p[self._fixes_]
return p
def _set_params_transformed(self, p):
# inverse apply transformations for parameters and set the resulting parameters
self._set_params(self._untransform_params(p))
def _untransform_params(self, p):
p = p.copy()
if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
[numpy.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
return p
#===========================================================================
# Indexable Handling
#===========================================================================
def _backtranslate_index(self, param, ind):
# translate an index in parameterized indexing into the index of param
ind = ind - self._offset_for(param)
ind = ind[ind >= 0]
internal_offset = param._internal_offset()
ind = ind[ind < param.size + internal_offset]
return ind
def _offset_for(self, param): def _offset_for(self, param):
# get the offset in the parameterized index array for param # get the offset in the parameterized index array for param
if param.has_parent(): if param.has_parent():
@ -272,34 +289,22 @@ class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable):
this is not in the global view of things! this is not in the global view of things!
""" """
return numpy.r_[:self.size] return numpy.r_[:self.size]
#===========================================================================
# Fixing parameters:
#===========================================================================
def _fixes_for(self, param):
if self._has_fixes():
return self._fixes_[self._raveled_index_for(param)]
return numpy.ones(self.size, dtype=bool)[self._raveled_index_for(param)]
#=========================================================================== #===========================================================================
# Convenience for fixed, tied checking of param: # Convenience for fixed, tied checking of param:
#=========================================================================== #===========================================================================
def fixed_indices(self):
return np.array([x.is_fixed for x in self._parameters_])
def _is_fixed(self, param):
# returns if the whole param is fixed
if not self._has_fixes():
return False
return not self._fixes_[self._raveled_index_for(param)].any()
# return not self._fixes_[self._offset_for(param): self._offset_for(param)+param._realsize_].any()
@property @property
def is_fixed(self): def is_fixed(self):
for p in self._parameters_: for p in self._parameters_:
if not p.is_fixed: return False if not p.is_fixed: return False
return True return True
def _get_original(self, param): def _get_original(self, param):
# if advanced indexing is activated it happens that the array is a copy # if advanced indexing is activated it happens that the array is a copy
# you can retrieve the original param through this method, by passing # you can retrieve the original param through this method, by passing
# the copy here # the copy here
return self._parameters_[param._parent_index_] return self._parameters_[param._parent_index_]
#=========================================================================== #===========================================================================
# Get/set parameters: # Get/set parameters:
#=========================================================================== #===========================================================================
@ -340,7 +345,7 @@ class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable):
# Printing: # Printing:
#=========================================================================== #===========================================================================
def _short(self): def _short(self):
return self.hirarchy_name() return self.hierarchy_name()
@property @property
def flattened_parameters(self): def flattened_parameters(self):
return [xi for x in self._parameters_ for xi in x.flattened_parameters] return [xi for x in self._parameters_ for xi in x.flattened_parameters]
@ -348,11 +353,6 @@ class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable):
def _parameter_sizes_(self): def _parameter_sizes_(self):
return [x.size for x in self._parameters_] return [x.size for x in self._parameters_]
@property @property
def size_transformed(self):
if self._has_fixes():
return sum(self._fixes_)
return self.size
@property
def parameter_shapes(self): def parameter_shapes(self):
return [xi for x in self._parameters_ for xi in x.parameter_shapes] return [xi for x in self._parameters_ for xi in x.parameter_shapes]
@property @property

View file

@ -64,6 +64,36 @@ class Gaussian(Prior):
return np.random.randn(n) * self.sigma + self.mu return np.random.randn(n) * self.sigma + self.mu
class Uniform(Prior):
domain = _REAL
_instances = []
def __new__(cls, lower, upper): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().lower == lower and instance().upper == upper:
return instance()
o = super(Prior, cls).__new__(cls, lower, upper)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, lower, upper):
self.lower = float(lower)
self.upper = float(upper)
def __str__(self):
return "[" + str(np.round(self.lower)) + ', ' + str(np.round(self.upper)) + ']'
def lnpdf(self, x):
region = (x>=self.lower) * (x<=self.upper)
return region
def lnpdf_grad(self, x):
return np.zeros(x.shape)
def rvs(self, n):
return np.random.uniform(self.lower, self.upper, size=n)
class LogGaussian(Prior): class LogGaussian(Prior):
""" """
Implementation of the univariate *log*-Gaussian probability function, coupled with random variables. Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.

View file

@ -4,10 +4,13 @@
import numpy as np import numpy as np
from domains import _POSITIVE,_NEGATIVE, _BOUNDED from domains import _POSITIVE,_NEGATIVE, _BOUNDED
import sys
import weakref import weakref
_lim_val = -np.log(sys.float_info.epsilon) import sys
#_lim_val = -np.log(sys.float_info.epsilon)
_exp_lim_val = np.finfo(np.float64).max
_lim_val = np.log(_exp_lim_val)#
#=============================================================================== #===============================================================================
# Fixing constants # Fixing constants
@ -34,6 +37,15 @@ class Transformation(object):
def initialize(self, f): def initialize(self, f):
""" produce a sensible initial value for f(x)""" """ produce a sensible initial value for f(x)"""
raise NotImplementedError raise NotImplementedError
def plot(self, xlabel=r'transformed $\theta$', ylabel=r'$\theta$', axes=None, *args,**kw):
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
import matplotlib.pyplot as plt
from ...plotting.matplot_dep import base_plots
x = np.linspace(-8,8)
base_plots.meanplot(x, self.f(x),axes=axes*args,**kw)
axes = plt.gca()
axes.set_xlabel(xlabel)
axes.set_ylabel(ylabel)
def __str__(self): def __str__(self):
raise NotImplementedError raise NotImplementedError
def __repr__(self): def __repr__(self):
@ -42,7 +54,7 @@ class Transformation(object):
class Logexp(Transformation): class Logexp(Transformation):
domain = _POSITIVE domain = _POSITIVE
def f(self, x): def f(self, x):
return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -np.inf, _lim_val)))) return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -_lim_val, _lim_val))))
#raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x))) #raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
def finv(self, f): def finv(self, f):
return np.where(f>_lim_val, f, np.log(np.exp(f) - 1.)) return np.where(f>_lim_val, f, np.log(np.exp(f) - 1.))
@ -54,6 +66,24 @@ class Logexp(Transformation):
return np.abs(f) return np.abs(f)
def __str__(self): def __str__(self):
return '+ve' return '+ve'
class LogexpNeg(Transformation):
domain = _POSITIVE
def f(self, x):
return np.where(x>_lim_val, -x, -np.log(1. + np.exp(np.clip(x, -np.inf, _lim_val))))
#raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
def finv(self, f):
return np.where(f>_lim_val, 0, np.log(np.exp(-f) - 1.))
def gradfactor(self, f):
return np.where(f>_lim_val, -1, -1 + np.exp(-f))
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '+ve'
class NegativeLogexp(Transformation): class NegativeLogexp(Transformation):
domain = _NEGATIVE domain = _NEGATIVE

View file

@ -2,7 +2,6 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from ..util.linalg import mdot
from gp import GP from gp import GP
from parameterization.param import Param from parameterization.param import Param
from ..inference.latent_function_inference import var_dtc from ..inference.latent_function_inference import var_dtc
@ -58,17 +57,39 @@ class SparseGP(GP):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y) self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y)
self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood')) self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood'))
if isinstance(self.X, VariationalPosterior): if isinstance(self.X, VariationalPosterior):
self.kern.update_gradients_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict) #gradients wrt kernel
self.Z.gradient = self.kern.gradients_Z_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict) dL_dKmm = self.grad_dict.pop('dL_dKmm')
else: self.kern.update_gradients_full(dL_dKmm, self.Z, None)
self.kern.update_gradients_sparse(X=self.X, Z=self.Z, **self.grad_dict) target = np.zeros(self.kern.size)
self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict) self.kern._collect_gradient(target)
self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
self.kern._collect_gradient(target)
self.kern._set_gradient(target)
def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False): #gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
else:
#gradients wrt kernel
target = np.zeros(self.kern.size)
self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
self.kern._collect_gradient(target)
self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)
self.kern._collect_gradient(target)
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
self.kern._collect_gradient(target)
self.kern._set_gradient(target)
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
def _raw_predict(self, Xnew, full_cov=False):
""" """
Make a prediction for the latent function values Make a prediction for the latent function values
""" """
if X_variance_new is None: if not isinstance(Xnew, VariationalPosterior):
Kx = self.kern.K(self.Z, Xnew) Kx = self.kern.K(self.Z, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector) mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov: if full_cov:
@ -79,13 +100,13 @@ class SparseGP(GP):
Kxx = self.kern.Kdiag(Xnew) Kxx = self.kern.Kdiag(Xnew)
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
else: else:
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) Kx = self.kern.psi1(self.Z, Xnew)
mu = np.dot(Kx, self.Cpsi1V) mu = np.dot(Kx, self.posterior.woodbury_vector)
if full_cov: if full_cov:
raise NotImplementedError, "TODO" raise NotImplementedError, "TODO"
else: else:
Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new) Kxx = self.kern.psi0(self.Z, Xnew)
psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new) psi2 = self.kern.psi2(self.Z, Xnew)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var return mu, var

View file

@ -16,16 +16,16 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
output_dim = 1 output_dim = 1
input_dim = 3 input_dim = 3
else: else:
input_dim = 1 input_dim = 2
output_dim = output_dim output_dim = output_dim
# generate GPLVM-like data # generate GPLVM-like data
X = _np.random.rand(num_inputs, input_dim) X = _np.random.rand(num_inputs, input_dim)
#lengthscales = _np.random.rand(input_dim) lengthscales = _np.random.rand(input_dim)
#k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True) k = GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
##+ GPy.kern.white(input_dim, 0.01) ##+ GPy.kern.white(input_dim, 0.01)
#) #)
k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) #k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
@ -164,7 +164,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
_np.random.seed(0) _np.random.seed(0)
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
kernel = GPy.kern.RBF(Q, 1., _np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2)) kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
Y = data['X'][:N] Y = data['X'][:N]
m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k) m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
m.data_labels = data['Y'][:N].argmax(axis=1) m.data_labels = data['Y'][:N].argmax(axis=1)
@ -187,10 +187,10 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
_np.random.seed(1234) _np.random.seed(1234)
x = _np.linspace(0, 4 * _np.pi, N)[:, None] x = _np.linspace(0, 4 * _np.pi, N)[:, None]
s1 = _np.vectorize(lambda x: -_np.sin(_np.exp(x))) s1 = _np.vectorize(lambda x: _np.sin(x))
s2 = _np.vectorize(lambda x: _np.cos(x)**2) s2 = _np.vectorize(lambda x: _np.cos(x)**2)
s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x))) s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x)))
sS = _np.vectorize(lambda x: x*_np.sin(x)) sS = _np.vectorize(lambda x: _np.cos(x))
s1 = s1(x) s1 = s1(x)
s2 = s2(x) s2 = s2(x)
@ -202,7 +202,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
s3 -= s3.mean(); s3 /= s3.std(0) s3 -= s3.mean(); s3 /= s3.std(0)
sS -= sS.mean(); sS /= sS.std(0) sS -= sS.mean(); sS /= sS.std(0)
S1 = _np.hstack([s1, s2, sS]) S1 = _np.hstack([s1, sS])
S2 = _np.hstack([s2, s3, sS]) S2 = _np.hstack([s2, s3, sS])
S3 = _np.hstack([s3, sS]) S3 = _np.hstack([s3, sS])
@ -270,10 +270,11 @@ def bgplvm_simulation(optimize=True, verbose=1,
from GPy import kern from GPy import kern
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10 D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
#k = kern.RBF(Q, ARD=True, lengthscale=10.)
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
if optimize: if optimize:
@ -281,7 +282,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
m.optimize('bfgs', messages=verbose, max_iters=max_iters, m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05) gtol=.05)
if plot: if plot:
m.q.plot("BGPLVM Latent Space 1D") m.X.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
return m return m
@ -293,7 +294,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 5, 9 D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 7, 9
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
@ -302,7 +303,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k) m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
m.inference_method = VarDTCMissingData() m.inference_method = VarDTCMissingData()
m.Y[inan] = _np.nan m.Y[inan] = _np.nan
m.q.variance *= .1 m.X.variance *= .1
m.parameters_changed() m.parameters_changed()
m.Yreal = Y m.Yreal = Y
@ -311,7 +312,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
m.optimize('bfgs', messages=verbose, max_iters=max_iters, m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05) gtol=.05)
if plot: if plot:
m.q.plot("BGPLVM Latent Space 1D") m.X.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
return m return m

View file

@ -23,11 +23,26 @@ class EP(object):
self.old_mutilde, self.old_vtilde = None, None self.old_mutilde, self.old_vtilde = None, None
def inference(self, kern, X, likelihood, Y, Y_metadata=None): def inference(self, kern, X, likelihood, Y, Y_metadata=None):
num_data, output_dim = X.shape
assert output_dim ==1, "ep in 1D only (for now!)"
K = kern.K(X) K = kern.K(X)
mu_tilde, tau_tilde = self.expectation_propagation() mu_tilde, tau_tilde = self.expectation_propagation()
Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde)
alpha, _ = dpotrs(LW, mu_tilde, lower=1)
log_marginal = 0.5*(-num_data * log_2_pi - W_logdet - np.sum(alpha * mu_tilde))
dL_dK = 0.5 * (tdot(alpha[:,None]) - Wi)
#TODO: what abot derivatives of the likelihood parameters?
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}
def expectation_propagation(self, K, Y, Y_metadata, likelihood) def expectation_propagation(self, K, Y, Y_metadata, likelihood)

View file

@ -49,8 +49,7 @@ class ExactGaussianInference(object):
dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi) dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
kern.update_gradients_full(dL_dK, X) #TODO: does this really live here?
likelihood.update_gradients(np.diag(dL_dK)) likelihood.update_gradients(np.diag(dL_dK))
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK} return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}

View file

@ -60,8 +60,7 @@ class VarDTC(object):
_, output_dim = Y.shape _, output_dim = Y.shape
#see whether we've got a different noise variance for each datum #see whether we've got a different noise variance for each datum
beta = 1./np.squeeze(likelihood.variance) beta = 1./np.fmax(likelihood.variance, 1e-6)
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = self.get_YYTfactor(Y) #self.YYTfactor = self.get_YYTfactor(Y)
#VVT_factor = self.get_VVTfactor(self.YYTfactor, beta) #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
@ -204,18 +203,17 @@ class VarDTCMissingData(object):
def inference(self, kern, X, Z, likelihood, Y): def inference(self, kern, X, Z, likelihood, Y):
if isinstance(X, VariationalPosterior): if isinstance(X, VariationalPosterior):
uncertain_inputs = True uncertain_inputs = True
psi0 = kern.psi0(Z, X) psi0_all = kern.psi0(Z, X)
psi1 = kern.psi1(Z, X) psi1_all = kern.psi1(Z, X)
psi2 = kern.psi2(Z, X) psi2_all = kern.psi2(Z, X)
else: else:
uncertain_inputs = False uncertain_inputs = False
psi0 = kern.Kdiag(X) psi0_all = kern.Kdiag(X)
psi1 = kern.K(X, Z) psi1_all = kern.K(X, Z)
psi2 = None psi2_all = None
Ys, traces = self._Y(Y) Ys, traces = self._Y(Y)
beta_all = 1./likelihood.variance beta_all = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta_all.size != 1 het_noise = beta_all.size != 1
import itertools import itertools

View file

@ -101,7 +101,7 @@ class Add(Kern):
raise NotImplementedError, "psi2 cannot be computed for this kernel" raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2 return psi2
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from white import White from white import White
from rbf import RBF from rbf import RBF
#from rbf_inv import RBFInv #from rbf_inv import RBFInv
@ -124,10 +124,10 @@ class Add(Kern):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1]) p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from white import White from white import White
from rbf import RBF from rbf import RBF
#from rbf_inv import rbfinv #from rbf_inv import rbfinv
@ -151,10 +151,10 @@ class Add(Kern):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
target += p1.gradients_z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1]) target += p1.gradients_z_variational(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
return target return target
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from white import white from white import white
from rbf import rbf from rbf import rbf
#from rbf_inv import rbfinv #from rbf_inv import rbfinv
@ -179,25 +179,17 @@ class Add(Kern):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2.
a, b = p1.gradients_muS_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1]) a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1])
target_mu += a target_mu += a
target_S += b target_S += b
return target_mu, target_S return target_mu, target_S
def plot(self, *args, **kwargs):
"""
See GPy.plotting.matplot_dep.plot
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import kernel_plots
kernel_plots.plot(self,*args)
def input_sensitivity(self): def input_sensitivity(self):
in_sen = np.zeros((self.input_dim, self.num_params)) in_sen = np.zeros((self.num_params, self.input_dim))
for i, [p, i_s] in enumerate(zip(self._parameters_, self.input_slices)): for i, [p, i_s] in enumerate(zip(self._parameters_, self.input_slices)):
in_sen[i_s, i] = p.input_sensitivity() in_sen[i, i_s] = p.input_sensitivity()
return in_sen return in_sen
def _getstate(self): def _getstate(self):
""" """
Get the current state of the class, Get the current state of the class,

View file

@ -60,17 +60,6 @@ class Coregionalize(Kern):
def K(self, X, X2=None): def K(self, X, X2=None):
index = np.asarray(X, dtype=np.int) index = np.asarray(X, dtype=np.int)
#here's the old code (numpy)
#if index2 is None:
#index2 = index
#else:
#index2 = np.asarray(index2, dtype=np.int)
#false_target = target.copy()
#ii, jj = np.meshgrid(index, index2)
#ii, jj = ii.T, jj.T
#false_target += self.B[ii, jj]
if X2 is None: if X2 is None:
target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64) target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64)
code=""" code="""

View file

@ -26,55 +26,71 @@ class Kern(Parameterized):
raise NotImplementedError raise NotImplementedError
def Kdiag(self, Xa): def Kdiag(self, Xa):
raise NotImplementedError raise NotImplementedError
def psi0(self,Z,variational_posterior): def psi0(self, Z, variational_posterior):
raise NotImplementedError raise NotImplementedError
def psi1(self,Z,variational_posterior): def psi1(self, Z, variational_posterior):
raise NotImplementedError raise NotImplementedError
def psi2(self,Z,variational_posterior): def psi2(self, Z, variational_posterior):
raise NotImplementedError raise NotImplementedError
def gradients_X(self, dL_dK, X, X2): def gradients_X(self, dL_dK, X, X2):
raise NotImplementedError raise NotImplementedError
def gradients_X_diag(self, dL_dK, X): def gradients_X_diag(self, dL_dK, X):
raise NotImplementedError raise NotImplementedError
def update_gradients_full(self, dL_dK, X, X2): def update_gradients_full(self, dL_dK, X, X2):
"""Set the gradients of all parameters when doing full (N) inference.""" """Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError raise NotImplementedError
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
target = np.zeros(self.size)
self.update_gradients_diag(dL_dKdiag, X)
self._collect_gradient(target)
self.update_gradients_full(dL_dKnm, X, Z)
self._collect_gradient(target)
self.update_gradients_full(dL_dKmm, Z, None)
self._collect_gradient(target)
self._set_gradient(target)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs.""" """
Set the gradients of all parameters when doing inference with
uncertain inputs, using expectations of the kernel.
The esential maths is
dL_d{theta_i} = dL_dpsi0 * dpsi0_d{theta_i} +
dL_dpsi1 * dpsi1_d{theta_i} +
dL_dpsi2 * dpsi2_d{theta_i}
"""
raise NotImplementedError raise NotImplementedError
def gradients_Z_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
grad = self.gradients_X(dL_dKmm, Z) def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
grad += self.gradients_X(dL_dKnm.T, Z, X) """
return grad Returns the derivative of the objective wrt Z, using the chain rule
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): through the expectation variables.
"""
raise NotImplementedError raise NotImplementedError
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Compute the gradients wrt the parameters of the variational
distruibution q(X), chain-ruling via the expectations of the kernel
"""
raise NotImplementedError raise NotImplementedError
def plot(self, *args, **kwargs):
"""
See GPy.plotting.matplot_dep.plot
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import kernel_plots
kernel_plots.plot(self,*args)
def plot_ARD(self, *args, **kw): def plot_ARD(self, *args, **kw):
if "matplotlib" in sys.modules: """
from ...plotting.matplot_dep import kernel_plots See :class:`~GPy.plotting.matplot_dep.kernel_plots`
self.plot_ARD.__doc__ += kernel_plots.plot_ARD.__doc__ """
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import kernel_plots from ...plotting.matplot_dep import kernel_plots
return kernel_plots.plot_ARD(self,*args,**kw) return kernel_plots.plot_ARD(self,*args,**kw)
def input_sensitivity(self): def input_sensitivity(self):
""" """
Returns the sensitivity for each dimension of this kernel. Returns the sensitivity for each dimension of this kernel.
""" """
return np.zeros(self.input_dim) return np.zeros(self.input_dim)
def __add__(self, other): def __add__(self, other):
""" Overloading of the '+' operator. for more control, see self.add """ """ Overloading of the '+' operator. for more control, see self.add """
return self.add(other) return self.add(other)
@ -96,10 +112,12 @@ class Kern(Parameterized):
""" """
assert isinstance(other, Kern), "only kernels can be added to kernels..." assert isinstance(other, Kern), "only kernels can be added to kernels..."
from add import Add from add import Add
return Add([self, other], tensor) kernels = []
if not tensor and isinstance(self, Add): kernels.extend(self._parameters_)
def __call__(self, X, X2=None): else: kernels.append(self)
return self.K(X, X2) if not tensor and isinstance(other, Add): kernels.extend(other._parameters_)
else: kernels.append(other)
return Add(kernels, tensor)
def __mul__(self, other): def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information""" """ Here we overload the '*' operator. See self.prod for more information"""
@ -113,7 +131,8 @@ class Kern(Parameterized):
def prod(self, other, tensor=False): def prod(self, other, tensor=False):
""" """
Multiply two kernels (either on the same space, or on the tensor product of the input space). Multiply two kernels (either on the same space, or on the tensor
product of the input space).
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern

View file

@ -9,7 +9,7 @@ from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
from ...util.caching import cache_this from ...util.caching import Cache_this
class Linear(Kern): class Linear(Kern):
""" """
@ -22,22 +22,25 @@ class Linear(Kern):
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i` :param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there is only one variance parameter) :type variances: array or list of the appropriate size (or float if there
:param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension. is only one variance parameter)
:param ARD: Auto Relevance Determination. If False, the kernel has only one
variance parameter \sigma^2, otherwise there is one variance
parameter per dimension.
:type ARD: Boolean :type ARD: Boolean
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self, input_dim, variances=None, ARD=False, name='linear'): def __init__(self, input_dim, variances=None, ARD=False, name='linear'):
super(Linear, self).__init__(input_dim, name) super(Linear, self).__init__(input_dim, name)
self.ARD = ARD self.ARD = ARD
if ARD == False: if not ARD:
if variances is not None: if variances is not None:
variances = np.asarray(variances) variances = np.asarray(variances)
assert variances.size == 1, "Only one variance needed for non-ARD kernel" assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else: else:
variances = np.ones(1) variances = np.ones(1)
self._Xcache, self._X2cache = np.empty(shape=(2,))
else: else:
if variances is not None: if variances is not None:
variances = np.asarray(variances) variances = np.asarray(variances)
@ -47,13 +50,8 @@ class Linear(Kern):
self.variances = Param('variances', variances, Logexp()) self.variances = Param('variances', variances, Logexp())
self.add_parameter(self.variances) self.add_parameter(self.variances)
self.variances.add_observer(self, self._on_changed)
def _on_changed(self, obj): @Cache_this(limit=2)
#TODO: move this to base class? isnt it jst for the caching?
self._notify_observers()
#@cache_this(limit=3, reset_on_self=True)
def K(self, X, X2=None): def K(self, X, X2=None):
if self.ARD: if self.ARD:
if X2 is None: if X2 is None:
@ -64,7 +62,7 @@ class Linear(Kern):
else: else:
return self._dot_product(X, X2) * self.variances return self._dot_product(X, X2) * self.variances
#@cache_this(limit=3, reset_on_self=False) @Cache_this(limit=1, ignore_args=(0,))
def _dot_product(self, X, X2=None): def _dot_product(self, X, X2=None):
if X2 is None: if X2 is None:
return tdot(X) return tdot(X)
@ -103,7 +101,6 @@ class Linear(Kern):
#---------------------------------------# #---------------------------------------#
# PSI statistics # # PSI statistics #
# variational #
#---------------------------------------# #---------------------------------------#
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
@ -112,38 +109,34 @@ class Linear(Kern):
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
return self.K(variational_posterior.mean, Z) #the variance, it does nothing return self.K(variational_posterior.mean, Z) #the variance, it does nothing
@Cache_this(limit=1)
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
ZA = Z * self.variances ZA = Z * self.variances
ZAinner = self._ZAinner(variational_posterior, Z) ZAinner = self._ZAinner(variational_posterior, Z)
return np.dot(ZAinner, ZA.T) return np.dot(ZAinner, ZA.T)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
mu, S = variational_posterior.mean, variational_posterior.variance #psi1
self.update_gradients_full(dL_dpsi1, variational_posterior.mean, Z)
# psi0: # psi0:
tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior) tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior)
if self.ARD: grad = tmp.sum(0) if self.ARD: self.variances.gradient += tmp.sum(0)
else: grad = np.atleast_1d(tmp.sum()) else: self.variances.gradient += tmp.sum()
#psi1
self.update_gradients_full(dL_dpsi1, mu, Z)
grad += self.variances.gradient
#psi2 #psi2
tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * (2. * Z)[None, None, :, :]) if self.ARD:
if self.ARD: grad += tmp.sum(0).sum(0).sum(0) tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * Z[None, None, :, :])
else: grad += tmp.sum() self.variances.gradient += 2.*tmp.sum(0).sum(0).sum(0)
#from Kmm else:
self.update_gradients_full(dL_dKmm, Z, None) self.variances.gradient += 2.*np.sum(dL_dpsi2 * self.psi2(Z, variational_posterior))/self.variances
self.variances.gradient += grad
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z): def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# Kmm
grad = self.gradients_X(dL_dKmm, Z, None)
#psi1 #psi1
grad += self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean) grad = self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean)
#psi2 #psi2
self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad) self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad)
return grad return grad
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape) grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape)
# psi0 # psi0
grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances) grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances)
@ -160,7 +153,7 @@ class Linear(Kern):
#--------------------------------------------------# #--------------------------------------------------#
def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, pv, target_mu, target_S): def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, vp, target_mu, target_S):
# Think N,num_inducing,num_inducing,input_dim # Think N,num_inducing,num_inducing,input_dim
ZA = Z * self.variances ZA = Z * self.variances
AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :] AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]
@ -203,16 +196,15 @@ class Linear(Kern):
weave_options = {'headers' : ['<omp.h>'], weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'], 'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']} 'extra_link_args' : ['-lgomp']}
mu = vp.mean
mu = pv.mean
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu) N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'], arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options) type_converters=weave.converters.blitz,**weave_options)
def _weave_dpsi2_dZ(self, dL_dpsi2, Z, pv, target): def _weave_dpsi2_dZ(self, dL_dpsi2, Z, vp, target):
AZA = self.variances*self._ZAinner(pv, Z) AZA = self.variances*self._ZAinner(vp, Z)
code=""" code="""
int n,m,mm,q; int n,m,mm,q;
#pragma omp parallel for private(n,mm,q) #pragma omp parallel for private(n,mm,q)
@ -234,23 +226,25 @@ class Linear(Kern):
'extra_compile_args': ['-fopenmp -O3'], #-march=native'], 'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']} 'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim = pv.mean.shape[0],Z.shape[0],pv.mean.shape[1] N,num_inducing,input_dim = vp.mean.shape[0],Z.shape[0],vp.mean.shape[1]
mu = param_to_array(pv.mean) mu = param_to_array(vp.mean)
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'], arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options) type_converters=weave.converters.blitz,**weave_options)
def _mu2S(self, pv): @Cache_this(limit=1, ignore_args=(0,))
return np.square(pv.mean) + pv.variance def _mu2S(self, vp):
return np.square(vp.mean) + vp.variance
def _ZAinner(self, pv, Z): @Cache_this(limit=1)
def _ZAinner(self, vp, Z):
ZA = Z*self.variances ZA = Z*self.variances
inner = (pv.mean[:, None, :] * pv.mean[:, :, None]) inner = (vp.mean[:, None, :] * vp.mean[:, :, None])
diag_indices = np.diag_indices(pv.mean.shape[1], 2) diag_indices = np.diag_indices(vp.mean.shape[1], 2)
inner[:, diag_indices[0], diag_indices[1]] += pv.variance inner[:, diag_indices[0], diag_indices[1]] += vp.variance
return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]! return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x num_data x input_dim]!
def input_sensitivity(self): def input_sensitivity(self):
if self.ARD: return self.variances if self.ARD: return self.variances

View file

@ -4,12 +4,11 @@
import numpy as np import numpy as np
from scipy import weave from scipy import weave
from kern import Kern from ...util.misc import param_to_array
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from stationary import Stationary from stationary import Stationary
from GPy.util.caching import Cache_this
from ...core.parameterization import variational
from rbf_psi_comp import ssrbf_psi_comp
class RBF(Stationary): class RBF(Stationary):
""" """
@ -21,7 +20,7 @@ class RBF(Stationary):
""" """
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='RBF'): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='rbf'):
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name) super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name)
self.weave_options = {} self.weave_options = {}
@ -35,92 +34,144 @@ class RBF(Stationary):
# PSI statistics # # PSI statistics #
#---------------------------------------# #---------------------------------------#
def parameters_changed(self):
# reset cached results
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
return self.Kdiag(variational_posterior.mean) return self.Kdiag(variational_posterior.mean)
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
mu = variational_posterior.mean if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
S = variational_posterior.variance psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
self._psi_computations(Z, mu, S) else:
return self._psi1 _, _, _, psi1 = self._psi1computations(Z, variational_posterior)
return psi1
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
mu = variational_posterior.mean if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
S = variational_posterior.variance psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
self._psi_computations(Z, mu, S)
return self._psi2
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
#contributions from Kmm
sself.update_gradients_full(dL_dKmm, Z)
mu = variational_posterior.mean
S = variational_posterior.variance
self._psi_computations(Z, mu, S)
l2 = self.lengthscale **2
#contributions from psi0:
self.variance.gradient += np.sum(dL_dpsi0)
#from psi1
self.variance.gradient += np.sum(dL_dpsi1 * self._psi1 / self.variance)
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD:
self.lengthscale.gradient += dpsi1_dlength.sum()
else: else:
self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0) _, _, _, _, psi2 = self._psi2computations(Z, variational_posterior)
return psi2
#from psi2 def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
d_var = 2.*self._psi2 / self.variance # Spike-and-Slab GPLVM
d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / l2) / (self.lengthscale * self._psi2_denom) if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
_, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0)
#from psi1
self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
#from psi2
self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
return
elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale **2
#contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0)
self.lengthscale.gradient = 0.
#from psi1
denom, _, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
d_length = psi1[:,:,None] * ((dist_sq - 1.)/(self.lengthscale*denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if self.ARD:
self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0)
else:
self.lengthscale.gradient += dpsi1_dlength.sum()
self.variance.gradient += np.sum(dL_dpsi1 * psi1) / self.variance
#from psi2
S = variational_posterior.variance
_, Zdist_sq, _, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
if not self.ARD:
self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2).sum()
else:
self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
self.variance.gradient += np.sum(dL_dpsi2 * d_var)
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
if not self.ARD:
self.lengthscale.gradient += dpsi2_dlength.sum()
else: else:
self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0) raise ValueError, "unknown distriubtion received for psi-statistics"
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
mu = variational_posterior.mean # Spike-and-Slab GPLVM
S = variational_posterior.variance if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
self._psi_computations(Z, mu, S) _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
l2 = self.lengthscale **2 _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1
grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
#psi2
grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
return grad
#psi1 elif isinstance(variational_posterior, variational.NormalPosterior):
denominator = (l2 * (self._psi1_denom))
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) l2 = self.lengthscale **2
grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
#psi2 #psi1
term1 = self._psi2_Zdist / l2 # num_inducing, num_inducing, input_dim denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
term2 = self._psi2_mudist / self._psi2_denom / l2 # N, num_inducing, num_inducing, input_dim grad = np.einsum('ij,ij,ijk,ijk->jk', dL_dpsi1, psi1, dist, -1./(denom*l2))
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
grad += self.gradients_X(dL_dKmm, Z, None) #psi2
Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
term1 = Zdist / l2 # M, M, Q
S = variational_posterior.variance
term2 = mudist / (2.*S[:,None,None,:] + l2) # N, M, M, Q
return grad grad += 2.*np.einsum('ijk,ijk,ijkl->kl', dL_dpsi2, psi2, term1[None,:,:,:] + term2)
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): return grad
mu = variational_posterior.mean else:
S = variational_posterior.variance raise ValueError, "unknown distriubtion received for psi-statistics"
self._psi_computations(Z, mu, S)
l2 = self.lengthscale **2 def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
#psi1 # Spike-and-Slab GPLVM
tmp = self._psi1[:, :, None] / l2 / self._psi1_denom if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) ndata = variational_posterior.mean.shape[0]
grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
#psi2 _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
tmp = self._psi2[:, :, :, None] / l2 / self._psi2_denom _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) #psi1
grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
#psi2
grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
return grad_mu, grad_S, grad_gamma
elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale **2
#psi1
denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
tmp = psi1[:, :, None] / l2 / denom
grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * dist, 1)
grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (dist_sq - 1), 1)
#psi2
_, _, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
S = variational_posterior.variance
tmp = psi2[:, :, :, None] / (2.*S[:,None,None,:] + l2)
grad_mu += -2.*np.einsum('ijk,ijkl,ijkl->il', dL_dpsi2, tmp , mudist)
grad_S += np.einsum('ijk,ijkl,ijkl->il', dL_dpsi2 , tmp , (2.*mudist_sq - 1))
else:
raise ValueError, "unknown distriubtion received for psi-statistics"
return grad_mu, grad_S return grad_mu, grad_S
@ -128,142 +179,79 @@ class RBF(Stationary):
# Precomputations # # Precomputations #
#---------------------------------------# #---------------------------------------#
def _dL_dlengthscales_via_K(self, dL_dK, X, X2): @Cache_this(limit=1)
""" def _psi1computations(self, Z, vp):
A helper function for update_gradients_* methods mu, S = vp.mean, vp.variance
l2 = self.lengthscale **2
Computes the derivative of the objective L wrt the lengthscales via denom = S[:, None, :] / l2 + 1. # N,1,Q
dist = Z[None, :, :] - mu[:, None, :] # N,M,Q
dL_dl = sum_{i,j}(dL_dK_{ij} dK_dl) dist_sq = np.square(dist) / l2 / denom # N,M,Q
exponent = -0.5 * np.sum(dist_sq + np.log(denom), -1)#N,M
assumes self._K_computations has just been called. psi1 = self.variance * np.exp(exponent) # N,M
return denom, dist, dist_sq, psi1
This is only valid if self.ARD=True
"""
target = np.zeros(self.input_dim)
dvardLdK = self._K_dvar * dL_dK
var_len3 = self.variance / np.power(self.lengthscale, 3)
if X2 is None:
# save computation for the symmetrical case
dvardLdK = dvardLdK + dvardLdK.T
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<i; j++){
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
}
}
target(q) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
X, dvardLdK, var_len3 = param_to_array(X, dvardLdK, var_len3)
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
else:
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<num_inducing; j++){
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
}
}
target(q) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
X, X2, dvardLdK, var_len3 = param_to_array(X, X2, dvardLdK, var_len3)
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
return target
@Cache_this(limit=1, ignore_args=(0,))
def _Z_distances(self, Z):
Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
return Zhat, Zdist
def _psi_computations(self, Z, mu, S): @Cache_this(limit=1)
# here are the "statistics" for psi1 and psi2 def _psi2computations(self, Z, vp):
Z_changed = not fast_array_equal(Z, self._Z) mu, S = vp.mean, vp.variance
if Z_changed:
# Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q
if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S): N, Q = mu.shape
# something's changed. recompute EVERYTHING M = Z.shape[0]
l2 = self.lengthscale **2
# psi1 #compute required distances
self._psi1_denom = S[:, None, :] / l2 + 1. Zhat, Zdist = self._Z_distances(Z)
self._psi1_dist = Z[None, :, :] - mu[:, None, :] Zdist_sq = np.square(Zdist / self.lengthscale) # M,M,Q
self._psi1_dist_sq = np.square(self._psi1_dist) / l2 / self._psi1_denom
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
self._psi1 = self.variance * np.exp(self._psi1_exponent)
# psi2 #allocate memory for the things we want to compute
self._psi2_denom = 2.*S[:, None, None, :] / l2 + 1. # N,M,M,Q mudist = np.empty((N, M, M, Q))
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat) mudist_sq = np.empty((N, M, M, Q))
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q psi2 = np.empty((N, M, M))
# self._psi2_mudist_sq = np.square(self._psi2_mudist)/(l2*self._psi2_denom)
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
# store matrices for caching l2 = self.lengthscale **2
self._Z, self._mu, self._S = Z, mu, S denom = (2.*S[:,None,None,:] / l2) + 1. # N,Q
half_log_denom = 0.5 * np.log(denom[:,0,0,:])
denom_l2 = denom[:,0,0,:]*l2
def weave_psi2(self, mu, Zhat): variance_sq = float(np.square(self.variance))
N, input_dim = mu.shape
num_inducing = Zhat.shape[0]
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
psi2_exponent = np.zeros((N, num_inducing, num_inducing))
psi2 = np.empty((N, num_inducing, num_inducing))
psi2_Zdist_sq = self._psi2_Zdist_sq
_psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
variance_sq = np.float64(np.square(self.variance))
if self.ARD:
lengthscale2 = self.lengthscale **2
else:
lengthscale2 = np.ones(input_dim) * self.lengthscale2**2
code = """ code = """
double tmp; double tmp, exponent_tmp;
#pragma omp parallel for private(tmp) #pragma omp parallel for private(tmp, exponent_tmp)
for (int n=0; n<N; n++){ for (int n=0; n<N; n++)
for (int m=0; m<num_inducing; m++){ {
for (int mm=0; mm<(m+1); mm++){ for (int m=0; m<M; m++)
for (int q=0; q<input_dim; q++){ {
//compute mudist for (int mm=0; mm<(m+1); mm++)
tmp = mu(n,q) - Zhat(m,mm,q); {
mudist(n,m,mm,q) = tmp; exponent_tmp = 0.0;
mudist(n,mm,m,q) = tmp; for (int q=0; q<Q; q++)
{
//compute mudist
tmp = mu(n,q) - Zhat(m,mm,q);
mudist(n,m,mm,q) = tmp;
mudist(n,mm,m,q) = tmp;
//now mudist_sq //now mudist_sq
tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q); tmp = tmp*tmp/denom_l2(n,q);
mudist_sq(n,m,mm,q) = tmp; mudist_sq(n,m,mm,q) = tmp;
mudist_sq(n,mm,m,q) = tmp; mudist_sq(n,mm,m,q) = tmp;
//now psi2_exponent //now exponent
tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q); tmp = -Zdist_sq(m,mm,q) - tmp - half_log_denom(n,q);
psi2_exponent(n,mm,m) += tmp; exponent_tmp += tmp;
if (m !=mm){ }
psi2_exponent(n,m,mm) += tmp; //compute psi2 by exponontiating
} psi2(n,m,mm) = variance_sq * exp(exponent_tmp);
//psi2 would be computed like this, but np is faster psi2(n,mm,m) = psi2(n,m,mm);
//tmp = variance_sq*exp(psi2_exponent(n,m,mm));
//psi2(n,m,mm) = tmp;
//psi2(n,mm,m) = tmp;
}
} }
} }
} }
""" """
support_code = """ support_code = """
@ -272,11 +260,51 @@ class RBF(Stationary):
""" """
mu = param_to_array(mu) mu = param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'], arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options) type_converters=weave.converters.blitz, **self.weave_options)
return mudist, mudist_sq, psi2_exponent, psi2 return Zdist, Zdist_sq, mudist, mudist_sq, psi2
def input_sensitivity(self): def _weave_psi2_lengthscale_grads(self, dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2):
if self.ARD: return 1./self.lengthscale
else: return (1./self.lengthscale).repeat(self.input_dim) #here's the einsum equivalent, it's ~3 times slower
#return 2.*np.einsum( 'ijk,ijk,ijkl,il->l', dL_dpsi2, psi2, Zdist_sq * (2.*S[:,None,None,:]/l2 + 1.) + mudist_sq + S[:, None, None, :] / l2, 1./(2.*S + l2))*self.lengthscale
result = np.zeros(self.input_dim)
code = """
double tmp;
for(int q=0; q<Q; q++)
{
tmp = 0.0;
#pragma omp parallel for reduction(+:tmp)
for(int n=0; n<N; n++)
{
for(int m=0; m<M; m++)
{
//diag terms
tmp += dL_dpsi2(n,m,m) * psi2(n,m,m) * (Zdist_sq(m,m,q) * (2.0*S(n,q)/l2(q) + 1.0) + mudist_sq(n,m,m,q) + S(n,q)/l2(q)) / (2.0*S(n,q) + l2(q)) ;
//off-diag terms
for(int mm=0; mm<m; mm++)
{
tmp += 2.0 * dL_dpsi2(n,m,mm) * psi2(n,m,mm) * (Zdist_sq(m,mm,q) * (2.0*S(n,q)/l2(q) + 1.0) + mudist_sq(n,m,mm,q) + S(n,q)/l2(q)) / (2.0*S(n,q) + l2(q)) ;
}
}
}
result(q) = tmp;
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
N,Q = S.shape
M = psi2.shape[-1]
S = param_to_array(S)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['psi2', 'dL_dpsi2', 'N', 'M', 'Q', 'mudist_sq', 'l2', 'Zdist_sq', 'S', 'result'],
type_converters=weave.converters.blitz, **self.weave_options)
return 2.*result*self.lengthscale

View file

@ -0,0 +1,2 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

View file

@ -0,0 +1,111 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The package for the psi statistics computation
"""
import numpy as np
def _Z_distances(Z):
Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
return Zhat, Zdist
# def _psi1computations(self, Z, vp):
# mu, S = vp.mean, vp.variance
# l2 = lengthscale **2
# denom = S[:, None, :] / l2 + 1. # N,1,Q
# dist = Z[None, :, :] - mu[:, None, :] # N,M,Q
# dist_sq = np.square(dist) / l2 / denom # N,M,Q
# exponent = -0.5 * np.sum(dist_sq + np.log(denom), -1)#N,M
# psi1 = self.variance * np.exp(exponent) # N,M
# return denom, dist, dist_sq, psi1
def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
# _dpsi1_dvariance NxM
# _dpsi1_dlengthscale NxMxQ
# _dpsi1_dZ NxMxQ
# _dpsi1_dgamma NxMxQ
# _dpsi1_dmu NxMxQ
# _dpsi1_dS NxMxQ
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
_psi1_exponent = np.log(np.exp(_psi1_exponent1) + np.exp(_psi1_exponent2)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
_dpsi1_dvariance = _psi1 / variance # NxM
_dpsi1_dgamma = _psi1_q * (_psi1_exp_dist_sq/_psi1_denom_sqrt-_psi1_exp_Z) # NxMxQ
_dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
_dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
_dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
_dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
return _psi1, _dpsi1_dvariance, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _dpsi1_dZ, _dpsi1_dlengthscale
def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 and psi2
# Produced intermediate results:
# _psi2 NxMxM
# _psi2_dvariance NxMxM
# _psi2_dlengthscale NxMxMxQ
# _psi2_dZ NxMxMxQ
# _psi2_dgamma NxMxMxQ
# _psi2_dmu NxMxMxQ
# _psi2_dS NxMxMxQ
lengthscale2 = np.square(lengthscale)
_psi2_Zhat, _psi2_Zdist = _Z_distances(Z)
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
_psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent = np.log(np.exp(_psi2_exponent1) + np.exp(_psi2_exponent2))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = np.square(variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
_psi2 = np.square(variance) * np.exp(_psi2_exp_sum) # N,M,M
_dpsi2_dvariance = 2. * _psi2/variance # NxMxM
_dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ
_dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ
_dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ
_dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ
_dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ
return _psi2, _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ, _dpsi2_dlengthscale

View file

@ -6,8 +6,8 @@ from kern import Kern
import numpy as np import numpy as np
from ...util.linalg import tdot from ...util.linalg import tdot
from ...util.config import * from ...util.config import *
from ...util.caching import cache_this
from stationary import Stationary from stationary import Stationary
from rbf_psi_comp import ssrbf_psi_comp
class SSRBF(Stationary): class SSRBF(Stationary):
""" """
@ -55,107 +55,69 @@ class SSRBF(Stationary):
# PSI statistics # # PSI statistics #
#---------------------------------------# #---------------------------------------#
def psi0(self, Z, posterior_variational): def psi0(self, Z, variational_posterior):
ret = np.empty(posterior_variational.mean.shape[0]) ret = np.empty(variational_posterior.mean.shape[0])
ret[:] = self.variance ret[:] = self.variance
return ret return ret
def psi1(self, Z, posterior_variational): def psi1(self, Z, variational_posterior):
self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob) _psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
return self._psi1 return _psi1
def psi2(self, Z, posterior_variational): def psi2(self, Z, variational_posterior):
self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob) _psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
return self._psi2 return _psi2
def dL_dpsi0_dmuSgamma(self, dL_dpsi0, Z, mu, S, gamma, target_mu, target_S, target_gamma): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
pass _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
def dL_dpsi1_dmuSgamma(self, dL_dpsi1, Z, mu, S, gamma, target_mu, target_S, target_gamma):
self._psi_computations(Z, mu, S, gamma)
target_mu += (dL_dpsi1[:, :, None] * self._dpsi1_dmu).sum(axis=1)
target_S += (dL_dpsi1[:, :, None] * self._dpsi1_dS).sum(axis=1)
target_gamma += (dL_dpsi1[:,:,None] * self._dpsi1_dgamma).sum(axis=1)
def dL_dpsi2_dmuSgamma(self, dL_dpsi2, Z, mu, S, gamma, target_mu, target_S, target_gamma):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S, gamma)
target_mu += (dL_dpsi2[:, :, :, None] * self._dpsi2_dmu).reshape(mu.shape[0],-1,mu.shape[1]).sum(axis=1)
target_S += (dL_dpsi2[:, :, :, None] * self._dpsi2_dS).reshape(S.shape[0],-1,S.shape[1]).sum(axis=1)
target_gamma += (dL_dpsi2[:,:,:, None] *self._dpsi2_dgamma).reshape(gamma.shape[0],-1,gamma.shape[1]).sum(axis=1)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob)
#contributions from psi0: #contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0) self.variance.gradient = np.sum(dL_dpsi0)
#from psi1 #from psi1
self.variance.gradient += np.sum(dL_dpsi1 * self._dpsi1_dvariance) self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*self._dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
#from psi2 #from psi2
self.variance.gradient += (dL_dpsi2 * self._dpsi2_dvariance).sum() self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * self._dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
#from Kmm
self._K_computations(Z, None)
dvardLdK = self._K_dvar * dL_dKmm
var_len3 = self.variance / (np.square(self.lengthscale)*self.lengthscale)
self.variance.gradient += np.sum(dvardLdK)
self.lengthscale.gradient += (np.square(Z[:,None,:]-Z[None,:,:])*dvardLdK[:,:,None]).reshape(-1,self.input_dim).sum(axis=0)*var_len3
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob) _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1 #psi1
grad = (dL_dpsi1[:, :, None] * self._dpsi1_dZ).sum(axis=0) grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
#psi2 #psi2
grad += (dL_dpsi2[:, :, :, None] * self._dpsi2_dZ).sum(axis=0).sum(axis=1) grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
grad += self.gradients_X(dL_dKmm, Z, None)
return grad return grad
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
ndata = posterior_variational.mean.shape[0] ndata = variational_posterior.mean.shape[0]
self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob)
_, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1 #psi1
grad_mu = (dL_dpsi1[:, :, None] * self._dpsi1_dmu).sum(axis=1) grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
grad_S = (dL_dpsi1[:, :, None] * self._dpsi1_dS).sum(axis=1) grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
grad_gamma = (dL_dpsi1[:,:,None] * self._dpsi1_dgamma).sum(axis=1) grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
#psi2 #psi2
grad_mu += (dL_dpsi2[:, :, :, None] * self._dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1) grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_S += (dL_dpsi2[:, :, :, None] * self._dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1) grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_gamma += (dL_dpsi2[:,:,:, None] *self._dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1) grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
return grad_mu, grad_S, grad_gamma return grad_mu, grad_S, grad_gamma
def gradients_X(self, dL_dK, X, X2=None):
#if self._X is None or X.base is not self._X.base or X2 is not None:
if X2==None:
_K_dist = X[:,None,:] - X[None,:,:]
_K_dist2 = np.square(_K_dist/self.lengthscale).sum(axis=-1)
dK_dX = self.variance*np.exp(-0.5 * self._K_dist2[:,:,None]) * (-2.*_K_dist/np.square(self.lengthscale))
dL_dX = (dL_dK[:,:,None] * dK_dX).sum(axis=1)
else:
_K_dist = X[:,None,:] - X2[None,:,:]
_K_dist2 = np.square(_K_dist/self.lengthscale).sum(axis=-1)
dK_dX = self.variance*np.exp(-0.5 * self._K_dist2[:,:,None]) * (-_K_dist/np.square(self.lengthscale))
dL_dX = (dL_dK[:,:,None] * dK_dX).sum(axis=1)
return dL_dX
#---------------------------------------# #---------------------------------------#
# Precomputations # # Precomputations #
#---------------------------------------# #---------------------------------------#
@cache_this(1) #@cache_this(1)
def _K_computations(self, X, X2): def _K_computations(self, X, X2):
""" """
K(X,X2) - X is NxQ K(X,X2) - X is NxQ
@ -175,78 +137,3 @@ class SSRBF(Stationary):
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), axis=1)[:, None] + np.sum(np.square(X2), axis=1)[None, :]) self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), axis=1)[:, None] + np.sum(np.square(X2), axis=1)[None, :])
self._K_dvar = np.exp(-0.5 * self._K_dist2) self._K_dvar = np.exp(-0.5 * self._K_dist2)
@cache_this(1)
def _psi_computations(self, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
# _dpsi1_dvariance NxM
# _dpsi1_dlengthscale NxMxQ
# _dpsi1_dZ NxMxQ
# _dpsi1_dgamma NxMxQ
# _dpsi1_dmu NxMxQ
# _dpsi1_dS NxMxQ
# _psi2 NxMxM
# _psi2_dvariance NxMxM
# _psi2_dlengthscale NxMxMxQ
# _psi2_dZ NxMxMxQ
# _psi2_dgamma NxMxMxQ
# _psi2_dmu NxMxMxQ
# _psi2_dS NxMxMxQ
lengthscale2 = np.square(self.lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / self.lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi1
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
_psi1_exponent = np.log(np.exp(_psi1_exponent1) + np.exp(_psi1_exponent2)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
_psi1_q = self.variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
self._psi1 = self.variance * np.exp(_psi1_exp_sum) # NxM
self._dpsi1_dvariance = self._psi1 / self.variance # NxM
self._dpsi1_dgamma = _psi1_q * (_psi1_exp_dist_sq/_psi1_denom_sqrt-_psi1_exp_Z) # NxMxQ
self._dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
self._dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
self._dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
self._dpsi1_dlengthscale = 2.*self.lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
# psi2
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
_psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent = np.log(np.exp(_psi2_exponent1) + np.exp(_psi2_exponent2))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = np.square(self.variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
self._psi2 = np.square(self.variance) * np.exp(_psi2_exp_sum) # N,M,M
self._dpsi2_dvariance = 2. * self._psi2/self.variance # NxMxM
self._dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ
self._dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ
self._dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ
self._dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ
self._dpsi2_dlengthscale = 2.*self.lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ

View file

@ -25,10 +25,10 @@ class Static(Kern):
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape) return np.zeros(X.shape)
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(Z.shape) return np.zeros(Z.shape)
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape) return np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
@ -61,8 +61,8 @@ class White(Static):
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dKdiag.sum() self.variance.gradient = dL_dKdiag.sum()
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = np.trace(dL_dKmm) + dL_dpsi0.sum() self.variance.gradient = dL_dpsi0.sum()
class Bias(Static): class Bias(Static):
@ -86,6 +86,6 @@ class Bias(Static):
ret[:] = self.variance**2 ret[:] = self.variance**2
return ret return ret
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dKmm.sum() + dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum() self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()

View file

@ -9,8 +9,38 @@ from ...util.linalg import tdot
from ... import util from ... import util
import numpy as np import numpy as np
from scipy import integrate from scipy import integrate
from ...util.caching import Cache_this
class Stationary(Kern): class Stationary(Kern):
"""
Stationary kernels (covariance functions).
Stationary covariance fucntion depend only on r, where r is defined as
r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 }
The covariance function k(x, x' can then be written k(r).
In this implementation, r is scaled by the lengthscales parameter(s):
r = \sqrt{ \sum_{q=1}^Q \frac{(x_q - x'_q)^2}{\ell_q^2} }.
By default, there's only one lengthscale: seaprate lengthscales for each
dimension can be enables by setting ARD=True.
To implement a stationary covariance function using this class, one need
only define the covariance function k(r), and it derivative.
...
def K_of_r(self, r):
return foo
def dK_dr(self, r):
return bar
The lengthscale(s) and variance parameters are added to the structure automatically.
"""
def __init__(self, input_dim, variance, lengthscale, ARD, name): def __init__(self, input_dim, variance, lengthscale, ARD, name):
super(Stationary, self).__init__(input_dim, name) super(Stationary, self).__init__(input_dim, name)
self.ARD = ARD self.ARD = ARD
@ -19,11 +49,11 @@ class Stationary(Kern):
lengthscale = np.ones(1) lengthscale = np.ones(1)
else: else:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only lengthscale needed for non-ARD kernel" assert lengthscale.size == 1, "Only 1 lengthscale needed for non-ARD kernel"
else: else:
if lengthscale is not None: if lengthscale is not None:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
assert lengthscale.size in [1, input_dim], "Bad lengthscales" assert lengthscale.size in [1, input_dim], "Bad number of lengthscales"
if lengthscale.size != input_dim: if lengthscale.size != input_dim:
lengthscale = np.ones(input_dim)*lengthscale lengthscale = np.ones(input_dim)*lengthscale
else: else:
@ -34,38 +64,43 @@ class Stationary(Kern):
self.add_parameters(self.variance, self.lengthscale) self.add_parameters(self.variance, self.lengthscale)
def K_of_r(self, r): def K_of_r(self, r):
raise NotImplementedError, "implement the covaraiance function as a fn of r to use this class" raise NotImplementedError, "implement the covariance function as a fn of r to use this class"
def dK_dr(self, r): def dK_dr(self, r):
raise NotImplementedError, "implement the covaraiance function as a fn of r to use this class" raise NotImplementedError, "implement derivative of the covariance function wrt r to use this class"
@Cache_this(limit=5, ignore_args=())
def K(self, X, X2=None): def K(self, X, X2=None):
r = self._scaled_dist(X, X2) r = self._scaled_dist(X, X2)
return self.K_of_r(r) return self.K_of_r(r)
def _dist(self, X, X2): @Cache_this(limit=3, ignore_args=())
if X2 is None: def dK_dr_via_X(self, X, X2):
X2 = X #a convenience function, so we can cache dK_dr
return X[:, None, :] - X2[None, :, :] return self.dK_dr(self._scaled_dist(X, X2))
@Cache_this(limit=5, ignore_args=(0,))
def _unscaled_dist(self, X, X2=None): def _unscaled_dist(self, X, X2=None):
""" """
Compute the square distance between each row of X and X2, or between Compute the Euclidean distance between each row of X and X2, or between
each pair of rows of X if X2 is None. each pair of rows of X if X2 is None.
""" """
if X2 is None: if X2 is None:
Xsq = np.sum(np.square(X),1) Xsq = np.sum(np.square(X),1)
return np.sqrt(-2.*tdot(X) + (Xsq[:,None] + Xsq[None,:])) r2 = -2.*tdot(X) + (Xsq[:,None] + Xsq[None,:])
util.diag.view(r2)[:,]= 0. # force diagnoal to be zero: sometime numerically a little negative
return np.sqrt(r2)
else: else:
X1sq = np.sum(np.square(X),1) X1sq = np.sum(np.square(X),1)
X2sq = np.sum(np.square(X2),1) X2sq = np.sum(np.square(X2),1)
return np.sqrt(-2.*np.dot(X, X2.T) + (X1sq[:,None] + X2sq[None,:])) return np.sqrt(-2.*np.dot(X, X2.T) + (X1sq[:,None] + X2sq[None,:]))
@Cache_this(limit=5, ignore_args=())
def _scaled_dist(self, X, X2=None): def _scaled_dist(self, X, X2=None):
""" """
Efficiently compute the scaled distance, r. Efficiently compute the scaled distance, r.
r = \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 r = \sqrt( \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
Note that if thre is only one lengthscale, l comes outside the sum. In Note that if thre is only one lengthscale, l comes outside the sum. In
this case we compute the unscaled distance first (in a separate this case we compute the unscaled distance first (in a separate
@ -73,20 +108,12 @@ class Stationary(Kern):
""" """
if self.ARD: if self.ARD:
if X2 is None: if X2 is not None:
Xl = X/self.lengthscale X2 = X2 / self.lengthscale
Xsq = np.sum(np.square(Xl),1) return self._unscaled_dist(X/self.lengthscale, X2)
return np.sqrt(np.sqrt(-2.*tdot(Xl) +(Xsq[:,None] + Xsq[None,:])))
else:
X1l = X/self.lengthscale
X2l = X2/self.lengthscale
X1sq = np.sum(np.square(X1l),1)
X2sq = np.sum(np.square(X2l),1)
return np.sqrt(-2.*np.dot(X, X2.T) + (X1sq[:,None] + X2sq[None,:]))
else: else:
return self._unscaled_dist(X, X2)/self.lengthscale return self._unscaled_dist(X, X2)/self.lengthscale
def Kdiag(self, X): def Kdiag(self, X):
ret = np.empty(X.shape[0]) ret = np.empty(X.shape[0])
ret[:] = self.variance ret[:] = self.variance
@ -97,20 +124,23 @@ class Stationary(Kern):
self.lengthscale.gradient = 0. self.lengthscale.gradient = 0.
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
rinv = self._inv_dist(X, X2) self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
dL_dr = self.dK_dr(r) * dL_dK
#now the lengthscale gradient(s)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
if self.ARD: if self.ARD:
x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3 #rinv = self._inv_dis# this is rather high memory? Should we loop instead?t(X, X2)
self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0) #d = X[:, None, :] - X2[None, :, :]
#x_xl3 = np.square(d)
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
tmp = dL_dr*self._inv_dist(X, X2)
if X2 is None: X2 = X
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
else: else:
x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3 r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum() self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
self.variance.gradient = np.sum(K * dL_dK)/self.variance
def _inv_dist(self, X, X2=None): def _inv_dist(self, X, X2=None):
""" """
@ -118,7 +148,7 @@ class Stationary(Kern):
diagonal, where we return zero (the distance on the diagonal is zero). diagonal, where we return zero (the distance on the diagonal is zero).
This term appears in derviatives. This term appears in derviatives.
""" """
dist = self._scaled_dist(X, X2) dist = self._scaled_dist(X, X2).copy()
if X2 is None: if X2 is None:
nondiag = util.diag.offdiag_view(dist) nondiag = util.diag.offdiag_view(dist)
nondiag[:] = 1./nondiag nondiag[:] = 1./nondiag
@ -130,10 +160,11 @@ class Stationary(Kern):
""" """
Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
""" """
r = self._scaled_dist(X, X2)
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr(r) * dL_dK dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
#The high-memory numpy way: ret = np.sum((invdist*dL_dr)[:,:,None]*self._dist(X, X2),1)/self.lengthscale**2 #The high-memory numpy way:
#d = X[:, None, :] - X2[None, :, :]
#ret = np.sum((invdist*dL_dr)[:,:,None]*d,1)/self.lengthscale**2
#if X2 is None: #if X2 is None:
#ret *= 2. #ret *= 2.
@ -143,7 +174,7 @@ class Stationary(Kern):
tmp *= 2. tmp *= 2.
X2 = X X2 = X
ret = np.empty(X.shape, dtype=np.float64) ret = np.empty(X.shape, dtype=np.float64)
[np.copyto(ret[:,q], np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), 1)) for q in xrange(self.input_dim)] [np.einsum('ij,ij->i', tmp, X[:,q][:,None]-X2[:,q][None,:], out=ret[:,q]) for q in xrange(self.input_dim)]
ret /= self.lengthscale**2 ret /= self.lengthscale**2
return ret return ret
@ -216,7 +247,7 @@ class Matern52(Stationary):
.. math:: .. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
""" """
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat52'): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat52'):
super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, name) super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, name)
@ -227,7 +258,7 @@ class Matern52(Stationary):
def dK_dr(self, r): def dK_dr(self, r):
return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r) return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r)
def Gram_matrix(self,F,F1,F2,F3,lower,upper): def Gram_matrix(self, F, F1, F2, F3, lower, upper):
""" """
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1. Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
@ -312,4 +343,8 @@ class RatQuad(Stationary):
grad = np.sum(dL_dK*dK_dpow) grad = np.sum(dL_dK*dK_dpow)
self.power.gradient = grad self.power.gradient = grad
def update_gradients_diag(self, dL_dKdiag, X):
super(RatQuad, self).update_gradients_diag(dL_dKdiag, X)
self.power.gradient = 0.

View file

@ -49,6 +49,7 @@ class BayesianGPLVM(SparseGP):
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.add_parameter(self.X, index=0) self.add_parameter(self.X, index=0)
self.parameters_changed()
def _getstate(self): def _getstate(self):
""" """
@ -66,7 +67,7 @@ class BayesianGPLVM(SparseGP):
super(BayesianGPLVM, self).parameters_changed() super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_q_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict) self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
# update for the KL divergence # update for the KL divergence
self.variational_prior.update_gradients_KL(self.X) self.variational_prior.update_gradients_KL(self.X)
@ -149,37 +150,6 @@ class BayesianGPLVM(SparseGP):
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs) return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
class BayesianGPLVMWithMissingData(BayesianGPLVM):
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
from ..util.subarray_and_sorting import common_subarrays
self.subarrays = common_subarrays(Y)
import ipdb;ipdb.set_trace()
BayesianGPLVM.__init__(self, Y, input_dim, X=X, X_variance=X_variance, init=init, num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, name=name, **kwargs)
def parameters_changed(self):
super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.KL_divergence()
dL_dmu, dL_dS = self.dL_dmuS()
# dL:
self.X.mean.gradient = dL_dmu
self.X.variance.gradient = dL_dS
# dKL:
self.X.mean.gradient -= self.X.mean
self.X.variance.gradient -= (1. - (1. / (self.X.variance))) * 0.5
if __name__ == '__main__':
import numpy as np
X = np.random.randn(20,2)
W = np.linspace(0,1,10)[None,:]
Y = (X*W).sum(1)
missing = np.random.binomial(1,.1,size=Y.shape)
pass
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
""" """

View file

@ -41,7 +41,7 @@ class GPLVM(GP):
def parameters_changed(self): def parameters_changed(self):
super(GPLVM, self).parameters_changed() super(GPLVM, self).parameters_changed()
self.X.gradient = self.kern.gradients_X(self._dL_dK, self.X, None) self.X.gradient = self.kern.gradients_X(self.dL_dK, self.X, None)
def _getstate(self): def _getstate(self):
return GP._getstate(self) return GP._getstate(self)

View file

@ -8,6 +8,7 @@ from .. import likelihoods
from .. import kern from .. import kern
from ..inference.latent_function_inference import VarDTC from ..inference.latent_function_inference import VarDTC
from ..util.misc import param_to_array from ..util.misc import param_to_array
from ..core.parameterization.variational import NormalPosterior
class SparseGPRegression(SparseGP): class SparseGPRegression(SparseGP):
""" """
@ -44,7 +45,10 @@ class SparseGPRegression(SparseGP):
assert Z.shape[1] == input_dim assert Z.shape[1] == input_dim
likelihood = likelihoods.Gaussian() likelihood = likelihoods.Gaussian()
if not (X_variance is None):
X = NormalPosterior(X,X_variance)
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC()) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC())
def _getstate(self): def _getstate(self):

View file

@ -58,7 +58,7 @@ class SSGPLVM(SparseGP):
super(SSGPLVM, self).parameters_changed() super(SSGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
self.X.mean.gradient, self.X.variance.gradient, self.X.binary_prob.gradient = self.kern.gradients_q_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict) self.X.mean.gradient, self.X.variance.gradient, self.X.binary_prob.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
# update for the KL divergence # update for the KL divergence
self.variational_prior.update_gradients_KL(self.X) self.variational_prior.update_gradients_KL(self.X)

View file

@ -14,3 +14,4 @@ import visualize
import latent_space_visualizations import latent_space_visualizations
import netpbmfile import netpbmfile
import inference_plots import inference_plots
import maps

View file

@ -6,26 +6,47 @@ import Tango
import pylab as pb import pylab as pb
import numpy as np import numpy as np
def gpplot(x,mu,lower,upper,edgecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'],axes=None,**kwargs): def ax_default(fignum, ax):
if axes is None: if ax is None:
axes = pb.gca() fig = pb.figure(fignum)
ax = fig.add_subplot(111)
else:
fig = ax.figure
return fig, ax
def meanplot(x, mu, color=Tango.colorsHex['darkBlue'], ax=None, fignum=None, linewidth=2,**kw):
_, axes = ax_default(fignum, ax)
#here's the mean
return axes.plot(x,mu,color=color,linewidth=linewidth,**kw)
def gpplot(x,mu,lower,upper,edgecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'],ax=None,fignum=None,xlabel='x',ylabel='y',**kwargs):
_, axes = ax_default(fignum, ax)
mu = mu.flatten() mu = mu.flatten()
x = x.flatten() x = x.flatten()
lower = lower.flatten() lower = lower.flatten()
upper = upper.flatten() upper = upper.flatten()
plots = []
#here's the mean #here's the mean
axes.plot(x,mu,color=edgecol,linewidth=2) plots.append(meanplot(x, mu, edgecol, axes))
#here's the box #here's the box
kwargs['linewidth']=0.5 kwargs['linewidth']=0.5
if not 'alpha' in kwargs.keys(): if not 'alpha' in kwargs.keys():
kwargs['alpha'] = 0.3 kwargs['alpha'] = 0.3
axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs) plots.append(axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs))
#this is the edge: #this is the edge:
axes.plot(x,upper,color=edgecol,linewidth=0.2) plots.append(meanplot(x, upper,color=edgecol,linewidth=0.2,ax=axes))
axes.plot(x,lower,color=edgecol,linewidth=0.2) plots.append(meanplot(x, lower,color=edgecol,linewidth=0.2,ax=axes))
axes.set_xlabel(xlabel)
axes.set_ylabel(ylabel)
return plots
def removeRightTicks(ax=None): def removeRightTicks(ax=None):
ax = ax or pb.gca() ax = ax or pb.gca()

View file

@ -2,6 +2,7 @@ import pylab as pb
import numpy as np import numpy as np
from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController
from ...util.misc import param_to_array from ...util.misc import param_to_array
from ...core.parameterization.variational import VariationalPosterior
from .base_plots import x_frame2D from .base_plots import x_frame2D
import itertools import itertools
import Tango import Tango
@ -19,7 +20,7 @@ def most_significant_input_dimensions(model, which_indices):
input_1, input_2 = 0, 1 input_1, input_2 = 0, 1
else: else:
try: try:
input_1, input_2 = np.argsort(model.input_sensitivity())[::-1][:2] input_1, input_2 = np.argsort(model.kern.input_sensitivity())[::-1][:2]
except: except:
raise ValueError, "cannot automatically determine which dimensions to plot, please pass 'which_indices'" raise ValueError, "cannot automatically determine which dimensions to plot, please pass 'which_indices'"
else: else:
@ -43,26 +44,29 @@ def plot_latent(model, labels=None, which_indices=None,
labels = np.ones(model.num_data) labels = np.ones(model.num_data)
input_1, input_2 = most_significant_input_dimensions(model, which_indices) input_1, input_2 = most_significant_input_dimensions(model, which_indices)
X = param_to_array(model.X)
# first, plot the output variance as a function of the latent space #fethch the data points X that we'd like to plot
Xtest, xx, yy, xmin, xmax = x_frame2D(X[:, [input_1, input_2]], resolution=resolution) X = model.X
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) if isinstance(X, VariationalPosterior):
X = param_to_array(X.mean)
else:
X = param_to_array(X)
# create a function which computes the shading of latent space according to the output variance
def plot_function(x): def plot_function(x):
Xtest_full = np.zeros((x.shape[0], model.X.shape[1]))
Xtest_full[:, [input_1, input_2]] = x Xtest_full[:, [input_1, input_2]] = x
mu, var, low, up = model.predict(Xtest_full) mu, var, low, up = model.predict(Xtest_full)
var = var[:, :1] var = var[:, :1]
return np.log(var) return np.log(var)
#Create an IMshow controller that can re-plot the latent space shading at a good resolution
view = ImshowController(ax, plot_function, view = ImshowController(ax, plot_function,
tuple(X[:, [input_1, input_2]].min(0)) + tuple(X[:, [input_1, input_2]].max(0)), tuple(X[:, [input_1, input_2]].min(0)) + tuple(X[:, [input_1, input_2]].max(0)),
resolution, aspect=aspect, interpolation='bilinear', resolution, aspect=aspect, interpolation='bilinear',
cmap=pb.cm.binary) cmap=pb.cm.binary)
# ax.imshow(var.reshape(resolution, resolution).T,
# extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary, interpolation='bilinear', origin='lower')
# make sure labels are in order of input: # make sure labels are in order of input:
ulabels = [] ulabels = []
for lab in labels: for lab in labels:
@ -95,8 +99,8 @@ def plot_latent(model, labels=None, which_indices=None,
if not np.all(labels == 1.) and legend: if not np.all(labels == 1.) and legend:
ax.legend(loc=0, numpoints=1) ax.legend(loc=0, numpoints=1)
ax.set_xlim(xmin[0], xmax[0]) #ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1]) #ax.set_ylim(xmin[1], xmax[1])
ax.grid(b=False) # remove the grid if present, it doesn't look good ax.grid(b=False) # remove the grid if present, it doesn't look good
ax.set_aspect('auto') # set a nice aspect ratio ax.set_aspect('auto') # set a nice aspect ratio

View file

@ -6,7 +6,7 @@ import pylab as pb
import Tango import Tango
from matplotlib.textpath import TextPath from matplotlib.textpath import TextPath
from matplotlib.transforms import offset_copy from matplotlib.transforms import offset_copy
from ...kern import Linear from .base_plots import ax_default
@ -52,11 +52,7 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
pass '' to not print a title pass '' to not print a title
pass None for a generic title pass None for a generic title
""" """
if ax is None: fig, ax = ax_default(fignum,ax)
fig = pb.figure(fignum)
ax = fig.add_subplot(111)
else:
fig = ax.figure
if title is None: if title is None:
ax.set_title('ARD parameters, %s kernel' % kernel.name) ax.set_title('ARD parameters, %s kernel' % kernel.name)
@ -70,13 +66,13 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
bottom = 0 bottom = 0
x = np.arange(kernel.input_dim) x = np.arange(kernel.input_dim)
for i in range(ard_params.shape[-1]): for i in range(ard_params.shape[0]):
c = Tango.nextMedium() c = Tango.nextMedium()
bars.append(plot_bars(fig, ax, x, ard_params[:,i], c, kernel._parameters_[i].name, bottom=bottom)) bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel._parameters_[i].name, bottom=bottom))
bottom += ard_params[:,i] bottom += ard_params[i,:]
ax.set_xlim(-.5, kernel.input_dim - .5) ax.set_xlim(-.5, kernel.input_dim - .5)
add_bar_labels(fig, ax, [bars[-1]], bottom=bottom-ard_params[:,i]) add_bar_labels(fig, ax, [bars[-1]], bottom=bottom-ard_params[i,:])
if legend: if legend:
if title is '': if title is '':

View file

@ -4,7 +4,6 @@ import matplotlib.patches as patches
from matplotlib.patches import Polygon from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection from matplotlib.collections import PatchCollection
#from matplotlib import cm #from matplotlib import cm
import shapefile
import re import re
pb.ion() pb.ion()
@ -119,7 +118,7 @@ def plot_bbox(sf,bbox,inside_only=True):
A,B,C,D = bbox A,B,C,D = bbox
plot(shape_records,xlims=[bbox[0],bbox[2]],ylims=[bbox[1],bbox[3]]) plot(shape_records,xlims=[bbox[0],bbox[2]],ylims=[bbox[1],bbox[3]])
def plot_string_match(sf,regex,field): def plot_string_match(sf,regex,field,**kwargs):
""" """
Plot the geometry of a shapefile whose fields match a regular expression given Plot the geometry of a shapefile whose fields match a regular expression given
@ -131,11 +130,13 @@ def plot_string_match(sf,regex,field):
:type field: integer :type field: integer
""" """
index,shape_records = string_match(sf,regex,field) index,shape_records = string_match(sf,regex,field)
plot(shape_records) plot(shape_records,**kwargs)
def new_shape_string(sf,name,regex,field=2,type=shapefile.POINT): def new_shape_string(sf,name,regex,field=2,type=None):
import shapefile
if type is None:
type = shapefile.POINT
newshp = shapefile.Writer(shapeType = sf.shapeType) newshp = shapefile.Writer(shapeType = sf.shapeType)
newshp.autoBalance = 1 newshp.autoBalance = 1
@ -159,3 +160,13 @@ def new_shape_string(sf,name,regex,field=2,type=shapefile.POINT):
newshp.save(name) newshp.save(name)
print index print index
def apply_bbox(sf,ax):
"""
Use bbox as xlim and ylim in ax
"""
limits = sf.bbox
xlim = limits[0],limits[2]
ylim = limits[1],limits[3]
ax.set_xlim(xlim)
ax.set_ylim(ylim)

View file

@ -56,10 +56,13 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if ax is None: if ax is None:
fig = pb.figure(num=fignum) fig = pb.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
X, Y = param_to_array(model.X, model.Y) if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): X_variance = model.X_variance X = model.X.mean
X_variance = param_to_array(model.X.variance)
else:
X = model.X
X, Y = param_to_array(X, model.Y)
if hasattr(model, 'Z'): Z = param_to_array(model.Z) if hasattr(model, 'Z'): Z = param_to_array(model.Z)
#work out what the inputs are for plotting (1D or 2D) #work out what the inputs are for plotting (1D or 2D)
@ -86,7 +89,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
m, v, lower, upper = model.predict(Xgrid) m, v, lower, upper = model.predict(Xgrid)
Y = Y Y = Y
for d in which_data_ycols: for d in which_data_ycols:
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol) gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5) ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
#optionally plot some samples #optionally plot some samples
@ -98,10 +101,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#add error bars for uncertain (if input uncertainty is being modelled) #add error bars for uncertain (if input uncertainty is being modelled)
#if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs(): if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
# ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(), ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
# xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()), xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
# ecolor='k', fmt=None, elinewidth=.5, alpha=.5) ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
#set the limits of the plot to some sensible values #set the limits of the plot to some sensible values

View file

@ -8,13 +8,6 @@ import sys
verbose = True verbose = True
try:
import sympy
SYMPY_AVAILABLE=True
except ImportError:
SYMPY_AVAILABLE=False
class Kern_check_model(GPy.core.Model): class Kern_check_model(GPy.core.Model):
""" """
This is a dummy model class used as a base class for checking that the This is a dummy model class used as a base class for checking that the
@ -70,14 +63,11 @@ class Kern_check_dKdiag_dtheta(Kern_check_model):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
self.add_parameter(self.kernel) self.add_parameter(self.kernel)
def parameters_changed(self):
self.kernel.update_gradients_diag(self.dL_dK, self.X)
def log_likelihood(self): def log_likelihood(self):
return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum() return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum()
def parameters_changed(self): def parameters_changed(self):
return self.kernel.update_gradients_diag(np.diag(self.dL_dK), self.X) self.kernel.update_gradients_diag(np.diag(self.dL_dK), self.X)
class Kern_check_dK_dX(Kern_check_model): class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """ """This class allows gradient checks for the gradient of a kernel with respect to X. """
@ -99,6 +89,8 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
def parameters_changed(self): def parameters_changed(self):
self.X.gradient = self.kernel.gradients_X_diag(self.dL_dK, self.X) self.X.gradient = self.kernel.gradients_X_diag(self.dL_dK, self.X)
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False): def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
""" """
This function runs on kernels to check the correctness of their This function runs on kernels to check the correctness of their
@ -217,11 +209,15 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
return pass_checks return pass_checks
class KernelTestsContinuous(unittest.TestCase): class KernelTestsContinuous(unittest.TestCase):
def setUp(self): def setUp(self):
self.X = np.random.randn(100,2) self.X = np.random.randn(100,2)
self.X2 = np.random.randn(110,2) self.X2 = np.random.randn(110,2)
continuous_kerns = ['RBF', 'Linear']
self.kernclasses = [getattr(GPy.kern, s) for s in continuous_kerns]
def test_Matern32(self): def test_Matern32(self):
k = GPy.kern.Matern32(2) k = GPy.kern.Matern32(2)
self.assertTrue(kern_test(k, X=self.X, X2=self.X2, verbose=verbose)) self.assertTrue(kern_test(k, X=self.X, X2=self.X2, verbose=verbose))
@ -234,6 +230,7 @@ class KernelTestsContinuous(unittest.TestCase):
if __name__ == "__main__": if __name__ == "__main__":
print "Running unit tests, please be (very) patient..." print "Running unit tests, please be (very) patient..."
unittest.main() unittest.main()

View file

@ -0,0 +1,133 @@
'''
Created on 27 Feb 2014
@author: maxz
'''
import unittest
from GPy.core.parameterization.parameterized import Parameterized
from GPy.core.parameterization.param import Param
import numpy
class ParamTestParent(Parameterized):
parent_changed_count = 0
def parameters_changed(self):
self.parent_changed_count += 1
class ParameterizedTest(Parameterized):
params_changed_count = 0
def parameters_changed(self):
self.params_changed_count += 1
def _set_params(self, params, trigger_parent=True):
Parameterized._set_params(self, params, trigger_parent=trigger_parent)
class Test(unittest.TestCase):
def setUp(self):
self.parent = ParamTestParent('test parent')
self.par = ParameterizedTest('test model')
self.par2 = ParameterizedTest('test model 2')
self.p = Param('test parameter', numpy.random.normal(1,2,(10,3)))
self.par.add_parameter(self.p)
self.par.add_parameter(Param('test1', numpy.random.normal(0,1,(1,))))
self.par.add_parameter(Param('test2', numpy.random.normal(0,1,(1,))))
self.par2.add_parameter(Param('par2 test1', numpy.random.normal(0,1,(1,))))
self.par2.add_parameter(Param('par2 test2', numpy.random.normal(0,1,(1,))))
self.parent.add_parameter(self.par)
self.parent.add_parameter(self.par2)
self._observer_triggered = None
self._trigger_count = 0
self._first = None
self._second = None
def _trigger(self, which):
self._observer_triggered = float(which)
self._trigger_count += 1
if self._first is not None:
self._second = self._trigger
else:
self._first = self._trigger
def _trigger_priority(self, which):
if self._first is not None:
self._second = self._trigger_priority
else:
self._first = self._trigger_priority
def test_observable(self):
self.par.add_observer(self, self._trigger, -1)
self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet')
self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
self.p[0,1] = 3 # trigger observers
self.assertEqual(self._observer_triggered, 3, 'observer should have triggered')
self.assertEqual(self._trigger_count, 1, 'observer should have triggered once')
self.assertEqual(self.par.params_changed_count, 1, 'params changed once')
self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
self.par.remove_observer(self)
self.p[2,1] = 4
self.assertEqual(self._observer_triggered, 3, 'observer should not have triggered')
self.assertEqual(self._trigger_count, 1, 'observer should have triggered once')
self.assertEqual(self.par.params_changed_count, 2, 'params changed second')
self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
self.par.add_observer(self, self._trigger, -1)
self.p[2,1] = 4
self.assertEqual(self._observer_triggered, 4, 'observer should have triggered')
self.assertEqual(self._trigger_count, 2, 'observer should have triggered once')
self.assertEqual(self.par.params_changed_count, 3, 'params changed second')
self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
self.par.remove_observer(self, self._trigger)
self.p[0,1] = 3
self.assertEqual(self._observer_triggered, 4, 'observer should not have triggered')
self.assertEqual(self._trigger_count, 2, 'observer should have triggered once')
self.assertEqual(self.par.params_changed_count, 4, 'params changed second')
self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
def test_set_params(self):
self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet')
self.par._set_params(numpy.ones(self.par.size))
self.assertEqual(self.par.params_changed_count, 1, 'now params changed')
self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
self.parent._set_params(numpy.ones(self.parent.size) * 2)
self.assertEqual(self.par.params_changed_count, 2, 'now params changed')
self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
def test_priority_notify(self):
self.assertEqual(self.par.params_changed_count, 0)
self.par._notify_observers(0, None)
self.assertEqual(self.par.params_changed_count, 1)
self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
self.par._notify_observers(0, -numpy.inf)
self.assertEqual(self.par.params_changed_count, 2)
self.assertEqual(self.parent.parent_changed_count, 1)
def test_priority(self):
self.par.add_observer(self, self._trigger, -1)
self.par.add_observer(self, self._trigger_priority, 0)
self.par._notify_observers(0)
self.assertEqual(self._first, self._trigger_priority, 'priority should be first')
self.assertEqual(self._second, self._trigger, 'priority should be first')
self.par.remove_observer(self)
self._first = self._second = None
self.par.add_observer(self, self._trigger, 1)
self.par.add_observer(self, self._trigger_priority, 0)
self.par._notify_observers(0)
self.assertEqual(self._first, self._trigger, 'priority should be second')
self.assertEqual(self._second, self._trigger_priority, 'priority should be second')
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()

View file

@ -6,6 +6,7 @@ Created on Feb 13, 2014
import unittest import unittest
import GPy import GPy
import numpy as np import numpy as np
from GPy.core.parameterization.parameter_core import HierarchyError
class Test(unittest.TestCase): class Test(unittest.TestCase):
@ -65,7 +66,7 @@ class Test(unittest.TestCase):
self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1]) self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1])
def test_add_parameter_already_in_hirarchy(self): def test_add_parameter_already_in_hirarchy(self):
self.test1.add_parameter(self.white._parameters_[0]) self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])
def test_default_constraints(self): def test_default_constraints(self):
self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops) self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)

View file

@ -1,89 +1,104 @@
from ..core.parameterization.parameter_core import Observable from ..core.parameterization.parameter_core import Observable
import itertools
class Cacher(object): class Cacher(object):
def __init__(self, operation, limit=5, reset_on_first=False): """
"""
def __init__(self, operation, limit=5, ignore_args=()):
self.limit = int(limit) self.limit = int(limit)
self._reset_on_first = reset_on_first self.ignore_args = ignore_args
self.operation=operation self.operation=operation
self.cached_inputs = [] self.cached_inputs = []
self.cached_outputs = [] self.cached_outputs = []
self.inputs_changed = [] self.inputs_changed = []
def __call__(self, *args): def __call__(self, *args):
if self._reset_on_first: """
assert isinstance(args[0], Observable) A wrapper function for self.operation,
args[0].add_observer(self, self.reset) """
cached_args = args
#ensure that specified arguments are ignored
if len(self.ignore_args) != 0:
oa = [a for i,a in enumerate(args) if i not in self.ignore_args]
else: else:
cached_args = args[1:] oa = args
# this makes sure we only add an observer once, and that None can be in args
observable_args = []
for a in oa:
if (not any(a is ai for ai in observable_args)) and a is not None:
observable_args.append(a)
if not all([isinstance(arg, Observable) for arg in cached_args]): #make sure that all the found argument really are observable:
#otherswise don't cache anything, pass args straight though
if not all([isinstance(arg, Observable) for arg in observable_args]):
return self.operation(*args) return self.operation(*args)
if cached_args in self.cached_inputs:
i = self.cached_inputs.index(cached_args) # TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
# return self.operation(*args)
#if the result is cached, return the cached computation
state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
if any(state):
i = state.index(True)
if self.inputs_changed[i]: if self.inputs_changed[i]:
#(elements of) the args have changed since we last computed: update
self.cached_outputs[i] = self.operation(*args) self.cached_outputs[i] = self.operation(*args)
self.inputs_changed[i] = False self.inputs_changed[i] = False
return self.cached_outputs[i] return self.cached_outputs[i]
else: else:
#first time we've seen these arguments: compute
#first make sure the depth limit isn't exceeded
if len(self.cached_inputs) == self.limit: if len(self.cached_inputs) == self.limit:
args_ = self.cached_inputs.pop(0) args_ = self.cached_inputs.pop(0)
[a.remove_observer(self, self.on_cache_changed) for a in args_] [a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None]
self.inputs_changed.pop(0) self.inputs_changed.pop(0)
self.cached_outputs.pop(0) self.cached_outputs.pop(0)
self.cached_inputs.append(cached_args) #compute
self.cached_inputs.append(args)
self.cached_outputs.append(self.operation(*args)) self.cached_outputs.append(self.operation(*args))
self.inputs_changed.append(False) self.inputs_changed.append(False)
[a.add_observer(self, self.on_cache_changed) for a in args] [a.add_observer(self, self.on_cache_changed) for a in observable_args]
return self.cached_outputs[-1] return self.cached_outputs[-1]#Max says return.
def on_cache_changed(self, arg): def on_cache_changed(self, arg):
"""
A callback funtion, which sets local flags when the elements of some cached inputs change
this function gets 'hooked up' to the inputs when we cache them, and upon their elements being changed we update here.
"""
self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)] self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)]
def reset(self, obj): def reset(self, obj):
[[a.remove_observer(self, self.reset) for a in args] for args in self.cached_inputs] """
Totally reset the cache
"""
[[a.remove_observer(self, self.on_cache_changed) for a in args if isinstance(a, Observable)] for args in self.cached_inputs]
[[a.remove_observer(self, self.reset) for a in args if isinstance(a, Observable)] for args in self.cached_inputs]
self.cached_inputs = [] self.cached_inputs = []
self.cached_outputs = [] self.cached_outputs = []
self.inputs_changed = [] self.inputs_changed = []
class Cache_this(object):
"""
A decorator which can be applied to bound methods in order to cache them
def cache_this(limit=5, reset_on_self=False): """
def limited_cache(f): def __init__(self, limit=5, ignore_args=()):
c = Cacher(f, limit, reset_on_first=reset_on_self) self.limit = limit
self.ignore_args = ignore_args
self.c = None
def __call__(self, f):
def f_wrap(*args): def f_wrap(*args):
return c(*args) if self.c is None:
f_wrap._cacher = c self.c = Cacher(f, self.limit, ignore_args=self.ignore_args)
return self.c(*args)
f_wrap._cacher = self
f_wrap.__doc__ = "**cached**\n\n" + (f.__doc__ or "")
return f_wrap return f_wrap
return limited_cache
#Xbase = X
#while Xbase is not None:
#try:
#i = self.cached_inputs.index(X)
#break
#except ValueError:
#Xbase = X.base
#continue
#self.inputs_changed[i] = True

View file

@ -2,6 +2,27 @@ import numpy as np
import warnings import warnings
from .. import kern from .. import kern
def build_XY(input_list,output_list=None,index=None):
num_outputs = len(input_list)
_s = [0] + [ _x.shape[0] for _x in input_list ]
_s = np.cumsum(_s)
slices = [slice(a,b) for a,b in zip(_s[:-1],_s[1:])]
if output_list is not None:
assert num_outputs == len(output_list)
Y = np.vstack(output_list)
else:
Y = None
if index is not None:
assert len(index) == num_outputs
I = np.vstack( [j*np.ones((_x.shape[0],1)) for _x,j in zip(input_list,index)] )
else:
I = np.vstack( [j*np.ones((_x.shape[0],1)) for _x,j in zip(input_list,range(num_outputs))] )
X = np.vstack(input_list)
X = np.hstack([X,I])
return X,Y,slices
def build_lcm(input_dim, num_outputs, CK = [], NC = [], W_columns=1,W=None,kappa=None): def build_lcm(input_dim, num_outputs, CK = [], NC = [], W_columns=1,W=None,kappa=None):
#TODO build_icm or build_lcm #TODO build_icm or build_lcm
""" """
@ -25,9 +46,9 @@ def build_lcm(input_dim, num_outputs, CK = [], NC = [], W_columns=1,W=None,kappa
k.input_dim = input_dim + 1 k.input_dim = input_dim + 1
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.") warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
kernel = CK[0].prod(kern.coregionalize(num_outputs,W_columns,W,kappa),tensor=True) kernel = CK[0].prod(kern.Coregionalize(num_outputs,W_columns,W,kappa),tensor=True)
for k in CK[1:]: for k in CK[1:]:
k_coreg = kern.coregionalize(num_outputs,W_columns,W,kappa) k_coreg = kern.Coregionalize(num_outputs,W_columns,W,kappa)
kernel += k.prod(k_coreg,tensor=True) kernel += k.prod(k_coreg,tensor=True)
for k in NC: for k in NC:
kernel += k kernel += k