manual merging

This commit is contained in:
James Hensman 2014-03-10 08:37:59 +00:00
commit a6eae08934
48 changed files with 1907 additions and 1082 deletions

View file

@ -31,7 +31,7 @@ class GP(Model):
super(GP, self).__init__(name) super(GP, self).__init__(name)
assert X.ndim == 2 assert X.ndim == 2
if isinstance(X, ObservableArray) or isinstance(X, VariationalPosterior): if isinstance(X, (ObservableArray, VariationalPosterior)):
self.X = X self.X = X
else: self.X = ObservableArray(X) else: self.X = ObservableArray(X)
@ -65,8 +65,6 @@ class GP(Model):
self.add_parameter(self.kern) self.add_parameter(self.kern)
self.add_parameter(self.likelihood) self.add_parameter(self.likelihood)
if self.__class__ is GP:
self.parameters_changed()
def parameters_changed(self): def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, Y_metadata=self.Y_metadata) self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, Y_metadata=self.Y_metadata)
@ -224,13 +222,9 @@ class GP(Model):
self.kern, self.kern,
self.likelihood, self.likelihood,
self.output_dim, self.output_dim,
self._Xoffset,
self._Xscale,
] ]
def _setstate(self, state): def _setstate(self, state):
self._Xscale = state.pop()
self._Xoffset = state.pop()
self.output_dim = state.pop() self.output_dim = state.pop()
self.likelihood = state.pop() self.likelihood = state.pop()
self.kern = state.pop() self.kern = state.pop()

View file

@ -15,6 +15,7 @@ import itertools
class Model(Parameterized): class Model(Parameterized):
_fail_count = 0 # Count of failed optimization steps (see objective) _fail_count = 0 # Count of failed optimization steps (see objective)
_allowed_failures = 10 # number of allowed failures _allowed_failures = 10 # number of allowed failures
def __init__(self, name): def __init__(self, name):
super(Model, self).__init__(name) # Parameterized.__init__(self) super(Model, self).__init__(name) # Parameterized.__init__(self)
self.optimization_runs = [] self.optimization_runs = []
@ -25,14 +26,8 @@ class Model(Parameterized):
raise NotImplementedError, "this needs to be implemented to use the model class" raise NotImplementedError, "this needs to be implemented to use the model class"
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
g = np.zeros(self.size) return self.gradient
try:
[p._collect_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_) if not p.is_fixed]
except ValueError:
raise ValueError, 'Gradient for {} not defined, please specify gradients for parameters to optimize'.format(p.name)
return g
raise NotImplementedError, "this needs to be implemented to use the model class"
def _getstate(self): def _getstate(self):
""" """
Get the current state of the class. Get the current state of the class.
@ -60,20 +55,6 @@ class Model(Parameterized):
self.priors = state.pop() self.priors = state.pop()
Parameterized._setstate(self, state) Parameterized._setstate(self, state)
def randomize(self):
"""
Randomize the model.
Make this draw from the prior if one exists, else draw from N(0,1)
"""
# first take care of all parameters (from N(0,1))
# x = self._get_params_transformed()
x = np.random.randn(self.size_transformed)
x = self._untransform_params(x)
# now draw from prior where possible
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
self._set_params(x)
# self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs): def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
""" """
Perform random restarts of the model, and set the model to the best Perform random restarts of the model, and set the model to the best
@ -161,6 +142,12 @@ class Model(Parameterized):
""" """
raise DeprecationWarning, 'parameters now have default constraints' raise DeprecationWarning, 'parameters now have default constraints'
def input_sensitivity(self):
"""
Returns the sensitivity for each dimension of this kernel.
"""
return self.kern.input_sensitivity()
def objective_function(self, x): def objective_function(self, x):
""" """
The objective function passed to the optimizer. It combines The objective function passed to the optimizer. It combines
@ -216,8 +203,8 @@ class Model(Parameterized):
try: try:
self._set_params_transformed(x) self._set_params_transformed(x)
obj_f = -float(self.log_likelihood()) - self.log_prior() obj_f = -float(self.log_likelihood()) - self.log_prior()
self._fail_count = 0
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError) as e: except (LinAlgError, ZeroDivisionError, ValueError) as e:
if self._fail_count >= self._allowed_failures: if self._fail_count >= self._allowed_failures:
raise e raise e
@ -240,6 +227,11 @@ class Model(Parameterized):
TODO: valid args TODO: valid args
""" """
if self.is_fixed:
raise RuntimeError, "Cannot optimize, when everything is fixed"
if self.size == 0:
raise RuntimeError, "Model without parameters cannot be minimized"
if optimizer is None: if optimizer is None:
optimizer = self.preferred_optimizer optimizer = self.preferred_optimizer
@ -278,9 +270,8 @@ class Model(Parameterized):
The gradient is considered correct if the ratio of the analytical The gradient is considered correct if the ratio of the analytical
and numerical gradients is within <tolerance> of unity. and numerical gradients is within <tolerance> of unity.
""" """
x = self._get_params_transformed().copy() x = self._get_params_transformed().copy()
if not verbose: if not verbose:
# make sure only to test the selected parameters # make sure only to test the selected parameters
if target_param is None: if target_param is None:
@ -297,7 +288,7 @@ class Model(Parameterized):
return return
# just check the global ratio # just check the global ratio
dx = np.zeros_like(x) dx = np.zeros(x.shape)
dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size)) dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size))
# evaulate around the point x # evaulate around the point x
@ -307,10 +298,12 @@ class Model(Parameterized):
dx = dx[transformed_index] dx = dx[transformed_index]
gradient = gradient[transformed_index] gradient = gradient[transformed_index]
numerical_gradient = (f1 - f2) / (2 * dx) denominator = (2 * np.dot(dx, gradient))
global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient == 0, 1e-32, gradient))) global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance) gloabl_diff = (f1 - f2) - denominator
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gloabl_diff) < tolerance)
else: else:
# check the gradient of each parameter individually, and do some pretty printing # check the gradient of each parameter individually, and do some pretty printing
try: try:
@ -356,7 +349,8 @@ class Model(Parameterized):
xx[xind] -= 2.*step xx[xind] -= 2.*step
f2 = self.objective_function(xx) f2 = self.objective_function(xx)
numerical_gradient = (f1 - f2) / (2 * step) numerical_gradient = (f1 - f2) / (2 * step)
ratio = (f1 - f2) / (2 * step * gradient[xind]) if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind]
else: ratio = (f1 - f2) / (2 * step * gradient[xind])
difference = np.abs((f1 - f2) / 2 / step - gradient[xind]) difference = np.abs((f1 - f2) / 2 / step - gradient[xind])
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance: if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
@ -372,6 +366,7 @@ class Model(Parameterized):
ng = '%.6f' % float(numerical_gradient) ng = '%.6f' % float(numerical_gradient)
grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4]) grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
print grad_string print grad_string
self._set_params_transformed(x) self._set_params_transformed(x)
return ret return ret

View file

@ -6,19 +6,6 @@ __updated__ = '2013-12-16'
import numpy as np import numpy as np
from parameter_core import Observable from parameter_core import Observable
class ParamList(list):
"""
List to store ndarray-likes in.
It will look for 'is' instead of calling __eq__ on each element.
"""
def __contains__(self, other):
for el in self:
if el is other:
return True
return False
pass
class ObservableArray(np.ndarray, Observable): class ObservableArray(np.ndarray, Observable):
""" """
An ndarray which reports changes to its observers. An ndarray which reports changes to its observers.
@ -62,10 +49,11 @@ class ObservableArray(np.ndarray, Observable):
def __setitem__(self, s, val): def __setitem__(self, s, val):
if self._s_not_empty(s): if self._s_not_empty(s):
super(ObservableArray, self).__setitem__(s, val) super(ObservableArray, self).__setitem__(s, val)
self._notify_observers() self.notify_observers(self[s])
def __getslice__(self, start, stop): def __getslice__(self, start, stop):
return self.__getitem__(slice(start, stop)) return self.__getitem__(slice(start, stop))
def __setslice__(self, start, stop, val): def __setslice__(self, start, stop, val):
return self.__setitem__(slice(start, stop), val) return self.__setitem__(slice(start, stop), val)
@ -77,149 +65,149 @@ class ObservableArray(np.ndarray, Observable):
def __ilshift__(self, *args, **kwargs): def __ilshift__(self, *args, **kwargs):
r = np.ndarray.__ilshift__(self, *args, **kwargs) r = np.ndarray.__ilshift__(self, *args, **kwargs)
self._notify_observers() self.notify_observers()
return r return r
def __irshift__(self, *args, **kwargs): def __irshift__(self, *args, **kwargs):
r = np.ndarray.__irshift__(self, *args, **kwargs) r = np.ndarray.__irshift__(self, *args, **kwargs)
self._notify_observers() self.notify_observers()
return r return r
def __ixor__(self, *args, **kwargs): def __ixor__(self, *args, **kwargs):
r = np.ndarray.__ixor__(self, *args, **kwargs) r = np.ndarray.__ixor__(self, *args, **kwargs)
self._notify_observers() self.notify_observers()
return r return r
def __ipow__(self, *args, **kwargs): def __ipow__(self, *args, **kwargs):
r = np.ndarray.__ipow__(self, *args, **kwargs) r = np.ndarray.__ipow__(self, *args, **kwargs)
self._notify_observers() self.notify_observers()
return r return r
def __ifloordiv__(self, *args, **kwargs): def __ifloordiv__(self, *args, **kwargs):
r = np.ndarray.__ifloordiv__(self, *args, **kwargs) r = np.ndarray.__ifloordiv__(self, *args, **kwargs)
self._notify_observers() self.notify_observers()
return r return r
def __isub__(self, *args, **kwargs): def __isub__(self, *args, **kwargs):
r = np.ndarray.__isub__(self, *args, **kwargs) r = np.ndarray.__isub__(self, *args, **kwargs)
self._notify_observers() self.notify_observers()
return r return r
def __ior__(self, *args, **kwargs): def __ior__(self, *args, **kwargs):
r = np.ndarray.__ior__(self, *args, **kwargs) r = np.ndarray.__ior__(self, *args, **kwargs)
self._notify_observers() self.notify_observers()
return r return r
def __itruediv__(self, *args, **kwargs): def __itruediv__(self, *args, **kwargs):
r = np.ndarray.__itruediv__(self, *args, **kwargs) r = np.ndarray.__itruediv__(self, *args, **kwargs)
self._notify_observers() self.notify_observers()
return r return r
def __idiv__(self, *args, **kwargs): def __idiv__(self, *args, **kwargs):
r = np.ndarray.__idiv__(self, *args, **kwargs) r = np.ndarray.__idiv__(self, *args, **kwargs)
self._notify_observers() self.notify_observers()
return r return r
def __iand__(self, *args, **kwargs): def __iand__(self, *args, **kwargs):
r = np.ndarray.__iand__(self, *args, **kwargs) r = np.ndarray.__iand__(self, *args, **kwargs)
self._notify_observers() self.notify_observers()
return r return r
def __imod__(self, *args, **kwargs): def __imod__(self, *args, **kwargs):
r = np.ndarray.__imod__(self, *args, **kwargs) r = np.ndarray.__imod__(self, *args, **kwargs)
self._notify_observers() self.notify_observers()
return r return r
def __iadd__(self, *args, **kwargs): def __iadd__(self, *args, **kwargs):
r = np.ndarray.__iadd__(self, *args, **kwargs) r = np.ndarray.__iadd__(self, *args, **kwargs)
self._notify_observers() self.notify_observers()
return r return r
def __imul__(self, *args, **kwargs): def __imul__(self, *args, **kwargs):
r = np.ndarray.__imul__(self, *args, **kwargs) r = np.ndarray.__imul__(self, *args, **kwargs)
self._notify_observers() self.notify_observers()
return r return r
# def __rrshift__(self, *args, **kwargs): # def __rrshift__(self, *args, **kwargs):
# r = np.ndarray.__rrshift__(self, *args, **kwargs) # r = np.ndarray.__rrshift__(self, *args, **kwargs)
# self._notify_observers() # self.notify_observers()
# return r # return r
# def __ror__(self, *args, **kwargs): # def __ror__(self, *args, **kwargs):
# r = np.ndarray.__ror__(self, *args, **kwargs) # r = np.ndarray.__ror__(self, *args, **kwargs)
# self._notify_observers() # self.notify_observers()
# return r # return r
# def __rxor__(self, *args, **kwargs): # def __rxor__(self, *args, **kwargs):
# r = np.ndarray.__rxor__(self, *args, **kwargs) # r = np.ndarray.__rxor__(self, *args, **kwargs)
# self._notify_observers() # self.notify_observers()
# return r # return r
# def __rdivmod__(self, *args, **kwargs): # def __rdivmod__(self, *args, **kwargs):
# r = np.ndarray.__rdivmod__(self, *args, **kwargs) # r = np.ndarray.__rdivmod__(self, *args, **kwargs)
# self._notify_observers() # self.notify_observers()
# return r # return r
# def __radd__(self, *args, **kwargs): # def __radd__(self, *args, **kwargs):
# r = np.ndarray.__radd__(self, *args, **kwargs) # r = np.ndarray.__radd__(self, *args, **kwargs)
# self._notify_observers() # self.notify_observers()
# return r # return r
# def __rdiv__(self, *args, **kwargs): # def __rdiv__(self, *args, **kwargs):
# r = np.ndarray.__rdiv__(self, *args, **kwargs) # r = np.ndarray.__rdiv__(self, *args, **kwargs)
# self._notify_observers() # self.notify_observers()
# return r # return r
# def __rtruediv__(self, *args, **kwargs): # def __rtruediv__(self, *args, **kwargs):
# r = np.ndarray.__rtruediv__(self, *args, **kwargs) # r = np.ndarray.__rtruediv__(self, *args, **kwargs)
# self._notify_observers() # self.notify_observers()
# return r # return r
# def __rshift__(self, *args, **kwargs): # def __rshift__(self, *args, **kwargs):
# r = np.ndarray.__rshift__(self, *args, **kwargs) # r = np.ndarray.__rshift__(self, *args, **kwargs)
# self._notify_observers() # self.notify_observers()
# return r # return r
# def __rmul__(self, *args, **kwargs): # def __rmul__(self, *args, **kwargs):
# r = np.ndarray.__rmul__(self, *args, **kwargs) # r = np.ndarray.__rmul__(self, *args, **kwargs)
# self._notify_observers() # self.notify_observers()
# return r # return r
# def __rpow__(self, *args, **kwargs): # def __rpow__(self, *args, **kwargs):
# r = np.ndarray.__rpow__(self, *args, **kwargs) # r = np.ndarray.__rpow__(self, *args, **kwargs)
# self._notify_observers() # self.notify_observers()
# return r # return r
# def __rsub__(self, *args, **kwargs): # def __rsub__(self, *args, **kwargs):
# r = np.ndarray.__rsub__(self, *args, **kwargs) # r = np.ndarray.__rsub__(self, *args, **kwargs)
# self._notify_observers() # self.notify_observers()
# return r # return r
# def __rfloordiv__(self, *args, **kwargs): # def __rfloordiv__(self, *args, **kwargs):
# r = np.ndarray.__rfloordiv__(self, *args, **kwargs) # r = np.ndarray.__rfloordiv__(self, *args, **kwargs)
# self._notify_observers() # self.notify_observers()
# return r # return r

View file

@ -5,47 +5,7 @@ Created on Oct 2, 2013
''' '''
import numpy import numpy
from numpy.lib.function_base import vectorize from numpy.lib.function_base import vectorize
from param import Param from lists_and_dicts import IntArrayDict
from collections import defaultdict
class ParamDict(defaultdict):
def __init__(self):
"""
Default will be self._default, if not set otherwise
"""
defaultdict.__init__(self, self.default_factory)
def __getitem__(self, key):
try:
return defaultdict.__getitem__(self, key)
except KeyError:
for a in self.iterkeys():
if numpy.all(a==key) and a._parent_index_==key._parent_index_:
return defaultdict.__getitem__(self, a)
raise
def __contains__(self, key):
if defaultdict.__contains__(self, key):
return True
for a in self.iterkeys():
if numpy.all(a==key) and a._parent_index_==key._parent_index_:
return True
return False
def __setitem__(self, key, value):
if isinstance(key, Param):
for a in self.iterkeys():
if numpy.all(a==key) and a._parent_index_==key._parent_index_:
return super(ParamDict, self).__setitem__(a, value)
defaultdict.__setitem__(self, key, value)
class SetDict(ParamDict):
def default_factory(self):
return set()
class IntArrayDict(ParamDict):
def default_factory(self):
return numpy.int_([])
class ParameterIndexOperations(object): class ParameterIndexOperations(object):
''' '''
@ -102,6 +62,7 @@ class ParameterIndexOperations(object):
def clear(self): def clear(self):
self._properties.clear() self._properties.clear()
@property
def size(self): def size(self):
return reduce(lambda a,b: a+b.size, self.iterindices(), 0) return reduce(lambda a,b: a+b.size, self.iterindices(), 0)
@ -194,14 +155,18 @@ class ParameterIndexOperationsView(object):
def shift_right(self, start, size): def shift_right(self, start, size):
raise NotImplementedError, 'Shifting only supported in original ParamIndexOperations' self._param_index_ops.shift_right(start+self._offset, size)
def shift_left(self, start, size):
self._param_index_ops.shift_left(start+self._offset, size)
self._offset -= size
self._size -= size
def clear(self): def clear(self):
for i, ind in self.items(): for i, ind in self.items():
self._param_index_ops.remove(i, ind+self._offset) self._param_index_ops.remove(i, ind+self._offset)
@property
def size(self): def size(self):
return reduce(lambda a,b: a+b.size, self.iterindices(), 0) return reduce(lambda a,b: a+b.size, self.iterindices(), 0)
@ -232,9 +197,7 @@ class ParameterIndexOperationsView(object):
def __getitem__(self, prop): def __getitem__(self, prop):
ind = self._filter_index(self._param_index_ops[prop]) ind = self._filter_index(self._param_index_ops[prop])
if ind.size > 0: return ind
return ind
raise KeyError, prop
def __str__(self, *args, **kwargs): def __str__(self, *args, **kwargs):
import pprint import pprint

View file

@ -0,0 +1,35 @@
'''
Created on 27 Feb 2014
@author: maxz
'''
from collections import defaultdict
class DefaultArrayDict(defaultdict):
def __init__(self):
"""
Default will be self._default, if not set otherwise
"""
defaultdict.__init__(self, self.default_factory)
class SetDict(DefaultArrayDict):
def default_factory(self):
return set()
class IntArrayDict(DefaultArrayDict):
def default_factory(self):
import numpy as np
return np.int_([])
class ArrayList(list):
"""
List to store ndarray-likes in.
It will look for 'is' instead of calling __eq__ on each element.
"""
def __contains__(self, other):
for el in self:
if el is other:
return True
return False
pass

View file

@ -3,8 +3,8 @@
import itertools import itertools
import numpy import numpy
from parameter_core import Constrainable, Gradcheckable, Indexable, Parentable, adjust_name_for_printing from parameter_core import OptimizationHandlable, Gradcheckable, adjust_name_for_printing
from array_core import ObservableArray, ParamList from array_core import ObservableArray
###### printing ###### printing
__constraints_name__ = "Constraint" __constraints_name__ = "Constraint"
@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
__print_threshold__ = 5 __print_threshold__ = 5
###### ######
class Param(Constrainable, ObservableArray, Gradcheckable): class Param(OptimizationHandlable, ObservableArray):
""" """
Parameter object for GPy models. Parameter object for GPy models.
@ -50,11 +50,11 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
obj._realsize_ = obj.size obj._realsize_ = obj.size
obj._realndim_ = obj.ndim obj._realndim_ = obj.ndim
obj._updated_ = False obj._updated_ = False
from index_operations import SetDict from lists_and_dicts import SetDict
obj._tied_to_me_ = SetDict() obj._tied_to_me_ = SetDict()
obj._tied_to_ = [] obj._tied_to_ = []
obj._original_ = True obj._original_ = True
obj._gradient_ = None obj._gradient_array_ = numpy.zeros(obj.shape, dtype=numpy.float64)
return obj return obj
def __init__(self, name, input_array, default_constraint=None, *a, **kw): def __init__(self, name, input_array, default_constraint=None, *a, **kw):
@ -77,7 +77,7 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
# see InfoArray.__array_finalize__ for comments # see InfoArray.__array_finalize__ for comments
if obj is None: return if obj is None: return
super(Param, self).__array_finalize__(obj) super(Param, self).__array_finalize__(obj)
self._direct_parent_ = getattr(obj, '_direct_parent_', None) self._parent_ = getattr(obj, '_parent_', None)
self._parent_index_ = getattr(obj, '_parent_index_', None) self._parent_index_ = getattr(obj, '_parent_index_', None)
self._default_constraint_ = getattr(obj, '_default_constraint_', None) self._default_constraint_ = getattr(obj, '_default_constraint_', None)
self._current_slice_ = getattr(obj, '_current_slice_', None) self._current_slice_ = getattr(obj, '_current_slice_', None)
@ -89,16 +89,18 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
self._updated_ = getattr(obj, '_updated_', None) self._updated_ = getattr(obj, '_updated_', None)
self._original_ = getattr(obj, '_original_', None) self._original_ = getattr(obj, '_original_', None)
self._name = getattr(obj, 'name', None) self._name = getattr(obj, 'name', None)
self._gradient_ = getattr(obj, '_gradient_', None) self._gradient_array_ = getattr(obj, '_gradient_array_', None)
self.constraints = getattr(obj, 'constraints', None) self.constraints = getattr(obj, 'constraints', None)
self.priors = getattr(obj, 'priors', None) self.priors = getattr(obj, 'priors', None)
@property
def _param_array_(self):
return self
@property @property
def gradient(self): def gradient(self):
if self._gradient_ is None: return self._gradient_array_[self._current_slice_]
self._gradient_ = numpy.zeros(self._realshape_)
return self._gradient_[self._current_slice_]
@gradient.setter @gradient.setter
def gradient(self, val): def gradient(self, val):
self.gradient[:] = val self.gradient[:] = val
@ -110,7 +112,7 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
func, args, state = super(Param, self).__reduce__() func, args, state = super(Param, self).__reduce__()
return func, args, (state, return func, args, (state,
(self.name, (self.name,
self._direct_parent_, self._parent_,
self._parent_index_, self._parent_index_,
self._default_constraint_, self._default_constraint_,
self._current_slice_, self._current_slice_,
@ -135,7 +137,7 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
self._current_slice_ = state.pop() self._current_slice_ = state.pop()
self._default_constraint_ = state.pop() self._default_constraint_ = state.pop()
self._parent_index_ = state.pop() self._parent_index_ = state.pop()
self._direct_parent_ = state.pop() self._parent_ = state.pop()
self.name = state.pop() self.name = state.pop()
def copy(self, *args): def copy(self, *args):
@ -148,17 +150,20 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
#=========================================================================== #===========================================================================
# get/set parameters # get/set parameters
#=========================================================================== #===========================================================================
def _set_params(self, param, update=True): # def _set_params(self, param, trigger_parent=True):
self.flat = param # self.flat = param
# if trigger_parent: min_priority = None
def _get_params(self): # else: min_priority = -numpy.inf
return self.flat # self.notify_observers(None, min_priority)
#
def _collect_gradient(self, target): # def _get_params(self):
target += self.gradient.flat # return self.flat
#
def _set_gradient(self, g): # def _collect_gradient(self, target):
self.gradient = g.reshape(self._realshape_) # target += self.gradient.flat
#
# def _set_gradient(self, g):
# self.gradient = g.reshape(self._realshape_)
#=========================================================================== #===========================================================================
# Array operations -> done # Array operations -> done
@ -172,11 +177,9 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base
except AttributeError: pass # returning 0d array or float, double etc except AttributeError: pass # returning 0d array or float, double etc
return new_arr return new_arr
def __setitem__(self, s, val): def __setitem__(self, s, val):
super(Param, self).__setitem__(s, val) super(Param, self).__setitem__(s, val)
if self.has_parent():
self._direct_parent_._notify_parameters_changed()
#self._notify_observers()
#=========================================================================== #===========================================================================
# Index Operations: # Index Operations:
@ -204,6 +207,7 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
ind = self._indices(slice_index) ind = self._indices(slice_index)
if ind.ndim < 2: ind = ind[:, None] if ind.ndim < 2: ind = ind[:, None]
return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int) return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int)
def _expand_index(self, slice_index=None): def _expand_index(self, slice_index=None):
# this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes # this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes
# it basically translates slices to their respective index arrays and turns negative indices around # it basically translates slices to their respective index arrays and turns negative indices around
@ -230,7 +234,8 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
#=========================================================================== #===========================================================================
@property @property
def is_fixed(self): def is_fixed(self):
return self._highest_parent_._is_fixed(self) from transformations import __fixed__
return self.constraints[__fixed__].size == self.size
#def round(self, decimals=0, out=None): #def round(self, decimals=0, out=None):
# view = super(Param, self).round(decimals, out).view(Param) # view = super(Param, self).round(decimals, out).view(Param)
# view.__array_finalize__(self) # view.__array_finalize__(self)
@ -244,7 +249,8 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
#=========================================================================== #===========================================================================
@property @property
def _description_str(self): def _description_str(self):
if self.size <= 1: return ["%f" % self] if self.size <= 1:
return [str(self.view(numpy.ndarray)[0])]
else: return [str(self.shape)] else: return [str(self.shape)]
def parameter_names(self, add_self=False, adjust_for_printing=False): def parameter_names(self, add_self=False, adjust_for_printing=False):
if adjust_for_printing: if adjust_for_printing:
@ -267,7 +273,7 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
return [t._short() for t in self._tied_to_] or [''] return [t._short() for t in self._tied_to_] or ['']
def __repr__(self, *args, **kwargs): def __repr__(self, *args, **kwargs):
name = "\033[1m{x:s}\033[0;0m:\n".format( name = "\033[1m{x:s}\033[0;0m:\n".format(
x=self.hirarchy_name()) x=self.hierarchy_name())
return name + super(Param, self).__repr__(*args, **kwargs) return name + super(Param, self).__repr__(*args, **kwargs)
def _ties_for(self, rav_index): def _ties_for(self, rav_index):
# size = sum(p.size for p in self._tied_to_) # size = sum(p.size for p in self._tied_to_)
@ -301,12 +307,12 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
gen = map(lambda x: " ".join(map(str, x)), gen) gen = map(lambda x: " ".join(map(str, x)), gen)
return reduce(lambda a, b:max(a, len(b)), gen, len(header)) return reduce(lambda a, b:max(a, len(b)), gen, len(header))
def _max_len_values(self): def _max_len_values(self):
return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.hirarchy_name())) return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.hierarchy_name()))
def _max_len_index(self, ind): def _max_len_index(self, ind):
return reduce(lambda a, b:max(a, len(str(b))), ind, len(__index_name__)) return reduce(lambda a, b:max(a, len(str(b))), ind, len(__index_name__))
def _short(self): def _short(self):
# short string to print # short string to print
name = self.hirarchy_name() name = self.hierarchy_name()
if self._realsize_ < 2: if self._realsize_ < 2:
return name return name
ind = self._indices() ind = self._indices()
@ -329,8 +335,8 @@ class Param(Constrainable, ObservableArray, Gradcheckable):
if lp is None: lp = self._max_len_names(prirs, __tie_name__) if lp is None: lp = self._max_len_names(prirs, __tie_name__)
sep = '-' sep = '-'
header_format = " {i:{5}^{2}s} | \033[1m{x:{5}^{1}s}\033[0;0m | {c:{5}^{0}s} | {p:{5}^{4}s} | {t:{5}^{3}s}" header_format = " {i:{5}^{2}s} | \033[1m{x:{5}^{1}s}\033[0;0m | {c:{5}^{0}s} | {p:{5}^{4}s} | {t:{5}^{3}s}"
if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing
else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
if not ties: ties = itertools.cycle(['']) if not ties: ties = itertools.cycle([''])
return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices
# except: return super(Param, self).__str__() # except: return super(Param, self).__str__()
@ -345,7 +351,8 @@ class ParamConcatenation(object):
See :py:class:`GPy.core.parameter.Param` for more details on constraining. See :py:class:`GPy.core.parameter.Param` for more details on constraining.
""" """
# self.params = params # self.params = params
self.params = ParamList([]) from lists_and_dicts import ArrayList
self.params = ArrayList([])
for p in params: for p in params:
for p in p.flattened_parameters: for p in p.flattened_parameters:
if p not in self.params: if p not in self.params:
@ -353,6 +360,21 @@ class ParamConcatenation(object):
self._param_sizes = [p.size for p in self.params] self._param_sizes = [p.size for p in self.params]
startstops = numpy.cumsum([0] + self._param_sizes) startstops = numpy.cumsum([0] + self._param_sizes)
self._param_slices_ = [slice(start, stop) for start,stop in zip(startstops, startstops[1:])] self._param_slices_ = [slice(start, stop) for start,stop in zip(startstops, startstops[1:])]
parents = dict()
for p in self.params:
if p.has_parent():
parent = p._parent_
level = 0
while parent is not None:
if parent in parents:
parents[parent] = max(level, parents[parent])
else:
parents[parent] = level
level += 1
parent = parent._parent_
import operator
self.parents = map(lambda x: x[0], sorted(parents.iteritems(), key=operator.itemgetter(1)))
#=========================================================================== #===========================================================================
# Get/set items, enable broadcasting # Get/set items, enable broadcasting
#=========================================================================== #===========================================================================
@ -366,24 +388,26 @@ class ParamConcatenation(object):
val = val._vals() val = val._vals()
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True; ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
vals = self._vals(); vals[s] = val; del val vals = self._vals(); vals[s] = val; del val
[numpy.place(p, ind[ps], vals[ps]) and update and p._notify_observers() [numpy.place(p, ind[ps], vals[ps])
for p, ps in zip(self.params, self._param_slices_)] for p, ps in zip(self.params, self._param_slices_)]
if update:
self.update_all_params()
def _vals(self): def _vals(self):
return numpy.hstack([p._get_params() for p in self.params]) return numpy.hstack([p._param_array_ for p in self.params])
#=========================================================================== #===========================================================================
# parameter operations: # parameter operations:
#=========================================================================== #===========================================================================
def update_all_params(self): def update_all_params(self):
for p in self.params: for par in self.parents:
p._notify_observers() par.notify_observers(-numpy.inf)
def constrain(self, constraint, warning=True): def constrain(self, constraint, warning=True):
[param.constrain(constraint, update=False) for param in self.params] [param.constrain(constraint, trigger_parent=False) for param in self.params]
self.update_all_params() self.update_all_params()
constrain.__doc__ = Param.constrain.__doc__ constrain.__doc__ = Param.constrain.__doc__
def constrain_positive(self, warning=True): def constrain_positive(self, warning=True):
[param.constrain_positive(warning, update=False) for param in self.params] [param.constrain_positive(warning, trigger_parent=False) for param in self.params]
self.update_all_params() self.update_all_params()
constrain_positive.__doc__ = Param.constrain_positive.__doc__ constrain_positive.__doc__ = Param.constrain_positive.__doc__
@ -393,12 +417,12 @@ class ParamConcatenation(object):
fix = constrain_fixed fix = constrain_fixed
def constrain_negative(self, warning=True): def constrain_negative(self, warning=True):
[param.constrain_negative(warning, update=False) for param in self.params] [param.constrain_negative(warning, trigger_parent=False) for param in self.params]
self.update_all_params() self.update_all_params()
constrain_negative.__doc__ = Param.constrain_negative.__doc__ constrain_negative.__doc__ = Param.constrain_negative.__doc__
def constrain_bounded(self, lower, upper, warning=True): def constrain_bounded(self, lower, upper, warning=True):
[param.constrain_bounded(lower, upper, warning, update=False) for param in self.params] [param.constrain_bounded(lower, upper, warning, trigger_parent=False) for param in self.params]
self.update_all_params() self.update_all_params()
constrain_bounded.__doc__ = Param.constrain_bounded.__doc__ constrain_bounded.__doc__ = Param.constrain_bounded.__doc__

View file

@ -1,37 +1,132 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Core module for parameterization.
This module implements all parameterization techniques, split up in modular bits.
HierarchyError:
raised when an error with the hierarchy occurs (circles etc.)
Observable:
Observable Pattern for patameterization
"""
from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
import numpy as np
import itertools
__updated__ = '2013-12-16' __updated__ = '2013-12-16'
class HierarchyError(Exception):
"""
Gets thrown when something is wrong with the parameter hierarchy.
"""
def adjust_name_for_printing(name): def adjust_name_for_printing(name):
"""
Make sure a name can be printed, alongside used as a variable name.
"""
if name is not None: if name is not None:
return name.replace(" ", "_").replace(".", "_").replace("-", "").replace("+", "").replace("!", "").replace("*", "").replace("/", "") return name.replace(" ", "_").replace(".", "_").replace("-", "_m_").replace("+", "_p_").replace("!", "_I_").replace("**", "_xx_").replace("*", "_x_").replace("/", "_l_").replace("@",'_at_')
return '' return ''
class Observable(object): class Observable(object):
"""
Observable pattern for parameterization.
This Object allows for observers to register with self and a (bound!) function
as an observer. Every time the observable changes, it sends a notification with
self as only argument to all its observers.
"""
_updated = True
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
from collections import defaultdict self._observer_callables_ = []
self._observer_callables_ = defaultdict(list) def __del__(self, *args, **kwargs):
del self._observer_callables_
def add_observer(self, observer, callble):
self._observer_callables_[observer].append(callble) def add_observer(self, observer, callble, priority=0):
self._insert_sorted(priority, observer, callble)
def remove_observer(self, observer, callble=None): def remove_observer(self, observer, callble=None):
if observer in self._observer_callables_: to_remove = []
if callble is None: for p, obs, clble in self._observer_callables_:
del self._observer_callables_[observer] if callble is not None:
elif callble in self._observer_callables_[observer]: if (obs == observer) and (callble == clble):
self._observer_callables_[observer].remove(callble) to_remove.append((p, obs, clble))
if len(self._observer_callables_[observer]) == 0: else:
self.remove_observer(observer) if obs is observer:
to_remove.append((p, obs, clble))
def _notify_observers(self): for r in to_remove:
[[callble(self) for callble in callables] self._observer_callables_.remove(r)
for callables in self._observer_callables_.itervalues()]
def notify_observers(self, which=None, min_priority=None):
"""
Notifies all observers. Which is the element, which kicked off this
notification loop.
NOTE: notifies only observers with priority p > min_priority!
^^^^^^^^^^^^^^^^
:param which: object, which started this notification loop
:param min_priority: only notify observers with priority > min_priority
if min_priority is None, notify all observers in order
"""
if which is None:
which = self
if min_priority is None:
[callble(which) for _, _, callble in self._observer_callables_]
else:
for p, _, callble in self._observer_callables_:
if p <= min_priority:
break
callble(which)
def _insert_sorted(self, p, o, c):
ins = 0
for pr, _, _ in self._observer_callables_:
if p > pr:
break
ins += 1
self._observer_callables_.insert(ins, (p, o, c))
class Pickleable(object): class Pickleable(object):
"""
Make an object pickleable (See python doc 'pickling').
This class allows for pickling support by Memento pattern.
_getstate returns a memento of the class, which gets pickled.
_setstate(<memento>) (re-)sets the state of the class to the memento
"""
#===========================================================================
# Pickling operations
#===========================================================================
def pickle(self, f, protocol=-1):
"""
:param f: either filename or open file object to write to.
if it is an open buffer, you have to make sure to close
it properly.
:param protocol: pickling protocol to use, python-pickle for details.
"""
import cPickle
if isinstance(f, str):
with open(f, 'w') as f:
cPickle.dump(self, f, protocol)
else:
cPickle.dump(self, f, protocol)
def __getstate__(self):
if self._has_get_set_state():
return self._getstate()
return self.__dict__
def __setstate__(self, state):
if self._has_get_set_state():
self._setstate(state)
# TODO: maybe parameters_changed() here?
return
self.__dict__ = state
def _has_get_set_state(self):
return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__)
def _getstate(self): def _getstate(self):
""" """
Returns the state of this class in a memento pattern. Returns the state of this class in a memento pattern.
@ -58,70 +153,145 @@ class Pickleable(object):
#=============================================================================== #===============================================================================
class Parentable(object): class Parentable(object):
_direct_parent_ = None """
Enable an Object to have a parent.
Additionally this adds the parent_index, which is the index for the parent
to look for in its parameter list.
"""
_parent_ = None
_parent_index_ = None _parent_index_ = None
def has_parent(self): def has_parent(self):
return self._direct_parent_ is not None """
Return whether this parentable object currently has a parent.
def _notify_parent_change(self): """
for p in self._parameters_: return self._parent_ is not None
p._parent_changed(self)
def _parent_changed(self): def _parent_changed(self):
"""
Gets called, when the parent changed, so we can adjust our
inner attributes according to the new parent.
"""
raise NotImplementedError, "shouldnt happen, Parentable objects need to be able to change their parent" raise NotImplementedError, "shouldnt happen, Parentable objects need to be able to change their parent"
def _disconnect_parent(self, *args, **kw):
"""
Disconnect this object from its parent
"""
raise NotImplementedError, "Abstaract superclass"
@property @property
def _highest_parent_(self): def _highest_parent_(self):
if self._direct_parent_ is None: """
Gets the highest parent by traversing up to the root node of the hierarchy.
"""
if self._parent_ is None:
return self return self
return self._direct_parent_._highest_parent_ return self._parent_._highest_parent_
def _notify_parameters_changed(self): def _notify_parent_change(self):
raise NotImplementedError, "shouldnt happen, abstract superclass" """
Dont do anything if in leaf node
"""
pass
class Gradcheckable(Parentable):
"""
Adds the functionality for an object to be gradcheckable.
It is just a thin wrapper of a call to the highest parent for now.
TODO: Can be done better, by only changing parameters of the current parameter handle,
such that object hierarchy only has to change for those.
"""
def __init__(self, *a, **kw):
super(Gradcheckable, self).__init__(*a, **kw)
def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
"""
Check the gradient of this parameter with respect to the highest parent's
objective function.
This is a three point estimate of the gradient, wiggling at the parameters
with a stepsize step.
The check passes if either the ratio or the difference between numerical and
analytical gradient is smaller then tolerance.
class Nameable(Parentable): :param bool verbose: whether each parameter shall be checked individually.
:param float step: the stepsize for the numerical three point gradient estimate.
:param flaot tolerance: the tolerance for the gradient ratio or difference.
"""
if self.has_parent():
return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
return self._checkgrad(self[''], verbose=verbose, step=step, tolerance=tolerance)
def _checkgrad(self, param):
"""
Perform the checkgrad on the model.
TODO: this can be done more efficiently, when doing it inside here
"""
raise NotImplementedError, "Need log likelihood to check gradient against"
class Nameable(Gradcheckable):
"""
Make an object nameable inside the hierarchy.
"""
def __init__(self, name, *a, **kw): def __init__(self, name, *a, **kw):
super(Nameable, self).__init__(*a, **kw) super(Nameable, self).__init__(*a, **kw)
self._name = name or self.__class__.__name__ self._name = name or self.__class__.__name__
@property @property
def name(self): def name(self):
"""
The name of this object
"""
return self._name return self._name
@name.setter @name.setter
def name(self, name): def name(self, name):
"""
Set the name of this object.
Tell the parent if the name has changed.
"""
from_name = self.name from_name = self.name
assert isinstance(name, str) assert isinstance(name, str)
self._name = name self._name = name
if self.has_parent(): if self.has_parent():
self._direct_parent_._name_changed(self, from_name) self._parent_._name_changed(self, from_name)
def hirarchy_name(self, adjust_for_printing=True): def hierarchy_name(self, adjust_for_printing=True):
"""
return the name for this object with the parents names attached by dots.
:param bool adjust_for_printing: whether to call :func:`~adjust_for_printing()`
on the names, recursively
"""
if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
else: adjust = lambda x: x else: adjust = lambda x: x
if self.has_parent(): if self.has_parent():
return self._direct_parent_.hirarchy_name() + "." + adjust(self.name) return self._parent_.hierarchy_name() + "." + adjust(self.name)
return adjust(self.name) return adjust(self.name)
class Gradcheckable(Parentable):
def __init__(self, *a, **kw):
super(Gradcheckable, self).__init__(*a, **kw)
def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
if self.has_parent():
return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
return self._checkgrad(self[''], verbose=verbose, step=step, tolerance=tolerance)
def _checkgrad(self, param):
raise NotImplementedError, "Need log likelihood to check gradient against"
class Indexable(object): class Indexable(object):
"""
Enable enraveled indexes and offsets for this object.
The raveled index of an object is the index for its parameters in a flattened int array.
"""
def _raveled_index(self): def _raveled_index(self):
"""
Flattened array of ints, specifying the index of this object.
This has to account for shaped parameters!
"""
raise NotImplementedError, "Need to be able to get the raveled Index" raise NotImplementedError, "Need to be able to get the raveled Index"
def _internal_offset(self): def _internal_offset(self):
"""
The offset for this parameter inside its parent.
This has to account for shaped parameters!
"""
return 0 return 0
def _offset_for(self, param): def _offset_for(self, param):
"""
Return the offset of the param inside this parameterized object.
This does not need to account for shaped parameters, as it
basically just sums up the parameter sizes which come before param.
"""
raise NotImplementedError, "shouldnt happen, offset required from non parameterization object?" raise NotImplementedError, "shouldnt happen, offset required from non parameterization object?"
def _raveled_index_for(self, param): def _raveled_index_for(self, param):
@ -134,6 +304,15 @@ class Indexable(object):
class Constrainable(Nameable, Indexable): class Constrainable(Nameable, Indexable):
"""
Make an object constrainable with Priors and Transformations.
TODO: Mappings!!
Adding a constraint to a Parameter means to tell the highest parent that
the constraint was added and making sure that all parameters covered
by this object are indeed conforming to the constraint.
:func:`constrain()` and :func:`unconstrain()` are main methods here
"""
def __init__(self, name, default_constraint=None, *a, **kw): def __init__(self, name, default_constraint=None, *a, **kw):
super(Constrainable, self).__init__(name=name, *a, **kw) super(Constrainable, self).__init__(name=name, *a, **kw)
self._default_constraint_ = default_constraint self._default_constraint_ = default_constraint
@ -143,12 +322,16 @@ class Constrainable(Nameable, Indexable):
if self._default_constraint_ is not None: if self._default_constraint_ is not None:
self.constrain(self._default_constraint_) self.constrain(self._default_constraint_)
def _disconnect_parent(self, constr=None): def _disconnect_parent(self, constr=None, *args, **kw):
"""
From Parentable:
disconnect the parent and set the new constraints to constr
"""
if constr is None: if constr is None:
constr = self.constraints.copy() constr = self.constraints.copy()
self.constraints.clear() self.constraints.clear()
self.constraints = constr self.constraints = constr
self._direct_parent_ = None self._parent_ = None
self._parent_index_ = None self._parent_index_ = None
self._connect_fixes() self._connect_fixes()
self._notify_parent_change() self._notify_parent_change()
@ -156,15 +339,15 @@ class Constrainable(Nameable, Indexable):
#=========================================================================== #===========================================================================
# Fixing Parameters: # Fixing Parameters:
#=========================================================================== #===========================================================================
def constrain_fixed(self, value=None, warning=True): def constrain_fixed(self, value=None, warning=True, trigger_parent=True):
""" """
Constrain this paramter to be fixed to the current value it carries. Constrain this parameter to be fixed to the current value it carries.
:param warning: print a warning for overwriting constraints. :param warning: print a warning for overwriting constraints.
""" """
if value is not None: if value is not None:
self[:] = value self[:] = value
self.constrain(__fixed__, warning=warning) self.constrain(__fixed__, warning=warning, trigger_parent=trigger_parent)
rav_i = self._highest_parent_._raveled_index_for(self) rav_i = self._highest_parent_._raveled_index_for(self)
self._highest_parent_._set_fixed(rav_i) self._highest_parent_._set_fixed(rav_i)
fix = constrain_fixed fix = constrain_fixed
@ -178,20 +361,17 @@ class Constrainable(Nameable, Indexable):
unfix = unconstrain_fixed unfix = unconstrain_fixed
def _set_fixed(self, index): def _set_fixed(self, index):
import numpy as np
if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool) if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
self._fixes_[index] = FIXED self._fixes_[index] = FIXED
if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED
def _set_unfixed(self, index): def _set_unfixed(self, index):
import numpy as np
if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool) if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
# rav_i = self._raveled_index_for(param)[index] # rav_i = self._raveled_index_for(param)[index]
self._fixes_[index] = UNFIXED self._fixes_[index] = UNFIXED
if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED
def _connect_fixes(self): def _connect_fixes(self):
import numpy as np
fixed_indices = self.constraints[__fixed__] fixed_indices = self.constraints[__fixed__]
if fixed_indices.size > 0: if fixed_indices.size > 0:
self._fixes_ = np.ones(self.size, dtype=bool) * UNFIXED self._fixes_ = np.ones(self.size, dtype=bool) * UNFIXED
@ -205,11 +385,20 @@ class Constrainable(Nameable, Indexable):
#=========================================================================== #===========================================================================
# Prior Operations # Prior Operations
#=========================================================================== #===========================================================================
def set_prior(self, prior, warning=True, update=True): def set_prior(self, prior, warning=True):
"""
Set the prior for this object to prior.
:param :class:`~GPy.priors.Prior` prior: a prior to set for this parameter
:param bool warning: whether to warn if another prior was set for this parameter
"""
repriorized = self.unset_priors() repriorized = self.unset_priors()
self._add_to_index_operations(self.priors, repriorized, prior, warning, update) self._add_to_index_operations(self.priors, repriorized, prior, warning)
def unset_priors(self, *priors): def unset_priors(self, *priors):
"""
Un-set all priors given from this parameter handle.
"""
return self._remove_from_index_operations(self.priors, priors) return self._remove_from_index_operations(self.priors, priors)
def log_prior(self): def log_prior(self):
@ -221,7 +410,6 @@ class Constrainable(Nameable, Indexable):
def _log_prior_gradients(self): def _log_prior_gradients(self):
"""evaluate the gradients of the priors""" """evaluate the gradients of the priors"""
import numpy as np
if self.priors.size > 0: if self.priors.size > 0:
x = self._get_params() x = self._get_params()
ret = np.zeros(x.size) ret = np.zeros(x.size)
@ -233,7 +421,7 @@ class Constrainable(Nameable, Indexable):
# Constrain operations -> done # Constrain operations -> done
#=========================================================================== #===========================================================================
def constrain(self, transform, warning=True, update=True): def constrain(self, transform, warning=True, trigger_parent=True):
""" """
:param transform: the :py:class:`GPy.core.transformations.Transformation` :param transform: the :py:class:`GPy.core.transformations.Transformation`
to constrain the this parameter to. to constrain the this parameter to.
@ -243,9 +431,9 @@ class Constrainable(Nameable, Indexable):
:py:class:`GPy.core.transformations.Transformation`. :py:class:`GPy.core.transformations.Transformation`.
""" """
if isinstance(transform, Transformation): if isinstance(transform, Transformation):
self._set_params(transform.initialize(self._get_params()), update=False) self._param_array_[:] = transform.initialize(self._param_array_)
reconstrained = self.unconstrain() reconstrained = self.unconstrain()
self._add_to_index_operations(self.constraints, reconstrained, transform, warning, update) self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
def unconstrain(self, *transforms): def unconstrain(self, *transforms):
""" """
@ -256,30 +444,30 @@ class Constrainable(Nameable, Indexable):
""" """
return self._remove_from_index_operations(self.constraints, transforms) return self._remove_from_index_operations(self.constraints, transforms)
def constrain_positive(self, warning=True, update=True): def constrain_positive(self, warning=True, trigger_parent=True):
""" """
:param warning: print a warning if re-constraining parameters. :param warning: print a warning if re-constraining parameters.
Constrain this parameter to the default positive constraint. Constrain this parameter to the default positive constraint.
""" """
self.constrain(Logexp(), warning=warning, update=update) self.constrain(Logexp(), warning=warning, trigger_parent=trigger_parent)
def constrain_negative(self, warning=True, update=True): def constrain_negative(self, warning=True, trigger_parent=True):
""" """
:param warning: print a warning if re-constraining parameters. :param warning: print a warning if re-constraining parameters.
Constrain this parameter to the default negative constraint. Constrain this parameter to the default negative constraint.
""" """
self.constrain(NegativeLogexp(), warning=warning, update=update) self.constrain(NegativeLogexp(), warning=warning, trigger_parent=trigger_parent)
def constrain_bounded(self, lower, upper, warning=True, update=True): def constrain_bounded(self, lower, upper, warning=True, trigger_parent=True):
""" """
:param lower, upper: the limits to bound this parameter to :param lower, upper: the limits to bound this parameter to
:param warning: print a warning if re-constraining parameters. :param warning: print a warning if re-constraining parameters.
Constrain this parameter to lie within the given range. Constrain this parameter to lie within the given range.
""" """
self.constrain(Logistic(lower, upper), warning=warning, update=update) self.constrain(Logistic(lower, upper), warning=warning, trigger_parent=trigger_parent)
def unconstrain_positive(self): def unconstrain_positive(self):
""" """
@ -302,6 +490,10 @@ class Constrainable(Nameable, Indexable):
self.unconstrain(Logistic(lower, upper)) self.unconstrain(Logistic(lower, upper))
def _parent_changed(self, parent): def _parent_changed(self, parent):
"""
From Parentable:
Called when the parent changed
"""
from index_operations import ParameterIndexOperationsView from index_operations import ParameterIndexOperationsView
self.constraints = ParameterIndexOperationsView(parent.constraints, parent._offset_for(self), self.size) self.constraints = ParameterIndexOperationsView(parent.constraints, parent._offset_for(self), self.size)
self.priors = ParameterIndexOperationsView(parent.priors, parent._offset_for(self), self.size) self.priors = ParameterIndexOperationsView(parent.priors, parent._offset_for(self), self.size)
@ -309,17 +501,26 @@ class Constrainable(Nameable, Indexable):
for p in self._parameters_: for p in self._parameters_:
p._parent_changed(parent) p._parent_changed(parent)
def _add_to_index_operations(self, which, reconstrained, transform, warning, update): def _add_to_index_operations(self, which, reconstrained, what, warning):
"""
Helper preventing copy code.
This addes the given what (transformation, prior etc) to parameter index operations which.
revonstrained are reconstrained indices.
warn when reconstraining parameters if warning is True.
TODO: find out which parameters have changed specifically
"""
if warning and reconstrained.size > 0: if warning and reconstrained.size > 0:
# TODO: figure out which parameters have changed and only print those
print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name) print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name)
which.add(transform, self._raveled_index()) which.add(what, self._raveled_index())
if update:
self._notify_observers()
def _remove_from_index_operations(self, which, transforms): def _remove_from_index_operations(self, which, what):
if len(transforms) == 0: """
Helper preventing copy code.
Remove given what (transform prior etc) from which param index ops.
"""
if len(what) == 0:
transforms = which.properties() transforms = which.properties()
import numpy as np
removed = np.empty((0,), dtype=int) removed = np.empty((0,), dtype=int)
for t in transforms: for t in transforms:
unconstrained = which.remove(t, self._raveled_index()) unconstrained = which.remove(t, self._raveled_index())
@ -329,15 +530,119 @@ class Constrainable(Nameable, Indexable):
return removed return removed
class OptimizationHandlable(Constrainable, Observable):
"""
This enables optimization handles on an Object as done in GPy 0.4.
class Parameterizable(Constrainable, Observable): transformed: make sure the transformations and constraints etc are handled
"""
def transform(self):
[np.put(self._param_array_, ind, c.finv(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
def untransform(self):
[np.put(self._param_array_, ind, c.f(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
def _get_params_transformed(self):
# transformed parameters (apply transformation rules)
p = self._param_array_.copy()
[np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes():
return p[self._fixes_]
return p
def _set_params_transformed(self, p):
if p is self._param_array_:
p = p.copy()
if self._has_fixes(): self._param_array_[self._fixes_] = p
else: self._param_array_[:] = p
self.untransform()
self._trigger_params_changed()
def _trigger_params_changed(self, trigger_parent=True):
[p._trigger_params_changed(trigger_parent=False) for p in self._parameters_]
if trigger_parent: min_priority = None
else: min_priority = -np.inf
self.notify_observers(None, min_priority)
def _size_transformed(self):
return self.size - self.constraints[__fixed__].size
#
# def _untransform_params(self, p):
# # inverse apply transformations for parameters
# #p = p.copy()
# if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
# [np.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
# return p
#
# def _get_params(self):
# """
# get all parameters
# """
# return self._param_array_
# p = np.empty(self.size, dtype=np.float64)
# if self.size == 0:
# return p
# [np.put(p, ind, par._get_params()) for ind, par in itertools.izip(self._param)]
# return p
# def _set_params(self, params, trigger_parent=True):
# self._param_array_.flat = params
# if trigger_parent: min_priority = None
# else: min_priority = -np.inf
# self.notify_observers(None, min_priority)
# don't overwrite this anymore!
#raise NotImplementedError, "Abstract superclass: This needs to be implemented in Param and Parameterizable"
#===========================================================================
# Optimization handles:
#===========================================================================
def _get_param_names(self):
n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
return n
def _get_param_names_transformed(self):
n = self._get_param_names()
if self._has_fixes():
return n[self._fixes_]
return n
#===========================================================================
# Randomizeable
#===========================================================================
def randomize(self, rand_gen=np.random.normal, loc=0, scale=1, *args, **kwargs):
"""
Randomize the model.
Make this draw from the prior if one exists, else draw from given random generator
:param rand_gen: numpy random number generator which takes args and kwargs
:param flaot loc: loc parameter for random number generator
:param float scale: scale parameter for random number generator
:param args, kwargs: will be passed through to random number generator
"""
# first take care of all parameters (from N(0,1))
x = rand_gen(loc=loc, scale=scale, size=self._size_transformed(), *args, **kwargs)
# now draw from prior where possible
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
self._set_params_transformed(x) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
class Parameterizable(OptimizationHandlable):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(Parameterizable, self).__init__(*args, **kwargs) super(Parameterizable, self).__init__(*args, **kwargs)
from GPy.core.parameterization.array_core import ParamList from GPy.core.parameterization.lists_and_dicts import ArrayList
_parameters_ = ParamList() _parameters_ = ArrayList()
self.size = 0
self._param_array_ = np.empty(self.size, dtype=np.float64)
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
self._added_names_ = set() self._added_names_ = set()
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
"""
Get the names of all parameters of this model.
:param bool add_self: whether to add the own name in front of names
:param bool adjust_for_printing: whether to call `adjust_name_for_printing` on names
:param bool recursive: whether to traverse through hierarchy and append leaf node names
"""
if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x)
else: adjust = lambda x: x else: adjust = lambda x: x
if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)] if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)]
@ -349,15 +654,18 @@ class Parameterizable(Constrainable, Observable):
def num_params(self): def num_params(self):
return len(self._parameters_) return len(self._parameters_)
def _add_parameter_name(self, param): def _add_parameter_name(self, param, ignore_added_names=False):
pname = adjust_name_for_printing(param.name) pname = adjust_name_for_printing(param.name)
if ignore_added_names:
self.__dict__[pname] = param
return
# and makes sure to not delete programmatically added parameters # and makes sure to not delete programmatically added parameters
if pname in self.__dict__: if pname in self.__dict__:
if not (param is self.__dict__[pname]): if not (param is self.__dict__[pname]):
if pname in self._added_names_: if pname in self._added_names_:
del self.__dict__[pname] del self.__dict__[pname]
self._add_parameter_name(param) self._add_parameter_name(param)
else: elif pname not in dir(self):
self.__dict__[pname] = param self.__dict__[pname] = param
self._added_names_.add(pname) self._added_names_.add(pname)
@ -372,47 +680,187 @@ class Parameterizable(Constrainable, Observable):
def _name_changed(self, param, old_name): def _name_changed(self, param, old_name):
self._remove_parameter_name(None, old_name) self._remove_parameter_name(None, old_name)
self._add_parameter_name(param) self._add_parameter_name(param)
#=========================================================================
# Gradient handling
#=========================================================================
@property
def gradient(self):
    # flat, contiguous buffer holding the gradients of all child parameters
    return self._gradient_array_
@gradient.setter
def gradient(self, val):
    # assign in place so child views aliased onto this buffer see the update
    self._gradient_array_[:] = val
#===========================================================================
# def _collect_gradient(self, target):
# [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
#===========================================================================
#===========================================================================
# def _set_params(self, params, trigger_parent=True):
# [p._set_params(params[s], trigger_parent=False) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
# if trigger_parent: min_priority = None
# else: min_priority = -np.inf
# self.notify_observers(None, min_priority)
#===========================================================================
#===========================================================================
# def _set_gradient(self, g):
# [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
#===========================================================================
def add_parameter(self, param, index=None, _ignore_added_names=False):
"""
:param parameters: the parameters to add
:type parameters: list of or one :py:class:`GPy.core.param.Param`
:param [index]: index of where to put parameters
:param bool _ignore_added_names: whether the name of the parameter overrides a possibly existing field
Add all parameters to this param class, you can insert parameters
at any given index using the :func:`list.insert` syntax
"""
# if param.has_parent():
# raise AttributeError, "parameter {} already in another model, create new object (or copy) for adding".format(param._short())
if param in self._parameters_ and index is not None:
self.remove_parameter(param)
self.add_parameter(param, index)
elif param not in self._parameters_:
if param.has_parent():
parent = param._parent_
while parent is not None:
if parent is self:
raise HierarchyError, "You cannot add a parameter twice into the hierarchy"
parent = parent._parent_
param._parent_.remove_parameter(param)
# make sure the size is set
if index is None:
self.constraints.update(param.constraints, self.size)
self.priors.update(param.priors, self.size)
self._parameters_.append(param)
else:
start = sum(p.size for p in self._parameters_[:index])
self.constraints.shift_right(start, param.size)
self.priors.shift_right(start, param.size)
self.constraints.update(param.constraints, start)
self.priors.update(param.priors, start)
self._parameters_.insert(index, param)
def _collect_gradient(self, target): param.add_observer(self, self._pass_through_notify_observers, -np.inf)
import itertools
[p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] self.size += param.size
def _set_gradient(self, g): self._connect_parameters(ignore_added_names=_ignore_added_names)
import itertools self._notify_parent_change()
[p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] self._connect_fixes()
else:
raise RuntimeError, """Parameter exists already added and no copy made"""
def _get_params(self):
    import numpy as np
    # don't overwrite this anymore!
    # Flat vector of every child parameter's current values.
    if not self.size:
        return np.empty(shape=(0,), dtype=np.float64)
    collected = [child._get_params() for child in self._parameters_ if child.size > 0]
    return np.hstack(collected)
def _set_params(self, params, update=True): def add_parameters(self, *parameters):
# don't overwrite this anymore! """
import itertools convenience method for adding several
[p._set_params(params[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] parameters without gradient specification
self._notify_parameters_changed() """
[self.add_parameter(p) for p in parameters]
def remove_parameter(self, param):
    """
    :param param: param object to remove from being a parameter of this parameterized object.
    """
    if not param in self._parameters_:
        raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short())
    # offset of this child's block inside the flat parameter array
    start = sum([p.size for p in self._parameters_[:param._parent_index_]])
    self._remove_parameter_name(param)
    self.size -= param.size
    del self._parameters_[param._parent_index_]
    # detach the child from the hierarchy and the notification chain
    param._disconnect_parent()
    param.remove_observer(self, self._pass_through_notify_observers)
    # close the gap the removed block leaves in the constraint index mapping
    # NOTE(review): priors are not shift_left'ed here, unlike constraints --
    # possible oversight, confirm against add_parameter which updates both
    self.constraints.shift_left(start, param.size)
    self._connect_fixes()
    self._connect_parameters()
    self._notify_parent_change()
    # the flat buffer views of every ancestor are stale now; rebuild them
    # all the way up the hierarchy
    parent = self._parent_
    while parent is not None:
        parent._connect_fixes()
        parent._connect_parameters()
        parent._notify_parent_change()
        parent = parent._parent_
def _connect_parameters(self, ignore_added_names=False):
    # connect parameterlist to this parameterized object
    # This just sets up the right connection for the params objects
    # to be used as parameters
    # it also sets the constraints for each parameter to the constraints
    # of their respective parents
    if not hasattr(self, "_parameters_") or len(self._parameters_) < 1:
        # no parameters for this class
        return
    old_size = 0
    # allocate fresh flat buffers; every child below is re-pointed into
    # slices of these so parent and children share the same memory
    self._param_array_ = np.empty(self.size, dtype=np.float64)
    self._gradient_array_ = np.empty(self.size, dtype=np.float64)
    self._param_slices_ = []
    for i, p in enumerate(self._parameters_):
        p._parent_ = self
        p._parent_index_ = i
        pslice = slice(old_size, old_size+p.size)
        pi_old_size = old_size
        for pi in p.flattened_parameters:
            pislice = slice(pi_old_size, pi_old_size+pi.size)
            # copy current values into our buffer, then alias the leaf
            # arrays onto it (assigning .data turns them into views)
            self._param_array_[pislice] = pi._param_array_.flat
            self._gradient_array_[pislice] = pi._gradient_array_.flat
            pi._param_array_.data = self._param_array_[pislice].data
            pi._gradient_array_.data = self._gradient_array_[pislice].data
            pi_old_size += pi.size
        # the direct child as a whole also becomes a view onto our buffer
        p._param_array_.data = self._param_array_[pslice].data
        p._gradient_array_.data = self._gradient_array_[pslice].data
        self._param_slices_.append(pslice)
        # expose the child as an attribute (e.g. m.rbf)
        self._add_parameter_name(p, ignore_added_names=ignore_added_names)
        old_size += p.size
#===========================================================================
# notification system
#===========================================================================
def _parameters_changed_notification(self, which):
    # observer callback: a parameter value somewhere below us changed,
    # so re-run the model's update hook
    self.parameters_changed()
def _pass_through_notify_observers(self, which):
    # forward a child's notification unchanged to our own observers
    self.notify_observers(which)
#===========================================================================
# TODO: not working yet
#===========================================================================
def copy(self): def copy(self):
"""Returns a (deep) copy of the current model""" """Returns a (deep) copy of the current model"""
import copy import copy
from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView
from .array_core import ParamList from .lists_and_dicts import ArrayList
dc = dict() dc = dict()
for k, v in self.__dict__.iteritems(): for k, v in self.__dict__.iteritems():
if k not in ['_direct_parent_', '_parameters_', '_parent_index_'] + self.parameter_names(): if k not in ['_parent_', '_parameters_', '_parent_index_', '_observer_callables_'] + self.parameter_names(recursive=False):
if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)): if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)):
dc[k] = v.copy() dc[k] = v.copy()
else: else:
dc[k] = copy.deepcopy(v) dc[k] = copy.deepcopy(v)
if k == '_parameters_': if k == '_parameters_':
params = [p.copy() for p in v] params = [p.copy() for p in v]
dc['_direct_parent_'] = None dc['_parent_'] = None
dc['_parent_index_'] = None dc['_parent_index_'] = None
dc['_parameters_'] = ParamList() dc['_observer_callables_'] = []
dc['_parameters_'] = ArrayList()
dc['constraints'].clear() dc['constraints'].clear()
dc['priors'].clear() dc['priors'].clear()
dc['size'] = 0 dc['size'] = 0
@ -421,16 +869,20 @@ class Parameterizable(Constrainable, Observable):
s.__dict__ = dc s.__dict__ = dc
for p in params: for p in params:
s.add_parameter(p) s.add_parameter(p, _ignore_added_names=True)
return s return s
#===========================================================================
# From being parentable, we have to define the parent_change notification
#===========================================================================
def _notify_parent_change(self):
    """
    Notify all parameters that the parent has changed
    """
    # direct children only; each child propagates further down itself
    for p in self._parameters_:
        p._parent_changed(self)
def _notify_parameters_changed(self):
    # run our own update hook and observers, then bubble the change
    # up to the parent so the whole hierarchy stays consistent
    self.parameters_changed()
    self._notify_observers()
    if self.has_parent():
        self._direct_parent_._notify_parameters_changed()
def parameters_changed(self): def parameters_changed(self):
""" """
This method gets called when parameters have changed. This method gets called when parameters have changed.

View file

@ -7,11 +7,17 @@ import cPickle
import itertools import itertools
from re import compile, _pattern_type from re import compile, _pattern_type
from param import ParamConcatenation from param import ParamConcatenation
from parameter_core import Constrainable, Pickleable, Parentable, Observable, Parameterizable, adjust_name_for_printing, Gradcheckable from parameter_core import Pickleable, Parameterizable, adjust_name_for_printing
from transformations import __fixed__ from transformations import __fixed__
from array_core import ParamList from lists_and_dicts import ArrayList
class Parameterized(Parameterizable, Pickleable, Gradcheckable): class ParametersChangedMeta(type):
def __call__(self, *args, **kw):
    # metaclass hook: build the instance through the whole __init__ chain
    # first, then fire parameters_changed() exactly once on the fully
    # constructed object
    instance = super(ParametersChangedMeta, self).__call__(*args, **kw)
    instance.parameters_changed()
    return instance
class Parameterized(Parameterizable, Pickleable):
""" """
Parameterized class Parameterized class
@ -53,11 +59,18 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
If you want to operate on all parameters use m[''] to wildcard select all paramters If you want to operate on all parameters use m[''] to wildcard select all paramters
and concatenate them. Printing m[''] will result in printing of all parameters in detail. and concatenate them. Printing m[''] will result in printing of all parameters in detail.
""" """
#===========================================================================
# Metaclass for parameters changed after init.
# This makes sure, that parameters changed will always be called after __init__
# **Never** call parameters_changed() yourself
__metaclass__ = ParametersChangedMeta
#===========================================================================
def __init__(self, name=None, *a, **kw): def __init__(self, name=None, *a, **kw):
super(Parameterized, self).__init__(name=name, parent=None, parent_index=None, *a, **kw) super(Parameterized, self).__init__(name=name, parent=None, parent_index=None, *a, **kw)
self._in_init_ = True self._in_init_ = True
self._parameters_ = ParamList() self._parameters_ = ArrayList()
self.size = sum(p.size for p in self._parameters_) self.size = sum(p.size for p in self._parameters_)
self.add_observer(self, self._parameters_changed_notification, -100)
if not self._has_fixes(): if not self._has_fixes():
self._fixes_ = None self._fixes_ = None
self._param_slices_ = [] self._param_slices_ = []
@ -65,7 +78,7 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
del self._in_init_ del self._in_init_
def build_pydot(self, G=None): def build_pydot(self, G=None):
import pydot import pydot # @UnresolvedImport
iamroot = False iamroot = False
if G is None: if G is None:
G = pydot.Dot(graph_type='digraph') G = pydot.Dot(graph_type='digraph')
@ -87,116 +100,7 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
return G return G
return node return node
def add_parameter(self, param, index=None):
    """
    :param parameters: the parameters to add
    :type parameters: list of or one :py:class:`GPy.core.param.Param`
    :param [index]: index of where to put parameters

    Add all parameters to this param class, you can insert parameters
    at any given index using the :func:`list.insert` syntax
    """
    # if param.has_parent():
    # raise AttributeError, "parameter {} already in another model, create new object (or copy) for adding".format(param._short())
    if param in self._parameters_ and index is not None:
        # re-adding at a specific position: remove first, then insert
        self.remove_parameter(param)
        self.add_parameter(param, index)
    elif param not in self._parameters_:
        # make sure the size is set
        if index is None:
            # append: constraint/prior indices start at the current end
            self.constraints.update(param.constraints, self.size)
            self.priors.update(param.priors, self.size)
            self._parameters_.append(param)
        else:
            # insert: shift existing index mappings right to make room
            start = sum(p.size for p in self._parameters_[:index])
            self.constraints.shift_right(start, param.size)
            self.priors.shift_right(start, param.size)
            self.constraints.update(param.constraints, start)
            self.priors.update(param.priors, start)
            self._parameters_.insert(index, param)
        self.size += param.size
    else:
        raise RuntimeError, """Parameter exists already added and no copy made"""
    # rebuild slices, attribute names and fix masks after any change
    self._connect_parameters()
    self._notify_parent_change()
    self._connect_fixes()
def add_parameters(self, *parameters):
    """
    convenience method for adding several
    parameters without gradient specification
    """
    # plain loop: add_parameter is called for its side effects only,
    # so building a throwaway list (the old list comprehension) is noise
    for p in parameters:
        self.add_parameter(p)
def remove_parameter(self, param):
    """
    :param param: param object to remove from being a parameter of this parameterized object.
    """
    if not param in self._parameters_:
        raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short())
    # offset of this child's block inside the flat parameter array
    start = sum([p.size for p in self._parameters_[:param._parent_index_]])
    self._remove_parameter_name(param)
    self.size -= param.size
    del self._parameters_[param._parent_index_]
    # detach the child from the hierarchy and the notification chain
    param._disconnect_parent()
    param.remove_observer(self, self._notify_parameters_changed)
    # close the gap in the constraint index mapping, then rebuild views
    self.constraints.shift_left(start, param.size)
    self._connect_fixes()
    self._connect_parameters()
    self._notify_parent_change()
def _connect_parameters(self):
    # connect parameterlist to this parameterized object
    # This just sets up the right connection for the params objects
    # to be used as parameters
    # it also sets the constraints for each parameter to the constraints
    # of their respective parents
    if not hasattr(self, "_parameters_") or len(self._parameters_) < 1:
        # no parameters for this class
        return
    # running prefix sum of child sizes; sizes[i] is the offset of child i
    sizes = [0]
    self._param_slices_ = []
    for i, p in enumerate(self._parameters_):
        p._direct_parent_ = self
        p._parent_index_ = i
        sizes.append(p.size + sizes[-1])
        # slice of the flat parameter vector owned by this child
        self._param_slices_.append(slice(sizes[-2], sizes[-1]))
        # expose the child as an attribute (e.g. m.rbf)
        self._add_parameter_name(p)
#===========================================================================
# Pickling operations
#===========================================================================
def pickle(self, f, protocol=-1):
    """
    :param f: either filename or open file object to write to.
              if it is an open buffer, you have to make sure to close
              it properly.
    :param protocol: pickling protocol to use, python-pickle for details.
    """
    if isinstance(f, str):
        # bugfix: protocol -1 selects the highest (binary) pickle protocol,
        # so the file must be opened in binary mode; text mode 'w'
        # corrupts the stream on platforms with newline translation
        with open(f, 'wb') as f:
            cPickle.dump(self, f, protocol)
    else:
        cPickle.dump(self, f, protocol)
def __getstate__(self):
    # Pickle hook: use the class's handwritten minimal state when it
    # defines one, otherwise fall back to the full instance dict.
    if not self._has_get_set_state():
        return self.__dict__
    return self._getstate()
def __setstate__(self, state):
if self._has_get_set_state():
self._setstate(state) # set state
# self._set_params(self._get_params()) # restore all values
return
self.__dict__ = state
def _has_get_set_state(self):
    # both hooks must be defined directly on the concrete class
    # (vars(...) ignores inherited attributes on purpose)
    cls_dict = vars(self.__class__)
    return ('_getstate' in cls_dict) and ('_setstate' in cls_dict)
def _getstate(self): def _getstate(self):
""" """
Get the current state of the class, Get the current state of the class,
@ -225,60 +129,33 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
self._connect_parameters() self._connect_parameters()
self.parameters_changed() self.parameters_changed()
#=========================================================================== #===========================================================================
# Override copy to handle programmatically added observers
#===========================================================================
def copy(self):
    # deep-copying drops programmatically added observers, so the
    # parameters_changed notification hook must be re-registered on the copy
    # NOTE(review): super(Pickleable, self) starts the MRO lookup after
    # Pickleable rather than after this class -- confirm intended
    c = super(Pickleable, self).copy()
    c.add_observer(c, c._parameters_changed_notification, -100)
    return c
#===========================================================================
# Gradient control # Gradient control
#=========================================================================== #===========================================================================
def _transform_gradients(self, g): def _transform_gradients(self, g):
if self.has_parent(): if self.has_parent():
return g return g
x = self._get_params() [numpy.put(g, i, g[i] * c.gradfactor(self._param_array_[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
[numpy.put(g, i, g[i] * c.gradfactor(x[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
for p in self.flattened_parameters:
for t, i in p._tied_to_me_.iteritems():
g[self._offset_for(p) + numpy.array(list(i))] += g[self._raveled_index_for(t)]
if self._has_fixes(): return g[self._fixes_] if self._has_fixes(): return g[self._fixes_]
return g return g
#=========================================================================== #===========================================================================
# Optimization handles: # Indexable
#=========================================================================== #===========================================================================
def _get_param_names(self):
    # one flat display name per scalar entry, e.g. "rbf.lengthscale[2]"
    # NOTE(review): 'hirarchy_name' is spelled like this on the param
    # objects in this version -- do not "fix" without renaming the method
    n = numpy.array([p.hirarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
    return n
def _get_param_names_transformed(self):
    # names restricted to the free (non-fixed) entries
    n = self._get_param_names()
    if self._has_fixes():
        return n[self._fixes_]
    return n
def _get_params_transformed(self):
    # transformed parameters (apply transformation rules)
    p = self._get_params()
    # finv maps each constrained value into the optimizer's unconstrained space
    [numpy.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
    if self._has_fixes():
        # fixed entries are not exposed to the optimizer at all
        return p[self._fixes_]
    return p
def _set_params_transformed(self, p):
    # inverse apply transformations for parameters and set the resulting parameters
    # (p is in the optimizer's unconstrained space, without fixed entries)
    self._set_params(self._untransform_params(p))
def _untransform_params(self, p):
    # map an optimizer-space vector back to constrained model space
    p = p.copy()
    # re-insert the fixed entries (they keep their current model values)
    if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
    # f maps each unconstrained value back through its constraint transform
    [numpy.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
    return p
#===========================================================================
# Indexable Handling
#===========================================================================
def _backtranslate_index(self, param, ind):
    # translate an index in parameterized indexing into the index of param
    local = ind - self._offset_for(param)
    # keep only indices that actually fall inside param's own range
    local = local[local >= 0]
    upper = param.size + param._internal_offset()
    return local[local < upper]
def _offset_for(self, param): def _offset_for(self, param):
# get the offset in the parameterized index array for param # get the offset in the parameterized index array for param
if param.has_parent(): if param.has_parent():
if param._direct_parent_._get_original(param) in self._parameters_: if param._parent_._get_original(param) in self._parameters_:
return self._param_slices_[param._direct_parent_._get_original(param)._parent_index_].start return self._param_slices_[param._parent_._get_original(param)._parent_index_].start
return self._offset_for(param._direct_parent_) + param._direct_parent_._offset_for(param) return self._offset_for(param._parent_) + param._parent_._offset_for(param)
return 0 return 0
def _raveled_index_for(self, param): def _raveled_index_for(self, param):
@ -297,34 +174,22 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
this is not in the global view of things! this is not in the global view of things!
""" """
return numpy.r_[:self.size] return numpy.r_[:self.size]
#===========================================================================
# Fixing parameters:
#===========================================================================
def _fixes_for(self, param):
    # boolean free/fixed mask restricted to param's entries;
    # all-True when nothing in this object is fixed
    if self._has_fixes():
        return self._fixes_[self._raveled_index_for(param)]
    return numpy.ones(self.size, dtype=bool)[self._raveled_index_for(param)]
#=========================================================================== #===========================================================================
# Convenience for fixed, tied checking of param: # Convenience for fixed, tied checking of param:
#=========================================================================== #===========================================================================
def fixed_indices(self):
    """Return a boolean array with one entry per direct child parameter,
    True where that whole child is fixed."""
    # use `numpy`, matching the rest of this module (`np` is not bound here)
    return numpy.array([x.is_fixed for x in self._parameters_])
def _is_fixed(self, param):
    # returns if the whole param is fixed
    if not self._has_fixes():
        return False
    # fixed == no entry of param is free in the mask
    return not self._fixes_[self._raveled_index_for(param)].any()
    # return not self._fixes_[self._offset_for(param): self._offset_for(param)+param._realsize_].any()
@property @property
def is_fixed(self): def is_fixed(self):
for p in self._parameters_: for p in self._parameters_:
if not p.is_fixed: return False if not p.is_fixed: return False
return True return True
def _get_original(self, param): def _get_original(self, param):
# if advanced indexing is activated it happens that the array is a copy # if advanced indexing is activated it happens that the array is a copy
# you can retrieve the original param through this method, by passing # you can retrieve the original param through this method, by passing
# the copy here # the copy here
return self._parameters_[param._parent_index_] return self._parameters_[param._parent_index_]
#=========================================================================== #===========================================================================
# Get/set parameters: # Get/set parameters:
#=========================================================================== #===========================================================================
@ -352,9 +217,13 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
return ParamConcatenation(paramlist) return ParamConcatenation(paramlist)
def __setitem__(self, name, value, paramlist=None): def __setitem__(self, name, value, paramlist=None):
try: param = self.__getitem__(name, paramlist) if isinstance(name, (slice, tuple, np.ndarray)):
except AttributeError as a: raise a self._param_array_[name] = value
param[:] = value else:
try: param = self.__getitem__(name, paramlist)
except AttributeError as a: raise a
param[:] = value
def __setattr__(self, name, val): def __setattr__(self, name, val):
# override the default behaviour, if setting a param, so broadcasting can by used # override the default behaviour, if setting a param, so broadcasting can by used
if hasattr(self, '_parameters_'): if hasattr(self, '_parameters_'):
@ -365,7 +234,7 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
# Printing: # Printing:
#=========================================================================== #===========================================================================
def _short(self): def _short(self):
return self.hirarchy_name() return self.hierarchy_name()
@property @property
def flattened_parameters(self): def flattened_parameters(self):
return [xi for x in self._parameters_ for xi in x.flattened_parameters] return [xi for x in self._parameters_ for xi in x.flattened_parameters]
@ -373,11 +242,6 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
def _parameter_sizes_(self): def _parameter_sizes_(self):
return [x.size for x in self._parameters_] return [x.size for x in self._parameters_]
@property @property
def size_transformed(self):
if self._has_fixes():
return sum(self._fixes_)
return self.size
@property
def parameter_shapes(self): def parameter_shapes(self):
return [xi for x in self._parameters_ for xi in x.parameter_shapes] return [xi for x in self._parameters_ for xi in x.parameter_shapes]
@property @property
@ -404,7 +268,7 @@ class Parameterized(Parameterizable, Pickleable, Gradcheckable):
cl = max([len(str(x)) if x else 0 for x in constrs + ["Constraint"]]) cl = max([len(str(x)) if x else 0 for x in constrs + ["Constraint"]])
tl = max([len(str(x)) if x else 0 for x in ts + ["Tied to"]]) tl = max([len(str(x)) if x else 0 for x in ts + ["Tied to"]])
pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]]) pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]])
format_spec = " \033[1m{{name:<{0}s}}\033[0;0m | {{desc:^{1}s}} | {{const:^{2}s}} | {{pri:^{3}s}} | {{t:^{4}s}}".format(nl, sl, cl, pl, tl) format_spec = " \033[1m{{name:<{0}s}}\033[0;0m | {{desc:>{1}s}} | {{const:^{2}s}} | {{pri:^{3}s}} | {{t:^{4}s}}".format(nl, sl, cl, pl, tl)
to_print = [] to_print = []
for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs): for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs):
to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p)) to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))

View file

@ -64,6 +64,36 @@ class Gaussian(Prior):
return np.random.randn(n) * self.sigma + self.mu return np.random.randn(n) * self.sigma + self.mu
class Uniform(Prior):
    """Uniform prior over the closed interval [lower, upper].

    Instances are pooled: constructing Uniform(a, b) twice returns the
    same object (weak references keep dead priors from leaking).
    """
    domain = _REAL
    _instances = []
    def __new__(cls, lower, upper): # Singleton:
        if cls._instances:
            # drop pool entries whose referent has been garbage collected
            cls._instances[:] = [instance for instance in cls._instances if instance()]
            for instance in cls._instances:
                if instance().lower == lower and instance().upper == upper:
                    return instance()
        # bugfix: object.__new__ must not receive the constructor arguments
        # (DeprecationWarning on py2.6+, TypeError on py3); __init__ gets
        # (lower, upper) separately when the instance is returned
        o = super(Prior, cls).__new__(cls)
        cls._instances.append(weakref.ref(o))
        return cls._instances[-1]()
    def __init__(self, lower, upper):
        self.lower = float(lower)
        self.upper = float(upper)
    def __str__(self):
        return "[" + str(np.round(self.lower)) + ', ' + str(np.round(self.upper)) + ']'
    def lnpdf(self, x):
        # NOTE(review): returns the 0/1 support indicator, not the true
        # log-density -log(upper - lower); kept unchanged since the other
        # priors here are used unnormalized -- confirm intended
        region = (x >= self.lower) * (x <= self.upper)
        return region
    def lnpdf_grad(self, x):
        # density is flat on the support, so the gradient is zero everywhere
        return np.zeros(x.shape)
    def rvs(self, n):
        return np.random.uniform(self.lower, self.upper, size=n)
class LogGaussian(Prior): class LogGaussian(Prior):
""" """
Implementation of the univariate *log*-Gaussian probability function, coupled with random variables. Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.

View file

@ -6,8 +6,11 @@ import numpy as np
from domains import _POSITIVE,_NEGATIVE, _BOUNDED from domains import _POSITIVE,_NEGATIVE, _BOUNDED
import weakref import weakref
import sys
#_lim_val = -np.log(sys.float_info.epsilon)
_exp_lim_val = np.finfo(np.float64).max _exp_lim_val = np.finfo(np.float64).max
_lim_val = np.log(_exp_lim_val)#-np.log(sys.float_info.epsilon) _lim_val = np.log(_exp_lim_val)
#=============================================================================== #===============================================================================
# Fixing constants # Fixing constants
@ -35,7 +38,6 @@ class Transformation(object):
""" produce a sensible initial value for f(x)""" """ produce a sensible initial value for f(x)"""
raise NotImplementedError raise NotImplementedError
def plot(self, xlabel=r'transformed $\theta$', ylabel=r'$\theta$', axes=None, *args,**kw): def plot(self, xlabel=r'transformed $\theta$', ylabel=r'$\theta$', axes=None, *args,**kw):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from ...plotting.matplot_dep import base_plots from ...plotting.matplot_dep import base_plots
@ -52,10 +54,10 @@ class Transformation(object):
class Logexp(Transformation): class Logexp(Transformation):
domain = _POSITIVE domain = _POSITIVE
def f(self, x): def f(self, x):
return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -np.inf, _lim_val)))) return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -_lim_val, _lim_val))))
#raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x))) #raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
def finv(self, f): def finv(self, f):
return np.where(f>_lim_val, f, np.log(np.exp(f) - 1.)) return np.where(f>_lim_val, f, np.log(np.exp(f+1e-20) - 1.))
def gradfactor(self, f): def gradfactor(self, f):
return np.where(f>_lim_val, 1., 1 - np.exp(-f)) return np.where(f>_lim_val, 1., 1 - np.exp(-f))
def initialize(self, f): def initialize(self, f):

View file

@ -7,10 +7,10 @@ Created on 6 Nov 2013
import numpy as np import numpy as np
from parameterized import Parameterized from parameterized import Parameterized
from param import Param from param import Param
from transformations import Logexp from transformations import Logexp, Logistic
class VariationalPrior(Parameterized): class VariationalPrior(Parameterized):
def __init__(self, name=None, **kw): def __init__(self, name='latent space', **kw):
super(VariationalPrior, self).__init__(name=name, **kw) super(VariationalPrior, self).__init__(name=name, **kw)
def KL_divergence(self, variational_posterior): def KL_divergence(self, variational_posterior):
@ -34,12 +34,12 @@ class NormalPrior(VariationalPrior):
variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5 variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5
class SpikeAndSlabPrior(VariationalPrior): class SpikeAndSlabPrior(VariationalPrior):
def __init__(self, variance = 1.0, pi = 0.5, name='SpikeAndSlabPrior', **kw): def __init__(self, pi, variance = 1.0, name='SpikeAndSlabPrior', **kw):
super(VariationalPrior, self).__init__(name=name, **kw) super(VariationalPrior, self).__init__(name=name, **kw)
assert variance==1.0, "Not Implemented!" assert variance==1.0, "Not Implemented!"
self.pi = Param('pi', pi) self.pi = Param('pi', pi, Logistic(1e-10,1.-1e-10))
self.variance = Param('variance',variance) self.variance = Param('variance',variance)
self.add_parameters(self.pi, self.variance) self.add_parameters(self.pi)
def KL_divergence(self, variational_posterior): def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean mu = variational_posterior.mean
@ -58,6 +58,8 @@ class SpikeAndSlabPrior(VariationalPrior):
gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2. gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2.
mu.gradient -= gamma*mu mu.gradient -= gamma*mu
S.gradient -= (1. - (1. / (S))) * gamma /2. S.gradient -= (1. - (1. / (S))) * gamma /2.
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
class VariationalPosterior(Parameterized): class VariationalPosterior(Parameterized):
@ -103,7 +105,7 @@ class SpikeAndSlabPosterior(VariationalPosterior):
binary_prob : the probability of the distribution on the slab part. binary_prob : the probability of the distribution on the slab part.
""" """
super(SpikeAndSlabPosterior, self).__init__(means, variances, name) super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
self.gamma = Param("binary_prob",binary_prob,) self.gamma = Param("binary_prob",binary_prob, Logistic(1e-10,1.-1e-10))
self.add_parameter(self.gamma) self.add_parameter(self.gamma)
def plot(self, *args): def plot(self, *args):
@ -115,4 +117,4 @@ class SpikeAndSlabPosterior(VariationalPosterior):
import sys import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import variational_plots from ...plotting.matplot_dep import variational_plots
return variational_plots.plot(self,*args) return variational_plots.plot_SpikeSlab(self,*args)

View file

@ -48,7 +48,6 @@ class SparseGP(GP):
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name) GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name)
self.add_parameter(self.Z, index=0) self.add_parameter(self.Z, index=0)
self.parameters_changed()
def has_uncertain_inputs(self): def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior) return isinstance(self.X, VariationalPosterior)
@ -60,11 +59,9 @@ class SparseGP(GP):
#gradients wrt kernel #gradients wrt kernel
dL_dKmm = self.grad_dict.pop('dL_dKmm') dL_dKmm = self.grad_dict.pop('dL_dKmm')
self.kern.update_gradients_full(dL_dKmm, self.Z, None) self.kern.update_gradients_full(dL_dKmm, self.Z, None)
target = np.zeros(self.kern.size) target = self.kern.gradient.copy()
self.kern._collect_gradient(target)
self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict) self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
self.kern._collect_gradient(target) self.kern.gradient += target
self.kern._set_gradient(target)
#gradients wrt Z #gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
@ -72,24 +69,22 @@ class SparseGP(GP):
self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
else: else:
#gradients wrt kernel #gradients wrt kernel
target = np.zeros(self.kern.size)
self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X) self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
self.kern._collect_gradient(target) target = self.kern.gradient.copy()
self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z) self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)
self.kern._collect_gradient(target) target += self.kern.gradient
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None) self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
self.kern._collect_gradient(target) self.kern.gradient += target
self.kern._set_gradient(target)
#gradients wrt Z #gradients wrt Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False): def _raw_predict(self, Xnew, full_cov=False):
""" """
Make a prediction for the latent function values Make a prediction for the latent function values
""" """
if X_variance_new is None: if not isinstance(Xnew, VariationalPosterior):
Kx = self.kern.K(self.Z, Xnew) Kx = self.kern.K(self.Z, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector) mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov: if full_cov:
@ -100,13 +95,13 @@ class SparseGP(GP):
Kxx = self.kern.Kdiag(Xnew) Kxx = self.kern.Kdiag(Xnew)
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
else: else:
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) Kx = self.kern.psi1(self.Z, Xnew)
mu = np.dot(Kx, self.Cpsi1V) mu = np.dot(Kx, self.posterior.woodbury_vector)
if full_cov: if full_cov:
raise NotImplementedError, "TODO" raise NotImplementedError, "TODO"
else: else:
Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new) Kxx = self.kern.psi0(self.Z, Xnew)
psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new) psi2 = self.kern.psi2(self.Z, Xnew)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var return mu, var
@ -114,14 +109,12 @@ class SparseGP(GP):
def _getstate(self): def _getstate(self):
""" """
Get the current state of the class, Get the current state of the class,
here just all the indices, rest can get recomputed
""" """
return GP._getstate(self) + [self.Z, return GP._getstate(self) + [
self.num_inducing, self.Z,
self.X_variance] self.num_inducing]
def _setstate(self, state): def _setstate(self, state):
self.X_variance = state.pop()
self.num_inducing = state.pop() self.num_inducing = state.pop()
self.Z = state.pop() self.Z = state.pop()
GP._setstate(self, state) GP._setstate(self, state)

View file

@ -187,10 +187,10 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
_np.random.seed(1234) _np.random.seed(1234)
x = _np.linspace(0, 4 * _np.pi, N)[:, None] x = _np.linspace(0, 4 * _np.pi, N)[:, None]
s1 = _np.vectorize(lambda x: -_np.sin(_np.exp(x))) s1 = _np.vectorize(lambda x: _np.sin(x))
s2 = _np.vectorize(lambda x: _np.cos(x)**2) s2 = _np.vectorize(lambda x: _np.cos(x)**2)
s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x))) s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x)))
sS = _np.vectorize(lambda x: x*_np.sin(x)) sS = _np.vectorize(lambda x: _np.cos(x))
s1 = s1(x) s1 = s1(x)
s2 = s2(x) s2 = s2(x)
@ -202,7 +202,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
s3 -= s3.mean(); s3 /= s3.std(0) s3 -= s3.mean(); s3 /= s3.std(0)
sS -= sS.mean(); sS /= sS.std(0) sS -= sS.mean(); sS /= sS.std(0)
S1 = _np.hstack([s1, s2, sS]) S1 = _np.hstack([s1, sS])
S2 = _np.hstack([s2, s3, sS]) S2 = _np.hstack([s2, s3, sS])
S3 = _np.hstack([s3, sS]) S3 = _np.hstack([s3, sS])
@ -270,7 +270,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
from GPy import kern from GPy import kern
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 5, 9 D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
@ -294,7 +294,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 5, 9 D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 7, 9
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
@ -515,3 +515,28 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose
lvm_visualizer.close() lvm_visualizer.close()
return m return m
def ssgplvm_simulation_linear():
import numpy as np
import GPy
N, D, Q = 1000, 20, 5
pi = 0.2
def sample_X(Q, pi):
x = np.empty(Q)
dies = np.random.rand(Q)
for q in xrange(Q):
if dies[q]<pi:
x[q] = np.random.randn()
else:
x[q] = 0.
return x
Y = np.empty((N,D))
X = np.empty((N,Q))
# Generate data from random sampled weight matrices
for n in xrange(N):
X[n] = sample_X(Q,pi)
w = np.random.randn(D,Q)
Y[n] = np.dot(w,X[n])

View file

@ -30,7 +30,7 @@ def student_t_approx(optimize=True, plot=True):
#Yc = Yc/Yc.max() #Yc = Yc/Yc.max()
#Add student t random noise to datapoints #Add student t random noise to datapoints
deg_free = 5 deg_free = 1
print "Real noise: ", real_std print "Real noise: ", real_std
initial_var_guess = 0.5 initial_var_guess = 0.5
edited_real_sd = initial_var_guess edited_real_sd = initial_var_guess
@ -47,9 +47,9 @@ def student_t_approx(optimize=True, plot=True):
m1['.*white'].constrain_fixed(1e-5) m1['.*white'].constrain_fixed(1e-5)
m1.randomize() m1.randomize()
##Gaussian GP model on corrupt data #Gaussian GP model on corrupt data
m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2) m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2)
m1['.*white'].constrain_fixed(1e-5) m2['.*white'].constrain_fixed(1e-5)
m2.randomize() m2.randomize()
#Student t GP model on clean data #Student t GP model on clean data
@ -59,10 +59,6 @@ def student_t_approx(optimize=True, plot=True):
m3['.*t_noise'].constrain_bounded(1e-6, 10.) m3['.*t_noise'].constrain_bounded(1e-6, 10.)
m3['.*white'].constrain_fixed(1e-5) m3['.*white'].constrain_fixed(1e-5)
m3.randomize() m3.randomize()
debug = True
#TODO: remove
return m3
#Student t GP model on corrupt data #Student t GP model on corrupt data
t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
@ -71,6 +67,16 @@ def student_t_approx(optimize=True, plot=True):
m4['.*t_noise'].constrain_bounded(1e-6, 10.) m4['.*t_noise'].constrain_bounded(1e-6, 10.)
m4['.*white'].constrain_fixed(1e-5) m4['.*white'].constrain_fixed(1e-5)
m4.randomize() m4.randomize()
print m4
debug=True
if debug:
m4.optimize(messages=1)
import pylab as pb
pb.plot(m4.X, m4.inference_method.f_hat)
pb.plot(m4.X, m4.Y, 'rx')
m4.plot()
print m4
return m4
if optimize: if optimize:
optimizer='scg' optimizer='scg'

View file

@ -284,7 +284,7 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
kern = GPy.kern.RBF(1) kern = GPy.kern.RBF(1)
poisson_lik = GPy.likelihoods.Poisson() poisson_lik = GPy.likelihoods.Poisson()
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference() laplace_inf = GPy.inference.latent_function_inference.Laplace()
# create simple GP Model # create simple GP Model
m = GPy.core.GP(X, Y, kernel=kern, likelihood=poisson_lik, inference_method=laplace_inf) m = GPy.core.GP(X, Y, kernel=kern, likelihood=poisson_lik, inference_method=laplace_inf)

View file

@ -11,7 +11,7 @@
#http://gaussianprocess.org/gpml/code. #http://gaussianprocess.org/gpml/code.
import numpy as np import numpy as np
from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify, pdinv
from ...util.misc import param_to_array from ...util.misc import param_to_array
from posterior import Posterior from posterior import Posterior
import warnings import warnings
@ -148,7 +148,7 @@ class Laplace(object):
#compute vital matrices #compute vital matrices
C = np.dot(LiW12, K) C = np.dot(LiW12, K)
Ki_W_i = K - C.T.dot(C) Ki_W_i = K - C.T.dot(C) #Could this be wrong?
#compute the log marginal #compute the log marginal
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L))) log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L)))

View file

@ -19,12 +19,16 @@ class VarDTC(object):
""" """
const_jitter = 1e-6 const_jitter = 1e-6
def __init__(self): def __init__(self, limit=1):
#self._YYTfactor_cache = caching.cache() #self._YYTfactor_cache = caching.cache()
from ...util.caching import Cacher from ...util.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, 1) self.get_trYYT = Cacher(self._get_trYYT, limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, 1) self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
def set_limit(self, limit):
self.get_trYYT.limit = limit
self.get_YYTfactor.limit = limit
def _get_trYYT(self, Y): def _get_trYYT(self, Y):
return param_to_array(np.sum(np.square(Y))) return param_to_array(np.sum(np.square(Y)))
@ -60,8 +64,7 @@ class VarDTC(object):
_, output_dim = Y.shape _, output_dim = Y.shape
#see whether we've got a different noise variance for each datum #see whether we've got a different noise variance for each datum
beta = 1./np.squeeze(likelihood.variance) beta = 1./np.fmax(likelihood.variance, 1e-6)
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = self.get_YYTfactor(Y) #self.YYTfactor = self.get_YYTfactor(Y)
#VVT_factor = self.get_VVTfactor(self.YYTfactor, beta) #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
@ -76,7 +79,7 @@ class VarDTC(object):
# kernel computations, using BGPLVM notation # kernel computations, using BGPLVM notation
Kmm = kern.K(Z) Kmm = kern.K(Z)
Lm = jitchol(Kmm) Lm = jitchol(Kmm+np.eye(Z.shape[0])*self.const_jitter)
# The rather complex computations of A # The rather complex computations of A
if uncertain_inputs: if uncertain_inputs:
@ -176,11 +179,14 @@ class VarDTC(object):
return post, log_marginal, grad_dict return post, log_marginal, grad_dict
class VarDTCMissingData(object): class VarDTCMissingData(object):
def __init__(self): def __init__(self, limit=1):
from ...util.caching import Cacher from ...util.caching import Cacher
self._Y = Cacher(self._subarray_computations, 1) self._Y = Cacher(self._subarray_computations, limit)
pass pass
def set_limit(self, limit):
self._Y.limit = limit
def _subarray_computations(self, Y): def _subarray_computations(self, Y):
inan = np.isnan(Y) inan = np.isnan(Y)
has_none = inan.any() has_none = inan.any()
@ -214,7 +220,7 @@ class VarDTCMissingData(object):
psi2_all = None psi2_all = None
Ys, traces = self._Y(Y) Ys, traces = self._Y(Y)
beta_all = 1./likelihood.variance beta_all = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta_all.size != 1 het_noise = beta_all.size != 1
import itertools import itertools

View file

@ -51,8 +51,6 @@ class Coregionalize(Kern):
assert kappa.shape==(self.output_dim, ) assert kappa.shape==(self.output_dim, )
self.kappa = Param('kappa', kappa, Logexp()) self.kappa = Param('kappa', kappa, Logexp())
self.add_parameters(self.W, self.kappa) self.add_parameters(self.W, self.kappa)
self.parameters_changed()
def parameters_changed(self): def parameters_changed(self):
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa) self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)

View file

@ -73,7 +73,7 @@ class Kern(Parameterized):
See GPy.plotting.matplot_dep.plot See GPy.plotting.matplot_dep.plot
""" """
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import kernel_plots from ...plotting.matplot_dep import kernel_plots
kernel_plots.plot(self,*args) kernel_plots.plot(self,*args)
def plot_ARD(self, *args, **kw): def plot_ARD(self, *args, **kw):
@ -89,7 +89,7 @@ class Kern(Parameterized):
""" """
Returns the sensitivity for each dimension of this kernel. Returns the sensitivity for each dimension of this kernel.
""" """
return np.zeros(self.input_dim) return self.kern.input_sensitivity()
def __add__(self, other): def __add__(self, other):
""" Overloading of the '+' operator. for more control, see self.add """ """ Overloading of the '+' operator. for more control, see self.add """
@ -112,10 +112,12 @@ class Kern(Parameterized):
""" """
assert isinstance(other, Kern), "only kernels can be added to kernels..." assert isinstance(other, Kern), "only kernels can be added to kernels..."
from add import Add from add import Add
return Add([self, other], tensor) kernels = []
if not tensor and isinstance(self, Add): kernels.extend(self._parameters_)
def __call__(self, X, X2=None): else: kernels.append(self)
return self.K(X, X2) if not tensor and isinstance(other, Add): kernels.extend(other._parameters_)
else: kernels.append(other)
return Add(kernels, tensor)
def __mul__(self, other): def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information""" """ Here we overload the '*' operator. See self.prod for more information"""
@ -127,7 +129,7 @@ class Kern(Parameterized):
""" """
return self.prod(other, tensor=True) return self.prod(other, tensor=True)
def prod(self, other, tensor=False): def prod(self, other, tensor=False, name=None):
""" """
Multiply two kernels (either on the same space, or on the tensor Multiply two kernels (either on the same space, or on the tensor
product of the input space). product of the input space).
@ -140,4 +142,4 @@ class Kern(Parameterized):
""" """
assert isinstance(other, Kern), "only kernels can be added to kernels..." assert isinstance(other, Kern), "only kernels can be added to kernels..."
from prod import Prod from prod import Prod
return Prod(self, other, tensor) return Prod(self, other, tensor, name)

View file

@ -6,10 +6,12 @@ import numpy as np
from scipy import weave from scipy import weave
from kern import Kern from kern import Kern
from ...util.linalg import tdot from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array from ...util.misc import param_to_array
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
from ...util.caching import Cache_this from ...util.caching import Cache_this
from ...core.parameterization import variational
from psi_comp import linear_psi_comp
class Linear(Kern): class Linear(Kern):
""" """
@ -104,49 +106,113 @@ class Linear(Kern):
#---------------------------------------# #---------------------------------------#
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
return np.sum(self.variances * self._mu2S(variational_posterior), 1) if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
gamma = variational_posterior.binary_prob
mu = variational_posterior.mean
S = variational_posterior.variance
return np.einsum('q,nq,nq->n',self.variances,gamma,np.square(mu)+S)
# return (self.variances*gamma*(np.square(mu)+S)).sum(axis=1)
else:
return np.sum(self.variances * self._mu2S(variational_posterior), 1)
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
return self.K(variational_posterior.mean, Z) #the variance, it does nothing if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
gamma = variational_posterior.binary_prob
mu = variational_posterior.mean
return np.einsum('nq,q,mq,nq->nm',gamma,self.variances,Z,mu)
# return (self.variances*gamma*mu).sum(axis=1)
else:
return self.K(variational_posterior.mean, Z) #the variance, it does nothing
@Cache_this(limit=1) @Cache_this(limit=1)
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
ZA = Z * self.variances if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
ZAinner = self._ZAinner(variational_posterior, Z) gamma = variational_posterior.binary_prob
return np.dot(ZAinner, ZA.T) mu = variational_posterior.mean
S = variational_posterior.variance
mu2 = np.square(mu)
variances2 = np.square(self.variances)
tmp = np.einsum('nq,q,mq,nq->nm',gamma,self.variances,Z,mu)
return np.einsum('nq,q,mq,oq,nq->nmo',gamma,variances2,Z,Z,mu2+S)+\
np.einsum('nm,no->nmo',tmp,tmp) - np.einsum('nq,q,mq,oq,nq->nmo',np.square(gamma),variances2,Z,Z,mu2)
else:
ZA = Z * self.variances
ZAinner = self._ZAinner(variational_posterior, Z)
return np.dot(ZAinner, ZA.T)
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
#psi1 if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
self.update_gradients_full(dL_dpsi1, variational_posterior.mean, Z) gamma = variational_posterior.binary_prob
# psi0: mu = variational_posterior.mean
tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior) S = variational_posterior.variance
if self.ARD: self.variances.gradient += tmp.sum(0) mu2S = np.square(mu)+S
else: self.variances.gradient += tmp.sum()
#psi2 _dpsi2_dvariance, _, _, _, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
if self.ARD: grad = np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu) +\
tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * Z[None, None, :, :]) np.einsum('nmo,nmoq->q',dL_dpsi2,_dpsi2_dvariance)
self.variances.gradient += 2.*tmp.sum(0).sum(0).sum(0) if self.ARD:
self.variances.gradient = grad
else:
self.variances.gradient = grad.sum()
else: else:
self.variances.gradient += 2.*np.sum(dL_dpsi2 * self.psi2(Z, variational_posterior))/self.variances #psi1
self.update_gradients_full(dL_dpsi1, variational_posterior.mean, Z)
# psi0:
tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior)
if self.ARD: self.variances.gradient += tmp.sum(0)
else: self.variances.gradient += tmp.sum()
#psi2
if self.ARD:
tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * Z[None, None, :, :])
self.variances.gradient += 2.*tmp.sum(0).sum(0).sum(0)
else:
self.variances.gradient += 2.*np.sum(dL_dpsi2 * self.psi2(Z, variational_posterior))/self.variances
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
#psi1 if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
grad = self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean) gamma = variational_posterior.binary_prob
#psi2 mu = variational_posterior.mean
self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad) S = variational_posterior.variance
return grad _, _, _, _, _dpsi2_dZ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, self.variances,mu) +\
np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ)
return grad
else:
#psi1
grad = self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean)
#psi2
self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad)
return grad
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape) if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
# psi0 gamma = variational_posterior.binary_prob
grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances) mu = variational_posterior.mean
grad_S += dL_dpsi0[:, None] * self.variances S = variational_posterior.variance
# psi1 mu2S = np.square(mu)+S
grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
# psi2
self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S) grad_gamma = np.einsum('n,q,nq->nq',dL_dpsi0,self.variances,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,self.variances,Z,mu) +\
np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dgamma)
return grad_mu, grad_S grad_mu = np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*self.variances,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,self.variances,Z) +\
np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dmu)
grad_S = np.einsum('n,nq,q->nq',dL_dpsi0,gamma,self.variances) + np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dS)
return grad_mu, grad_S, grad_gamma
else:
grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape)
# psi0
grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances)
grad_S += dL_dpsi0[:, None] * self.variances
# psi1
grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
# psi2
self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S)
return grad_mu, grad_S
#--------------------------------------------------# #--------------------------------------------------#
# Helpers for psi statistics # # Helpers for psi statistics #

View file

@ -34,7 +34,6 @@ class Periodic(Kern):
self.lengthscale = Param('lengthscale', np.float64(lengthscale), Logexp()) self.lengthscale = Param('lengthscale', np.float64(lengthscale), Logexp())
self.period = Param('period', np.float64(period), Logexp()) self.period = Param('period', np.float64(period), Logexp())
self.add_parameters(self.variance, self.lengthscale, self.period) self.add_parameters(self.variance, self.lengthscale, self.period)
self.parameters_changed()
def _cos(self, alpha, omega, phase): def _cos(self, alpha, omega, phase):
def f(x): def f(x):

View file

@ -15,14 +15,16 @@ class Prod(Kern):
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self, k1, k2, tensor=False): def __init__(self, k1, k2, tensor=False,name=None):
if tensor: if tensor:
super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name) name = k1.name + '_xx_' + k2.name if name is None else name
super(Prod, self).__init__(k1.input_dim + k2.input_dim, name)
self.slice1 = slice(0,k1.input_dim) self.slice1 = slice(0,k1.input_dim)
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim) self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
else: else:
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension." assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name) name = k1.name + '_x_' + k2.name if name is None else name
super(Prod, self).__init__(k1.input_dim, name)
self.slice1 = slice(0, self.input_dim) self.slice1 = slice(0, self.input_dim)
self.slice2 = slice(0, self.input_dim) self.slice2 = slice(0, self.input_dim)
self.k1 = k1 self.k1 = k1
@ -39,17 +41,17 @@ class Prod(Kern):
return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2]) return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
def update_gradients_full(self, dL_dK, X): def update_gradients_full(self, dL_dK, X):
self.k1.update_gradients_full(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1]) self.k1.update_gradients_full(dL_dK*self.k2.K(X[:,self.slice2]), X[:,self.slice1])
self.k2.update_gradients_full(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2]) self.k2.update_gradients_full(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2])
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape) target = np.zeros(X.shape)
if X2 is None: if X2 is None:
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1], None) target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2.K(X[:,self.slice2]), X[:,self.slice1], None)
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2], None) target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2], None)
else: else:
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1]) target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2.K(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1])
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2]) target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1.K(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2])
return target return target
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):

View file

@ -0,0 +1,2 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

View file

@ -0,0 +1,51 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The package for the Psi statistics computation of the linear kernel for SSGPLVM
"""
import numpy as np
from GPy.util.caching import Cache_this
#@Cache_this(limit=1)
def _psi2computations(variance, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 and psi2
# Produced intermediate results:
# _psi2 NxMxM
# _psi2_dvariance NxMxMxQ
# _psi2_dZ NxMxQ
# _psi2_dgamma NxMxMxQ
# _psi2_dmu NxMxMxQ
# _psi2_dS NxMxMxQ
mu2 = np.square(mu)
gamma2 = np.square(gamma)
variance2 = np.square(variance)
mu2S = mu2+S # NxQ
common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM
_dpsi2_dvariance = np.einsum('nq,q,mq,oq->nmoq',2.*(gamma*mu2S-gamma2*mu2),variance,Z,Z)+\
np.einsum('nq,mq,nq,no->nmoq',gamma,Z,mu,common_sum)+\
np.einsum('nq,oq,nq,nm->nmoq',gamma,Z,mu,common_sum)
_dpsi2_dgamma = np.einsum('q,mq,oq,nq->nmoq',variance2,Z,Z,(mu2S-2.*gamma*mu2))+\
np.einsum('q,mq,nq,no->nmoq',variance,Z,mu,common_sum)+\
np.einsum('q,oq,nq,nm->nmoq',variance,Z,mu,common_sum)
_dpsi2_dmu = np.einsum('q,mq,oq,nq,nq->nmoq',variance2,Z,Z,mu,2.*(gamma-gamma2))+\
np.einsum('nq,q,mq,no->nmoq',gamma,variance,Z,common_sum)+\
np.einsum('nq,q,oq,nm->nmoq',gamma,variance,Z,common_sum)
_dpsi2_dS = np.einsum('nq,q,mq,oq->nmoq',gamma,variance2,Z,Z)
_dpsi2_dZ = 2.*(np.einsum('nq,q,mq,nq->nmq',gamma,variance2,Z,mu2S)+np.einsum('nq,q,nq,nm->nmq',gamma,variance,mu,common_sum)
-np.einsum('nq,q,mq,nq->nmq',gamma2,variance2,Z,mu2))
return _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ

View file

@ -0,0 +1,107 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The package for the psi statistics computation
"""
import numpy as np
from GPy.util.caching import Cache_this
@Cache_this(limit=1)
def _Z_distances(Z):
    """
    Pairwise midpoints and half-differences of the inducing inputs.

    :param Z: MxQ array of inducing inputs.
    :returns: (Zhat, Zdist), both MxMxQ, where Zhat[m, mm] is the midpoint
        0.5*(Z[m] + Z[mm]) and Zdist[m, mm] is the half-difference
        0.5*(Z[m] - Z[mm]).
    """
    half_Z = 0.5 * Z
    lhs = half_Z[:, None, :]  # M,1,Q
    rhs = half_Z[None, :, :]  # 1,M,Q
    return lhs + rhs, lhs - rhs
@Cache_this(limit=1)
def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
    """
    The psi1 statistic of the SSRBF kernel and its gradients, for the
    Spike-and-Slab GPLVM.

    Shapes (N data points, M inducing points, Q latent dimensions):
    variance    - scalar kernel variance
    lengthscale - lengthscale(s), broadcastable against the Q axis
    Z           - MxQ  inducing inputs
    mu          - NxQ  variational posterior means
    S           - NxQ  variational posterior variances
    gamma       - NxQ  spike-and-slab binary probabilities

    Returns, in order:
    _psi1 NxM, _dpsi1_dvariance NxM, _dpsi1_dgamma NxMxQ, _dpsi1_dmu NxMxQ,
    _dpsi1_dS NxMxQ, _dpsi1_dZ NxMxQ, _dpsi1_dlengthscale NxMxQ.
    """
    # here are the "statistics" for psi1 and psi2
    # Produced intermediate results:
    # _psi1                NxM
    # _dpsi1_dvariance     NxM
    # _dpsi1_dlengthscale  NxMxQ
    # _dpsi1_dZ            NxMxQ
    # _dpsi1_dgamma        NxMxQ
    # _dpsi1_dmu           NxMxQ
    # _dpsi1_dS            NxMxQ
    lengthscale2 = np.square(lengthscale)
    # psi1
    _psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
    _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
    _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
    _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
    _psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
    # the two branches of the spike-and-slab mixture, kept in log space:
    # exponent1 is the "slab" (Gaussian) part, exponent2 the "spike" part
    _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ
    _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
    # log-sum-exp with max subtracted for numerical stability
    _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
    _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
    _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
    _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
    _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
    # _psi1_q is psi1 with the q-th factor of the product divided out,
    # used by the per-dimension chain rules below
    _psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
    _psi1 = variance * np.exp(_psi1_exp_sum) # NxM
    _dpsi1_dvariance = _psi1 / variance # NxM
    _dpsi1_dgamma = _psi1_q * (_psi1_exp_dist_sq/_psi1_denom_sqrt-_psi1_exp_Z) # NxMxQ
    _dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
    _dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
    _dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
    _dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
    return _psi1, _dpsi1_dvariance, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _dpsi1_dZ, _dpsi1_dlengthscale
@Cache_this(limit=1)
def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
    """
    The psi2 statistic of the SSRBF kernel and its gradients, for the
    Spike-and-Slab GPLVM.

    Shapes (N data points, M inducing points, Q latent dimensions):
    variance    - scalar kernel variance
    lengthscale - lengthscale(s), broadcastable against the Q axis
    Z           - MxQ  inducing inputs
    mu          - NxQ  variational posterior means
    S           - NxQ  variational posterior variances
    gamma       - NxQ  spike-and-slab binary probabilities

    Returns, in order:
    _psi2 NxMxM, _dpsi2_dvariance NxMxM, _dpsi2_dgamma NxMxMxQ,
    _dpsi2_dmu NxMxMxQ, _dpsi2_dS NxMxMxQ, _dpsi2_dZ NxMxMxQ,
    _dpsi2_dlengthscale NxMxMxQ.
    """
    # here are the "statistics" for psi1 and psi2
    # Produced intermediate results:
    # _psi2                NxMxM
    # _psi2_dvariance      NxMxM
    # _psi2_dlengthscale   NxMxMxQ
    # _psi2_dZ             NxMxMxQ
    # _psi2_dgamma         NxMxMxQ
    # _psi2_dmu            NxMxMxQ
    # _psi2_dS             NxMxMxQ
    lengthscale2 = np.square(lengthscale)
    # pairwise midpoints and half-differences of inducing inputs (cached)
    _psi2_Zhat, _psi2_Zdist = _Z_distances(Z)
    _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
    _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
    # psi2
    _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
    _psi2_denom_sqrt = np.sqrt(_psi2_denom)
    _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
    _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
    _psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
    # the two branches of the spike-and-slab mixture, kept in log space:
    # exponent1 is the "slab" (Gaussian) part, exponent2 the "spike" part
    _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
    _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
    # log-sum-exp with max subtracted for numerical stability
    _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
    _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
    _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxMxM
    # _psi2_q is psi2 with the q-th factor of the product divided out,
    # used by the per-dimension chain rules below
    _psi2_q = np.square(variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
    _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
    _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
    _psi2 = np.square(variance) * np.exp(_psi2_exp_sum) # N,M,M
    _dpsi2_dvariance = 2. * _psi2/variance # NxMxM
    _dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ
    _dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ
    _dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ
    # Z[:,None,:] broadcasts Z onto the first inducing-point axis (m),
    # matching the derivative w.r.t. Z[m]
    _dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ
    _dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ
    return _psi2, _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ, _dpsi2_dlengthscale

View file

@ -7,6 +7,8 @@ from scipy import weave
from ...util.misc import param_to_array from ...util.misc import param_to_array
from stationary import Stationary from stationary import Stationary
from GPy.util.caching import Cache_this from GPy.util.caching import Cache_this
from ...core.parameterization import variational
from psi_comp import ssrbf_psi_comp
class RBF(Stationary): class RBF(Stationary):
""" """
@ -18,7 +20,7 @@ class RBF(Stationary):
""" """
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='RBF'): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='rbf'):
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name) super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name)
self.weave_options = {} self.weave_options = {}
@ -36,76 +38,145 @@ class RBF(Stationary):
return self.Kdiag(variational_posterior.mean) return self.Kdiag(variational_posterior.mean)
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
_, _, _, psi1 = self._psi1computations(Z, variational_posterior) if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
else:
_, _, _, psi1 = self._psi1computations(Z, variational_posterior)
return psi1 return psi1
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
_, _, _, _, _, psi2 = self._psi2computations(Z, variational_posterior) if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
else:
_, _, _, _, psi2 = self._psi2computations(Z, variational_posterior)
return psi2 return psi2
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
l2 = self.lengthscale **2 # Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
_, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0)
#from psi1
self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
if self.ARD:
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
else:
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
#from psi2
self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
if self.ARD:
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
else:
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale **2
#contributions from psi0: #contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0) self.variance.gradient = np.sum(dL_dpsi0)
self.lengthscale.gradient = 0. self.lengthscale.gradient = 0.
#from psi1
denom, _, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
d_length = psi1[:,:,None] * ((dist_sq - 1.)/(self.lengthscale*denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if self.ARD:
self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0)
else:
self.lengthscale.gradient += dpsi1_dlength.sum()
self.variance.gradient += np.sum(dL_dpsi1 * psi1) / self.variance
#from psi2
S = variational_posterior.variance
_, Zdist_sq, _, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
if not self.ARD:
self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2).sum()
else:
self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
#from psi1
denom, _, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
d_length = psi1[:,:,None] * ((dist_sq - 1.)/(self.lengthscale*denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD:
self.lengthscale.gradient += dpsi1_dlength.sum()
else: else:
self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0) raise ValueError, "unknown distriubtion received for psi-statistics"
self.variance.gradient += np.sum(dL_dpsi1 * psi1) / self.variance
#from psi2
S = variational_posterior.variance
denom, _, Zdist_sq, _, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
d_length = 2.*psi2[:, :, :, None] * (Zdist_sq * denom + mudist_sq + S[:, None, None, :] / l2) / (self.lengthscale * denom)
#TODO: combine denom and l2 as denom_l2??
#TODO: tidy the above!
#TODO: tensordot below?
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
if not self.ARD:
self.lengthscale.gradient += dpsi2_dlength.sum()
else:
self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0)
self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
l2 = self.lengthscale **2 # Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1
grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
#psi2
grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
return grad
#psi1 elif isinstance(variational_posterior, variational.NormalPosterior):
denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
denominator = l2 * denom l2 = self.lengthscale **2
dpsi1_dZ = -psi1[:, :, None] * (dist / denominator)
grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
#psi2 #psi1
denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior) denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
term1 = Zdist / l2 # M, M, Q grad = np.einsum('ij,ij,ijk,ijk->jk', dL_dpsi1, psi1, dist, -1./(denom*l2))
term2 = mudist / denom / l2 # N, M, M, Q
dZ = psi2[:, :, :, None] * (term1[None, :, :, :] + term2) #N,M,M,Q
grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
return grad #psi2
Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
term1 = Zdist / l2 # M, M, Q
S = variational_posterior.variance
term2 = mudist / (2.*S[:,None,None,:] + l2) # N, M, M, Q
grad += 2.*np.einsum('ijk,ijk,ijkl->kl', dL_dpsi2, psi2, term1[None,:,:,:] + term2)
return grad
else:
raise ValueError, "unknown distriubtion received for psi-statistics"
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
l2 = self.lengthscale **2 # Spike-and-Slab GPLVM
#psi1 if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior) ndata = variational_posterior.mean.shape[0]
tmp = psi1[:, :, None] / l2 / denom
grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * dist, 1) _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (dist_sq - 1), 1) _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi2
denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior) #psi1
tmp = psi2[:, :, :, None] / l2 / denom grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * mudist).sum(1).sum(1) grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*mudist_sq - 1)).sum(1).sum(1) grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
#psi2
grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
return grad_mu, grad_S, grad_gamma
elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale **2
#psi1
denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
tmp = psi1[:, :, None] / l2 / denom
grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * dist, 1)
grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (dist_sq - 1), 1)
#psi2
_, _, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
S = variational_posterior.variance
tmp = psi2[:, :, :, None] / (2.*S[:,None,None,:] + l2)
grad_mu += -2.*np.einsum('ijk,ijkl,ijkl->il', dL_dpsi2, tmp , mudist)
grad_S += np.einsum('ijk,ijkl,ijkl->il', dL_dpsi2 , tmp , (2.*mudist_sq - 1))
else:
raise ValueError, "unknown distriubtion received for psi-statistics"
return grad_mu, grad_S return grad_mu, grad_S
@ -113,61 +184,6 @@ class RBF(Stationary):
# Precomputations # # Precomputations #
#---------------------------------------# #---------------------------------------#
#TODO: this function is unused, but it will be useful in the stationary class
def _dL_dlengthscales_via_K(self, dL_dK, X, X2):
"""
A helper function for update_gradients_* methods
Computes the derivative of the objective L wrt the lengthscales via
dL_dl = sum_{i,j}(dL_dK_{ij} dK_dl)
assumes self._K_computations has just been called.
This is only valid if self.ARD=True
"""
target = np.zeros(self.input_dim)
dvardLdK = self._K_dvar * dL_dK
var_len3 = self.variance / np.power(self.lengthscale, 3)
if X2 is None:
# save computation for the symmetrical case
dvardLdK = dvardLdK + dvardLdK.T
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<i; j++){
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
}
}
target(q) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
X, dvardLdK, var_len3 = param_to_array(X, dvardLdK, var_len3)
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
else:
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<num_inducing; j++){
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
}
}
target(q) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
X, X2, dvardLdK, var_len3 = param_to_array(X, X2, dvardLdK, var_len3)
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
return target
@Cache_this(limit=1) @Cache_this(limit=1)
def _psi1computations(self, Z, vp): def _psi1computations(self, Z, vp):
mu, S = vp.mean, vp.variance mu, S = vp.mean, vp.variance
@ -180,7 +196,7 @@ class RBF(Stationary):
return denom, dist, dist_sq, psi1 return denom, dist, dist_sq, psi1
#@cache_this(ignore_args=(1,)) @Cache_this(limit=1, ignore_args=(0,))
def _Z_distances(self, Z): def _Z_distances(self, Z):
Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
@ -200,7 +216,6 @@ class RBF(Stationary):
#allocate memory for the things we want to compute #allocate memory for the things we want to compute
mudist = np.empty((N, M, M, Q)) mudist = np.empty((N, M, M, Q))
mudist_sq = np.empty((N, M, M, Q)) mudist_sq = np.empty((N, M, M, Q))
exponent = np.zeros((N,M,M))
psi2 = np.empty((N, M, M)) psi2 = np.empty((N, M, M))
l2 = self.lengthscale **2 l2 = self.lengthscale **2
@ -212,7 +227,7 @@ class RBF(Stationary):
code = """ code = """
double tmp, exponent_tmp; double tmp, exponent_tmp;
//#pragma omp parallel for private(tmp, exponent_tmp) #pragma omp parallel for private(tmp, exponent_tmp)
for (int n=0; n<N; n++) for (int n=0; n<N; n++)
{ {
for (int m=0; m<M; m++) for (int m=0; m<M; m++)
@ -253,8 +268,48 @@ class RBF(Stationary):
arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'], arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options) type_converters=weave.converters.blitz, **self.weave_options)
return denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2 return Zdist, Zdist_sq, mudist, mudist_sq, psi2
def input_sensitivity(self): def _weave_psi2_lengthscale_grads(self, dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2):
if self.ARD: return 1./self.lengthscale
else: return (1./self.lengthscale).repeat(self.input_dim) #here's the einsum equivalent, it's ~3 times slower
#return 2.*np.einsum( 'ijk,ijk,ijkl,il->l', dL_dpsi2, psi2, Zdist_sq * (2.*S[:,None,None,:]/l2 + 1.) + mudist_sq + S[:, None, None, :] / l2, 1./(2.*S + l2))*self.lengthscale
result = np.zeros(self.input_dim)
code = """
double tmp;
for(int q=0; q<Q; q++)
{
tmp = 0.0;
#pragma omp parallel for reduction(+:tmp)
for(int n=0; n<N; n++)
{
for(int m=0; m<M; m++)
{
//diag terms
tmp += dL_dpsi2(n,m,m) * psi2(n,m,m) * (Zdist_sq(m,m,q) * (2.0*S(n,q)/l2(q) + 1.0) + mudist_sq(n,m,m,q) + S(n,q)/l2(q)) / (2.0*S(n,q) + l2(q)) ;
//off-diag terms
for(int mm=0; mm<m; mm++)
{
tmp += 2.0 * dL_dpsi2(n,m,mm) * psi2(n,m,mm) * (Zdist_sq(m,mm,q) * (2.0*S(n,q)/l2(q) + 1.0) + mudist_sq(n,m,mm,q) + S(n,q)/l2(q)) / (2.0*S(n,q) + l2(q)) ;
}
}
}
result(q) = tmp;
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
N,Q = S.shape
M = psi2.shape[-1]
S = param_to_array(S)
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['psi2', 'dL_dpsi2', 'N', 'M', 'Q', 'mudist_sq', 'l2', 'Zdist_sq', 'S', 'result'],
type_converters=weave.converters.blitz, **self.weave_options)
return 2.*result*self.lengthscale

View file

@ -7,6 +7,7 @@ import numpy as np
from ...util.linalg import tdot from ...util.linalg import tdot
from ...util.config import * from ...util.config import *
from stationary import Stationary from stationary import Stationary
from psi_comp import ssrbf_psi_comp
class SSRBF(Stationary): class SSRBF(Stationary):
""" """
@ -54,101 +55,63 @@ class SSRBF(Stationary):
# PSI statistics # # PSI statistics #
#---------------------------------------# #---------------------------------------#
def psi0(self, Z, posterior_variational): def psi0(self, Z, variational_posterior):
ret = np.empty(posterior_variational.mean.shape[0]) ret = np.empty(variational_posterior.mean.shape[0])
ret[:] = self.variance ret[:] = self.variance
return ret return ret
def psi1(self, Z, posterior_variational): def psi1(self, Z, variational_posterior):
self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob) _psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
return self._psi1 return _psi1
def psi2(self, Z, posterior_variational): def psi2(self, Z, variational_posterior):
self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob) _psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
return self._psi2 return _psi2
def dL_dpsi0_dmuSgamma(self, dL_dpsi0, Z, mu, S, gamma, target_mu, target_S, target_gamma): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
pass _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
def dL_dpsi1_dmuSgamma(self, dL_dpsi1, Z, mu, S, gamma, target_mu, target_S, target_gamma):
self._psi_computations(Z, mu, S, gamma)
target_mu += (dL_dpsi1[:, :, None] * self._dpsi1_dmu).sum(axis=1)
target_S += (dL_dpsi1[:, :, None] * self._dpsi1_dS).sum(axis=1)
target_gamma += (dL_dpsi1[:,:,None] * self._dpsi1_dgamma).sum(axis=1)
def dL_dpsi2_dmuSgamma(self, dL_dpsi2, Z, mu, S, gamma, target_mu, target_S, target_gamma):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S, gamma)
target_mu += (dL_dpsi2[:, :, :, None] * self._dpsi2_dmu).reshape(mu.shape[0],-1,mu.shape[1]).sum(axis=1)
target_S += (dL_dpsi2[:, :, :, None] * self._dpsi2_dS).reshape(S.shape[0],-1,S.shape[1]).sum(axis=1)
target_gamma += (dL_dpsi2[:,:,:, None] *self._dpsi2_dgamma).reshape(gamma.shape[0],-1,gamma.shape[1]).sum(axis=1)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational):
self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob)
#contributions from psi0: #contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0) self.variance.gradient = np.sum(dL_dpsi0)
#from psi1 #from psi1
self.variance.gradient += np.sum(dL_dpsi1 * self._dpsi1_dvariance) self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*self._dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
#from psi2 #from psi2
self.variance.gradient += (dL_dpsi2 * self._dpsi2_dvariance).sum() self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * self._dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
#from Kmm
self._K_computations(Z, None)
dvardLdK = self._K_dvar * dL_dKmm
var_len3 = self.variance / (np.square(self.lengthscale)*self.lengthscale)
self.variance.gradient += np.sum(dvardLdK)
self.lengthscale.gradient += (np.square(Z[:,None,:]-Z[None,:,:])*dvardLdK[:,:,None]).reshape(-1,self.input_dim).sum(axis=0)*var_len3
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob) _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1 #psi1
grad = (dL_dpsi1[:, :, None] * self._dpsi1_dZ).sum(axis=0) grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
#psi2 #psi2
grad += (dL_dpsi2[:, :, :, None] * self._dpsi2_dZ).sum(axis=0).sum(axis=1) grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
grad += self.gradients_X(dL_dKmm, Z, None)
return grad return grad
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
ndata = posterior_variational.mean.shape[0] ndata = variational_posterior.mean.shape[0]
self._psi_computations(Z, posterior_variational.mean, posterior_variational.variance, posterior_variational.binary_prob)
_, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1 #psi1
grad_mu = (dL_dpsi1[:, :, None] * self._dpsi1_dmu).sum(axis=1) grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
grad_S = (dL_dpsi1[:, :, None] * self._dpsi1_dS).sum(axis=1) grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
grad_gamma = (dL_dpsi1[:,:,None] * self._dpsi1_dgamma).sum(axis=1) grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
#psi2 #psi2
grad_mu += (dL_dpsi2[:, :, :, None] * self._dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1) grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_S += (dL_dpsi2[:, :, :, None] * self._dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1) grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_gamma += (dL_dpsi2[:,:,:, None] *self._dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1) grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
return grad_mu, grad_S, grad_gamma return grad_mu, grad_S, grad_gamma
def gradients_X(self, dL_dK, X, X2=None):
#if self._X is None or X.base is not self._X.base or X2 is not None:
if X2==None:
_K_dist = X[:,None,:] - X[None,:,:]
_K_dist2 = np.square(_K_dist/self.lengthscale).sum(axis=-1)
dK_dX = self.variance*np.exp(-0.5 * self._K_dist2[:,:,None]) * (-2.*_K_dist/np.square(self.lengthscale))
dL_dX = (dL_dK[:,:,None] * dK_dX).sum(axis=1)
else:
_K_dist = X[:,None,:] - X2[None,:,:]
_K_dist2 = np.square(_K_dist/self.lengthscale).sum(axis=-1)
dK_dX = self.variance*np.exp(-0.5 * self._K_dist2[:,:,None]) * (-_K_dist/np.square(self.lengthscale))
dL_dX = (dL_dK[:,:,None] * dK_dX).sum(axis=1)
return dL_dX
#---------------------------------------# #---------------------------------------#
# Precomputations # # Precomputations #
@ -174,78 +137,3 @@ class SSRBF(Stationary):
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), axis=1)[:, None] + np.sum(np.square(X2), axis=1)[None, :]) self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), axis=1)[:, None] + np.sum(np.square(X2), axis=1)[None, :])
self._K_dvar = np.exp(-0.5 * self._K_dist2) self._K_dvar = np.exp(-0.5 * self._K_dist2)
#@cache_this(1)
def _psi_computations(self, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
# _dpsi1_dvariance NxM
# _dpsi1_dlengthscale NxMxQ
# _dpsi1_dZ NxMxQ
# _dpsi1_dgamma NxMxQ
# _dpsi1_dmu NxMxQ
# _dpsi1_dS NxMxQ
# _psi2 NxMxM
# _psi2_dvariance NxMxM
# _psi2_dlengthscale NxMxMxQ
# _psi2_dZ NxMxMxQ
# _psi2_dgamma NxMxMxQ
# _psi2_dmu NxMxMxQ
# _psi2_dS NxMxMxQ
lengthscale2 = np.square(self.lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / self.lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi1
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom)) # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
_psi1_exponent = np.log(np.exp(_psi1_exponent1) + np.exp(_psi1_exponent2)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
_psi1_q = self.variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
self._psi1 = self.variance * np.exp(_psi1_exp_sum) # NxM
self._dpsi1_dvariance = self._psi1 / self.variance # NxM
self._dpsi1_dgamma = _psi1_q * (_psi1_exp_dist_sq/_psi1_denom_sqrt-_psi1_exp_Z) # NxMxQ
self._dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
self._dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
self._dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
self._dpsi1_dlengthscale = 2.*self.lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
# psi2
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
_psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent = np.log(np.exp(_psi2_exponent1) + np.exp(_psi2_exponent2))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = np.square(self.variance) * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
self._psi2 = np.square(self.variance) * np.exp(_psi2_exp_sum) # N,M,M
self._dpsi2_dvariance = 2. * self._psi2/self.variance # NxMxM
self._dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ
self._dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ
self._dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ
self._dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ
self._dpsi2_dlengthscale = 2.*self.lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ

View file

@ -12,6 +12,35 @@ from scipy import integrate
from ...util.caching import Cache_this from ...util.caching import Cache_this
class Stationary(Kern): class Stationary(Kern):
"""
Stationary kernels (covariance functions).
Stationary covariance functions depend only on r, where r is defined as
r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 }
The covariance function k(x, x') can then be written k(r).
In this implementation, r is scaled by the lengthscales parameter(s):
r = \sqrt{ \sum_{q=1}^Q \frac{(x_q - x'_q)^2}{\ell_q^2} }.
By default, there's only one lengthscale: separate lengthscales for each
dimension can be enabled by setting ARD=True.
To implement a stationary covariance function using this class, one need
only define the covariance function k(r) and its derivative.
...
def K_of_r(self, r):
return foo
def dK_dr(self, r):
return bar
The lengthscale(s) and variance parameters are added to the structure automatically.
"""
def __init__(self, input_dim, variance, lengthscale, ARD, name): def __init__(self, input_dim, variance, lengthscale, ARD, name):
super(Stationary, self).__init__(input_dim, name) super(Stationary, self).__init__(input_dim, name)
self.ARD = ARD self.ARD = ARD
@ -20,11 +49,11 @@ class Stationary(Kern):
lengthscale = np.ones(1) lengthscale = np.ones(1)
else: else:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only lengthscale needed for non-ARD kernel" assert lengthscale.size == 1, "Only 1 lengthscale needed for non-ARD kernel"
else: else:
if lengthscale is not None: if lengthscale is not None:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
assert lengthscale.size in [1, input_dim], "Bad lengthscales" assert lengthscale.size in [1, input_dim], "Bad number of lengthscales"
if lengthscale.size != input_dim: if lengthscale.size != input_dim:
lengthscale = np.ones(input_dim)*lengthscale lengthscale = np.ones(input_dim)*lengthscale
else: else:
@ -35,42 +64,43 @@ class Stationary(Kern):
self.add_parameters(self.variance, self.lengthscale) self.add_parameters(self.variance, self.lengthscale)
def K_of_r(self, r): def K_of_r(self, r):
raise NotImplementedError, "implement the covaraiance function as a fn of r to use this class" raise NotImplementedError, "implement the covariance function as a fn of r to use this class"
def dK_dr(self, r): def dK_dr(self, r):
raise NotImplementedError, "implement the covaraiance function as a fn of r to use this class" raise NotImplementedError, "implement derivative of the covariance function wrt r to use this class"
#@Cache_this(limit=5, ignore_args=()) @Cache_this(limit=5, ignore_args=())
def K(self, X, X2=None): def K(self, X, X2=None):
r = self._scaled_dist(X, X2) r = self._scaled_dist(X, X2)
return self.K_of_r(r) return self.K_of_r(r)
#@Cache_this(limit=5, ignore_args=(0,)) @Cache_this(limit=3, ignore_args=())
def _dist(self, X, X2): def dK_dr_via_X(self, X, X2):
if X2 is None: #a convenience function, so we can cache dK_dr
X2 = X return self.dK_dr(self._scaled_dist(X, X2))
return X[:, None, :] - X2[None, :, :]
#@Cache_this(limit=5, ignore_args=(0,)) @Cache_this(limit=5, ignore_args=(0,))
def _unscaled_dist(self, X, X2=None): def _unscaled_dist(self, X, X2=None):
""" """
Compute the square distance between each row of X and X2, or between Compute the Euclidean distance between each row of X and X2, or between
each pair of rows of X if X2 is None. each pair of rows of X if X2 is None.
""" """
if X2 is None: if X2 is None:
Xsq = np.sum(np.square(X),1) Xsq = np.sum(np.square(X),1)
return np.sqrt(-2.*tdot(X) + (Xsq[:,None] + Xsq[None,:])) r2 = -2.*tdot(X) + (Xsq[:,None] + Xsq[None,:])
util.diag.view(r2)[:,]= 0. # force diagnoal to be zero: sometime numerically a little negative
return np.sqrt(r2)
else: else:
X1sq = np.sum(np.square(X),1) X1sq = np.sum(np.square(X),1)
X2sq = np.sum(np.square(X2),1) X2sq = np.sum(np.square(X2),1)
return np.sqrt(-2.*np.dot(X, X2.T) + (X1sq[:,None] + X2sq[None,:])) return np.sqrt(-2.*np.dot(X, X2.T) + (X1sq[:,None] + X2sq[None,:]))
#@Cache_this(limit=5, ignore_args=()) @Cache_this(limit=5, ignore_args=())
def _scaled_dist(self, X, X2=None): def _scaled_dist(self, X, X2=None):
""" """
Efficiently compute the scaled distance, r. Efficiently compute the scaled distance, r.
r = \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 r = \sqrt( \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
Note that if thre is only one lengthscale, l comes outside the sum. In Note that if thre is only one lengthscale, l comes outside the sum. In
this case we compute the unscaled distance first (in a separate this case we compute the unscaled distance first (in a separate
@ -84,7 +114,6 @@ class Stationary(Kern):
else: else:
return self._unscaled_dist(X, X2)/self.lengthscale return self._unscaled_dist(X, X2)/self.lengthscale
def Kdiag(self, X): def Kdiag(self, X):
ret = np.empty(X.shape[0]) ret = np.empty(X.shape[0])
ret[:] = self.variance ret[:] = self.variance
@ -95,20 +124,23 @@ class Stationary(Kern):
self.lengthscale.gradient = 0. self.lengthscale.gradient = 0.
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
r = self._scaled_dist(X, X2)
K = self.K_of_r(r)
rinv = self._inv_dist(X, X2) self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
dL_dr = self.dK_dr(r) * dL_dK
#now the lengthscale gradient(s)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
if self.ARD: if self.ARD:
x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3 #rinv = self._inv_dis# this is rather high memory? Should we loop instead?t(X, X2)
self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0) #d = X[:, None, :] - X2[None, :, :]
#x_xl3 = np.square(d)
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
tmp = dL_dr*self._inv_dist(X, X2)
if X2 is None: X2 = X
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
else: else:
x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3 r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum() self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
self.variance.gradient = np.sum(K * dL_dK)/self.variance
def _inv_dist(self, X, X2=None): def _inv_dist(self, X, X2=None):
""" """
@ -116,7 +148,7 @@ class Stationary(Kern):
diagonal, where we return zero (the distance on the diagonal is zero). diagonal, where we return zero (the distance on the diagonal is zero).
This term appears in derviatives. This term appears in derviatives.
""" """
dist = self._scaled_dist(X, X2) dist = self._scaled_dist(X, X2).copy()
if X2 is None: if X2 is None:
nondiag = util.diag.offdiag_view(dist) nondiag = util.diag.offdiag_view(dist)
nondiag[:] = 1./nondiag nondiag[:] = 1./nondiag
@ -128,10 +160,11 @@ class Stationary(Kern):
""" """
Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
""" """
r = self._scaled_dist(X, X2)
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr(r) * dL_dK dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
#The high-memory numpy way: ret = np.sum((invdist*dL_dr)[:,:,None]*self._dist(X, X2),1)/self.lengthscale**2 #The high-memory numpy way:
#d = X[:, None, :] - X2[None, :, :]
#ret = np.sum((invdist*dL_dr)[:,:,None]*d,1)/self.lengthscale**2
#if X2 is None: #if X2 is None:
#ret *= 2. #ret *= 2.
@ -141,7 +174,7 @@ class Stationary(Kern):
tmp *= 2. tmp *= 2.
X2 = X X2 = X
ret = np.empty(X.shape, dtype=np.float64) ret = np.empty(X.shape, dtype=np.float64)
[np.copyto(ret[:,q], np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), 1)) for q in xrange(self.input_dim)] [np.einsum('ij,ij->i', tmp, X[:,q][:,None]-X2[:,q][None,:], out=ret[:,q]) for q in xrange(self.input_dim)]
ret /= self.lengthscale**2 ret /= self.lengthscale**2
return ret return ret
@ -214,7 +247,7 @@ class Matern52(Stationary):
.. math:: .. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
""" """
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat52'): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat52'):
super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, name) super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, name)
@ -225,7 +258,7 @@ class Matern52(Stationary):
def dK_dr(self, r): def dK_dr(self, r):
return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r) return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r)
def Gram_matrix(self,F,F1,F2,F3,lower,upper): def Gram_matrix(self, F, F1, F2, F3, lower, upper):
""" """
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1. Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.

View file

@ -1,11 +1,10 @@
# Check Matthew Rocklin's blog post. # Check Matthew Rocklin's blog post.
try: try:
import sympy as sp import sympy as sp
sympy_available=True sympy_available=True
from sympy.utilities.lambdify import lambdify from sympy.utilities.lambdify import lambdify
except ImportError: except ImportError:
sympy_available=False sympy_available=False
exit()
import numpy as np import numpy as np
from kern import Kern from kern import Kern
@ -36,7 +35,7 @@ class Sympykern(Kern):
super(Sympykern, self).__init__(input_dim, name) super(Sympykern, self).__init__(input_dim, name)
self._sp_k = k self._sp_k = k
# pull the variable names out of the symbolic covariance function. # pull the variable names out of the symbolic covariance function.
sp_vars = [e for e in k.atoms() if e.is_Symbol] sp_vars = [e for e in k.atoms() if e.is_Symbol]
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:])) self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
@ -51,7 +50,7 @@ class Sympykern(Kern):
self._sp_kdiag = k self._sp_kdiag = k
for x, z in zip(self._sp_x, self._sp_z): for x, z in zip(self._sp_x, self._sp_z):
self._sp_kdiag = self._sp_kdiag.subs(z, x) self._sp_kdiag = self._sp_kdiag.subs(z, x)
# If it is a multi-output covariance, add an input for indexing the outputs. # If it is a multi-output covariance, add an input for indexing the outputs.
self._real_input_dim = x_dim self._real_input_dim = x_dim
# Check input dim is number of xs + 1 if output_dim is >1 # Check input dim is number of xs + 1 if output_dim is >1
@ -73,47 +72,44 @@ class Sympykern(Kern):
# Extract names of shared parameters (those without a subscript) # Extract names of shared parameters (those without a subscript)
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j] self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
self.num_split_params = len(self._sp_theta_i) self.num_split_params = len(self._sp_theta_i)
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i] self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
# Add split parameters to the model.
for theta in self._split_theta_names: for theta in self._split_theta_names:
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted?
setattr(self, theta, Param(theta, np.ones(self.output_dim), None)) setattr(self, theta, Param(theta, np.ones(self.output_dim), None))
self.add_parameters(getattr(self, theta)) self.add_parameter(getattr(self, theta))
#setattr(self, theta, np.ones(self.output_dim))
self.num_shared_params = len(self._sp_theta) self.num_shared_params = len(self._sp_theta)
for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j): for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j):
self._sp_kdiag = self._sp_kdiag.subs(theta_j, theta_i) self._sp_kdiag = self._sp_kdiag.subs(theta_j, theta_i)
#self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
else: else:
self.num_split_params = 0 self.num_split_params = 0
self._split_theta_names = [] self._split_theta_names = []
self._sp_theta = thetas self._sp_theta = thetas
self.num_shared_params = len(self._sp_theta) self.num_shared_params = len(self._sp_theta)
#self.num_params = self.num_shared_params
# Add parameters to the model. # Add parameters to the model.
for theta in self._sp_theta: for theta in self._sp_theta:
val = 1.0 val = 1.0
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
if param is not None: if param is not None:
if param.has_key(theta): if param.has_key(theta):
val = param[theta] val = param[theta]
setattr(self, theta.name, Param(theta.name, val, None)) setattr(self, theta.name, Param(theta.name, val, None))
self.add_parameters(getattr(self, theta.name)) self.add_parameters(getattr(self, theta.name))
#deal with param
#self._set_params(self._get_params())
# Differentiate with respect to parameters. # Differentiate with respect to parameters.
derivative_arguments = self._sp_x + self._sp_theta derivative_arguments = self._sp_x + self._sp_theta
if self.output_dim > 1: if self.output_dim > 1:
derivative_arguments += self._sp_theta_i derivative_arguments += self._sp_theta_i
self.derivatives = {theta.name : sp.diff(self._sp_k,theta).simplify() for theta in derivative_arguments} self.derivatives = {theta.name : sp.diff(self._sp_k,theta).simplify() for theta in derivative_arguments}
self.diag_derivatives = {theta.name : sp.diff(self._sp_kdiag,theta).simplify() for theta in derivative_arguments} self.diag_derivatives = {theta.name : sp.diff(self._sp_kdiag,theta).simplify() for theta in derivative_arguments}
# This gives the parameters for the arg list. # This gives the parameters for the arg list.
self.arg_list = self._sp_x + self._sp_z + self._sp_theta self.arg_list = self._sp_x + self._sp_z + self._sp_theta
self.diag_arg_list = self._sp_x + self._sp_theta self.diag_arg_list = self._sp_x + self._sp_theta
@ -127,14 +123,11 @@ class Sympykern(Kern):
# generate the code for the covariance functions # generate the code for the covariance functions
self._gen_code() self._gen_code()
self.parameters_changed() # initializes caches
def __add__(self,other): def __add__(self,other):
return spkern(self._sp_k+other._sp_k) return spkern(self._sp_k+other._sp_k)
def _gen_code(self): def _gen_code(self):
#fn_theano = theano_function([self.arg_lists], [self._sp_k + self.derivatives], dims={x: 1}, dtypes={x_0: 'float64', z_0: 'float64'})
self._K_function = lambdify(self.arg_list, self._sp_k, 'numpy') self._K_function = lambdify(self.arg_list, self._sp_k, 'numpy')
for key in self.derivatives.keys(): for key in self.derivatives.keys():
setattr(self, '_K_diff_' + key, lambdify(self.arg_list, self.derivatives[key], 'numpy')) setattr(self, '_K_diff_' + key, lambdify(self.arg_list, self.derivatives[key], 'numpy'))
@ -143,7 +136,7 @@ class Sympykern(Kern):
for key in self.derivatives.keys(): for key in self.derivatives.keys():
setattr(self, '_Kdiag_diff_' + key, lambdify(self.diag_arg_list, self.diag_derivatives[key], 'numpy')) setattr(self, '_Kdiag_diff_' + key, lambdify(self.diag_arg_list, self.diag_derivatives[key], 'numpy'))
def K(self,X,X2=None): def K(self,X,X2=None):
self._K_computations(X, X2) self._K_computations(X, X2)
return self._K_function(**self._arguments) return self._K_function(**self._arguments)
@ -151,11 +144,11 @@ class Sympykern(Kern):
def Kdiag(self,X): def Kdiag(self,X):
self._K_computations(X) self._K_computations(X)
return self._Kdiag_function(**self._diag_arguments) return self._Kdiag_function(**self._diag_arguments)
def _param_grad_helper(self,partial,X,Z,target): def _param_grad_helper(self,partial,X,Z,target):
pass pass
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
#if self._X is None or X.base is not self._X.base or X2 is not None: #if self._X is None or X.base is not self._X.base or X2 is not None:
self._K_computations(X, X2) self._K_computations(X, X2)
@ -174,7 +167,7 @@ class Sympykern(Kern):
gf = getattr(self, '_Kdiag_diff_' + x.name) gf = getattr(self, '_Kdiag_diff_' + x.name)
dX[:, i] = gf(**self._diag_arguments)*dL_dK dX[:, i] = gf(**self._diag_arguments)*dL_dK
return dX return dX
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
# Need to extract parameters to local variables first # Need to extract parameters to local variables first
self._K_computations(X, X2) self._K_computations(X, X2)
@ -199,7 +192,7 @@ class Sympykern(Kern):
gradient += np.asarray([A[np.where(self._output_ind2==i)].T.sum() gradient += np.asarray([A[np.where(self._output_ind2==i)].T.sum()
for i in np.arange(self.output_dim)]) for i in np.arange(self.output_dim)])
setattr(parameter, 'gradient', gradient) setattr(parameter, 'gradient', gradient)
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
self._K_computations(X) self._K_computations(X)
@ -215,7 +208,7 @@ class Sympykern(Kern):
setattr(parameter, 'gradient', setattr(parameter, 'gradient',
np.asarray([a[np.where(self._output_ind==i)].sum() np.asarray([a[np.where(self._output_ind==i)].sum()
for i in np.arange(self.output_dim)])) for i in np.arange(self.output_dim)]))
def _K_computations(self, X, X2=None): def _K_computations(self, X, X2=None):
"""Set up argument lists for the derivatives.""" """Set up argument lists for the derivatives."""
# Could check if this needs doing or not, there could # Could check if this needs doing or not, there could

View file

@ -17,7 +17,7 @@ class Kernel(Mapping):
:type X: ndarray :type X: ndarray
:param output_dim: dimension of output. :param output_dim: dimension of output.
:type output_dim: int :type output_dim: int
:param kernel: a GPy kernel, defaults to GPy.kern.rbf :param kernel: a GPy kernel, defaults to GPy.kern.RBF
:type kernel: GPy.kern.kern :type kernel: GPy.kern.kern
""" """
@ -25,7 +25,7 @@ class Kernel(Mapping):
def __init__(self, X, output_dim=1, kernel=None): def __init__(self, X, output_dim=1, kernel=None):
Mapping.__init__(self, input_dim=X.shape[1], output_dim=output_dim) Mapping.__init__(self, input_dim=X.shape[1], output_dim=output_dim)
if kernel is None: if kernel is None:
kernel = GPy.kern.rbf(self.input_dim) kernel = GPy.kern.RBF(self.input_dim)
self.kern = kernel self.kern = kernel
self.X = X self.X = X
self.num_data = X.shape[0] self.num_data = X.shape[0]
@ -43,7 +43,7 @@ class Kernel(Mapping):
def _set_params(self, x): def _set_params(self, x):
self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy() self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy()
self.bias = x[self.num_data*self.output_dim:].copy() self.bias = x[self.num_data*self.output_dim:].copy()
def randomize(self): def randomize(self):
self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1) self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1)
self.bias = np.random.randn(self.output_dim)/np.sqrt(self.num_data+1) self.bias = np.random.randn(self.output_dim)/np.sqrt(self.num_data+1)

View file

@ -49,7 +49,6 @@ class BayesianGPLVM(SparseGP):
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.add_parameter(self.X, index=0) self.add_parameter(self.X, index=0)
self.parameters_changed()
def _getstate(self): def _getstate(self):
""" """
@ -150,37 +149,6 @@ class BayesianGPLVM(SparseGP):
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs) return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
class BayesianGPLVMWithMissingData(BayesianGPLVM):
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
from ..util.subarray_and_sorting import common_subarrays
self.subarrays = common_subarrays(Y)
import ipdb;ipdb.set_trace()
BayesianGPLVM.__init__(self, Y, input_dim, X=X, X_variance=X_variance, init=init, num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, name=name, **kwargs)
def parameters_changed(self):
super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.KL_divergence()
dL_dmu, dL_dS = self.dL_dmuS()
# dL:
self.X.mean.gradient = dL_dmu
self.X.variance.gradient = dL_dS
# dKL:
self.X.mean.gradient -= self.X.mean
self.X.variance.gradient -= (1. - (1. / (self.X.variance))) * 0.5
if __name__ == '__main__':
import numpy as np
X = np.random.randn(20,2)
W = np.linspace(0,1,10)[None,:]
Y = (X*W).sum(1)
missing = np.random.binomial(1,.1,size=Y.shape)
pass
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
""" """

View file

@ -28,7 +28,6 @@ class GPRegression(GP):
likelihood = likelihoods.Gaussian() likelihood = likelihoods.Gaussian()
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression') super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression')
self.parameters_changed()
def _getstate(self): def _getstate(self):
return GP._getstate(self) return GP._getstate(self)

View file

@ -1,14 +1,17 @@
# ## Copyright (c) 2013, GPy authors (see AUTHORS.txt). # ## Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from GPy.core import Model import numpy as np
from GPy.core import SparseGP
from GPy.util.linalg import PCA
import numpy
import itertools import itertools
import pylab import pylab
from GPy.kern import Kern
from GPy.models.bayesian_gplvm import BayesianGPLVM from ..core import Model, SparseGP
from ..util.linalg import PCA
from ..kern import Kern
from bayesian_gplvm import BayesianGPLVM
from ..core.parameterization.variational import NormalPosterior, NormalPrior
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData
from ..likelihoods.gaussian import Gaussian
class MRD2(Model): class MRD2(Model):
""" """
@ -20,11 +23,101 @@ class MRD2(Model):
to match up, whereas the dimensionality p_d can differ. to match up, whereas the dimensionality p_d can differ.
:param [array-like] Ylist: List of datasets to apply MRD on :param [array-like] Ylist: List of datasets to apply MRD on
:param array-like q_mean: mean of starting latent space q in [n x q] :param input_dim: latent dimensionality
:param array-like q_variance: variance of starting latent space q in [n x q] :type input_dim: int
:param :class:`~GPy.inference.latent_function_inference :param array-like X: mean of starting latent space q in [n x q]
:param array-like X_variance: variance of starting latent space q in [n x q]
:param initx: initialisation method for the latent space :
* 'concat' - PCA on concatenation of all datasets
* 'single' - Concatenation of PCA on datasets, respectively
* 'random' - Random draw from a Normal(0,1)
:type initx: ['concat'|'single'|'random']
:param initz: initialisation method for inducing inputs
:type initz: 'permute'|'random'
:param num_inducing: number of inducing inputs to use
:param Z: initial inducing inputs
:param kernel: list of kernels or kernel to copy for each output
:type kernel: [GPy.kern.kern] | GPy.kern.kern | None (default)
:param :class:`~GPy.inference.latent_function_inference inference_method: the inference method to use
:param :class:`~GPy.likelihoods.likelihood.Likelihood` likelihood: the likelihood to use
:param str name: the name of this model
""" """
def __init__(self, Ylist, input_dim, X=None, X_variance=None,
initx = 'PCA', initz = 'permute',
num_inducing=10, Z=None, kernel=None,
inference_method=None, likelihood=None, name='mrd'):
super(MRD2, self).__init__(name)
# sort out the kernels
if kernel is None:
from ..kern import RBF
self.kern = [RBF(input_dim, ARD=1, name='Y_{}'.format(i)) for i in range(len(Ylist))]
elif isinstance(kernel, Kern):
self.kern = [kernel.copy(name='Y_{}'.format(i)) for i in range(len(Ylist))]
else:
assert len(kernel) == len(Ylist), "need one kernel per output"
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
self.input_dim = input_dim
self.num_inducing = num_inducing
self._in_init_ = True
X = self._init_X(initx, Ylist)
self.Z = self._init_Z(initz, X)
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
if X_variance is None:
X_variance = np.random.uniform(0,.2,X.shape)
self.variational_prior = NormalPrior()
self.X = NormalPosterior(X, X_variance)
if likelihood is None:
likelihood = Gaussian()
if inference_method is None:
if any(np.any(np.isnan(y)) for y in Ylist):
self.inference_method = VarDTCMissingData(limit=len(Ylist))
self.Ylist = Ylist
def parameters_changed(self):
for y in self.Ylist:
pass
def _init_X(self, init='PCA', likelihood_list=None):
if likelihood_list is None:
likelihood_list = self.likelihood_list
Ylist = []
for likelihood_or_Y in likelihood_list:
if type(likelihood_or_Y) is np.ndarray:
Ylist.append(likelihood_or_Y)
else:
Ylist.append(likelihood_or_Y.Y)
del likelihood_list
if init in "PCA_concat":
X = PCA(np.hstack(Ylist), self.input_dim)[0]
elif init in "PCA_single":
X = np.zeros((Ylist[0].shape[0], self.input_dim))
for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
X[:, qs] = PCA(Y, len(qs))[0]
else: # init == 'random':
X = np.random.randn(Ylist[0].shape[0], self.input_dim)
self.X = X
return X
def _init_Z(self, init="permute", X=None):
if X is None:
X = self.X
if init in "permute":
Z = np.random.permutation(X.copy())[:self.num_inducing]
elif init in "random":
Z = np.random.randn(self.num_inducing, self.input_dim) * X.var()
self.Z = Z
return Z
class MRD(Model): class MRD(Model):
""" """
@ -84,7 +177,7 @@ class MRD(Model):
del self._in_init_ del self._in_init_
self.gref = self.bgplvms[0] self.gref = self.bgplvms[0]
nparams = numpy.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms]) nparams = np.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms])
self.nparams = nparams.cumsum() self.nparams = nparams.cumsum()
self.num_data = self.gref.num_data self.num_data = self.gref.num_data
@ -216,7 +309,7 @@ class MRD(Model):
X_var = self.gref.X_variance.ravel() X_var = self.gref.X_variance.ravel()
Z = self.gref.Z.ravel() Z = self.gref.Z.ravel()
thetas = [SparseGP._get_params(g)[g.Z.size:] for g in self.bgplvms] thetas = [SparseGP._get_params(g)[g.Z.size:] for g in self.bgplvms]
params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)]) params = np.hstack([X, X_var, Z, np.hstack(thetas)])
return params return params
# def _set_var_params(self, g, X, X_var, Z): # def _set_var_params(self, g, X, X_var, Z):
@ -239,13 +332,13 @@ class MRD(Model):
# set params for all: # set params for all:
for g, s, e in itertools.izip(self.bgplvms, self.nparams, self.nparams[1:]): for g, s, e in itertools.izip(self.bgplvms, self.nparams, self.nparams[1:]):
g._set_params(numpy.hstack([X, X_var, Z, thetas[s:e]])) g._set_params(np.hstack([X, X_var, Z, thetas[s:e]]))
# self._set_var_params(g, X, X_var, Z) # self._set_var_params(g, X, X_var, Z)
# self._set_kern_params(g, thetas[s:e].copy()) # self._set_kern_params(g, thetas[s:e].copy())
# g._compute_kernel_matrices() # g._compute_kernel_matrices()
# if self.auto_scale_factor: # if self.auto_scale_factor:
# g.scale_factor = numpy.sqrt(g.psi2.sum(0).mean() * g.likelihood.precision) # g.scale_factor = np.sqrt(g.psi2.sum(0).mean() * g.likelihood.precision)
# # self.scale_factor = numpy.sqrt(self.psi2.sum(0).mean() * self.likelihood.precision) # # self.scale_factor = np.sqrt(self.psi2.sum(0).mean() * self.likelihood.precision)
# g._computations() # g._computations()
@ -264,48 +357,18 @@ class MRD(Model):
dKLmu, dKLdS = self.gref.dKL_dmuS() dKLmu, dKLdS = self.gref.dKL_dmuS()
dLdmu -= dKLmu dLdmu -= dKLmu
dLdS -= dKLdS dLdS -= dKLdS
dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten() dLdmuS = np.hstack((dLdmu.flatten(), dLdS.flatten())).flatten()
dldzt1 = reduce(lambda a, b: a + b, (SparseGP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms)) dldzt1 = reduce(lambda a, b: a + b, (SparseGP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
return numpy.hstack((dLdmuS, return np.hstack((dLdmuS,
dldzt1, dldzt1,
numpy.hstack([numpy.hstack([g.dL_dtheta(), np.hstack([np.hstack([g.dL_dtheta(),
g.likelihood._gradients(\ g.likelihood._gradients(\
partial=g.partial_for_likelihood)]) \ partial=g.partial_for_likelihood)]) \
for g in self.bgplvms]))) for g in self.bgplvms])))
def _init_X(self, init='PCA', likelihood_list=None):
if likelihood_list is None:
likelihood_list = self.likelihood_list
Ylist = []
for likelihood_or_Y in likelihood_list:
if type(likelihood_or_Y) is numpy.ndarray:
Ylist.append(likelihood_or_Y)
else:
Ylist.append(likelihood_or_Y.Y)
del likelihood_list
if init in "PCA_concat":
X = PCA(numpy.hstack(Ylist), self.input_dim)[0]
elif init in "PCA_single":
X = numpy.zeros((Ylist[0].shape[0], self.input_dim))
for qs, Y in itertools.izip(numpy.array_split(numpy.arange(self.input_dim), len(Ylist)), Ylist):
X[:, qs] = PCA(Y, len(qs))[0]
else: # init == 'random':
X = numpy.random.randn(Ylist[0].shape[0], self.input_dim)
self.X = X
return X
def _init_Z(self, init="permute", X=None):
if X is None:
X = self.X
if init in "permute":
Z = numpy.random.permutation(X.copy())[:self.num_inducing]
elif init in "random":
Z = numpy.random.randn(self.num_inducing, self.input_dim) * X.var()
self.Z = Z
return Z
def _handle_plotting(self, fignum, axes, plotf, sharex=False, sharey=False): def _handle_plotting(self, fignum, axes, plotf, sharex=False, sharey=False):
if axes is None: if axes is None:
fig = pylab.figure(num=fignum) fig = pylab.figure(num=fignum)
@ -358,7 +421,7 @@ class MRD(Model):
""" """
if titles is None: if titles is None:
titles = [r'${}$'.format(name) for name in self.names] titles = [r'${}$'.format(name) for name in self.names]
ymax = reduce(max, [numpy.ceil(max(g.input_sensitivity())) for g in self.bgplvms]) ymax = reduce(max, [np.ceil(max(g.input_sensitivity())) for g in self.bgplvms])
def plotf(i, g, ax): def plotf(i, g, ax):
ax.set_ylim([0,ymax]) ax.set_ylim([0,ymax])
g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs) g.kern.plot_ARD(ax=ax, title=titles[i], *args, **kwargs)

View file

@ -8,7 +8,7 @@ from .. import likelihoods
from .. import kern from .. import kern
from ..inference.latent_function_inference import VarDTC from ..inference.latent_function_inference import VarDTC
from ..util.misc import param_to_array from ..util.misc import param_to_array
from ..core.parameterization.variational import VariationalPosterior from ..core.parameterization.variational import NormalPosterior
class SparseGPRegression(SparseGP): class SparseGPRegression(SparseGP):
""" """
@ -47,7 +47,7 @@ class SparseGPRegression(SparseGP):
likelihood = likelihoods.Gaussian() likelihood = likelihoods.Gaussian()
if not (X_variance is None): if not (X_variance is None):
X = VariationalPosterior(X,X_variance) X = NormalPosterior(X,X_variance)
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC()) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC())
@ -88,7 +88,7 @@ class SparseGPRegressionUncertainInput(SparseGP):
# kern defaults to rbf (plus white for stability) # kern defaults to rbf (plus white for stability)
if kernel is None: if kernel is None:
kernel = kern.rbf(input_dim) + kern.white(input_dim, variance=1e-3) kernel = kern.RBF(input_dim) + kern.White(input_dim, variance=1e-3)
# Z defaults to a subset of the data # Z defaults to a subset of the data
if Z is None: if Z is None:
@ -99,5 +99,5 @@ class SparseGPRegressionUncertainInput(SparseGP):
likelihood = likelihoods.Gaussian() likelihood = likelihoods.Gaussian()
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance, inference_method=VarDTC())
self.ensure_default_constraints() self.ensure_default_constraints()

View file

@ -36,7 +36,7 @@ class SSGPLVM(SparseGP):
X_variance = np.random.uniform(0,.1,X.shape) X_variance = np.random.uniform(0,.1,X.shape)
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim)
if Z is None: if Z is None:
Z = np.random.permutation(X.copy())[:num_inducing] Z = np.random.permutation(X.copy())[:num_inducing]
@ -48,24 +48,36 @@ class SSGPLVM(SparseGP):
if kernel is None: if kernel is None:
kernel = kern.SSRBF(input_dim) kernel = kern.SSRBF(input_dim)
self.variational_prior = SpikeAndSlabPrior(pi=0.5) # the prior probability of the latent binary variable b pi = np.empty((input_dim))
pi[:] = 0.5
self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b
X = SpikeAndSlabPosterior(X, X_variance, gamma) X = SpikeAndSlabPosterior(X, X_variance, gamma)
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.add_parameter(self.X, index=0) self.add_parameter(self.X, index=0)
self.add_parameter(self.variational_prior)
def parameters_changed(self): def parameters_changed(self):
super(SSGPLVM, self).parameters_changed() super(SSGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
self.X.mean.gradient, self.X.variance.gradient, self.X.binary_prob.gradient = self.kern.gradients_q_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict) self.X.mean.gradient, self.X.variance.gradient, self.X.binary_prob.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
# update for the KL divergence # update for the KL divergence
self.variational_prior.update_gradients_KL(self.X) self.variational_prior.update_gradients_KL(self.X)
def input_sensitivity(self):
if self.kern.ARD:
return self.kern.input_sensitivity()
else:
return self.variational_prior.pi
def plot_latent(self, plot_inducing=True, *args, **kwargs): def plot_latent(self, plot_inducing=True, *args, **kwargs):
pass import sys
#return plot_latent.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs) assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs)
def do_test_latents(self, Y): def do_test_latents(self, Y):
""" """

View file

@ -20,7 +20,7 @@ def most_significant_input_dimensions(model, which_indices):
input_1, input_2 = 0, 1 input_1, input_2 = 0, 1
else: else:
try: try:
input_1, input_2 = np.argsort(model.kern.input_sensitivity())[::-1][:2] input_1, input_2 = np.argsort(model.input_sensitivity())[::-1][:2]
except: except:
raise ValueError, "cannot automatically determine which dimensions to plot, please pass 'which_indices'" raise ValueError, "cannot automatically determine which dimensions to plot, please pass 'which_indices'"
else: else:

View file

@ -23,7 +23,7 @@ def add_bar_labels(fig, ax, bars, bottom=0):
xi = patch.get_x() + patch.get_width() / 2. xi = patch.get_x() + patch.get_width() / 2.
va = 'top' va = 'top'
c = 'w' c = 'w'
t = TextPath((0, 0), "${xi}$".format(xi=xi), rotation=0, usetex=True, ha='center') t = TextPath((0, 0), "${xi}$".format(xi=xi), rotation=0, ha='center')
transform = transOffset transform = transOffset
if patch.get_extents().height <= t.get_extents().height + 3: if patch.get_extents().height <= t.get_extents().height + 3:
va = 'bottom' va = 'bottom'
@ -106,7 +106,7 @@ def plot(kernel, x=None, plot_limits=None, which_parts='all', resolution=None, *
raise ValueError, "Bad limits for plotting" raise ValueError, "Bad limits for plotting"
Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None] Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None]
Kx = kernel.K(Xnew, x, which_parts) Kx = kernel.K(Xnew, x)
pb.plot(Xnew, Kx, *args, **kwargs) pb.plot(Xnew, Kx, *args, **kwargs)
pb.xlim(xmin, xmax) pb.xlim(xmin, xmax)
pb.xlabel("x") pb.xlabel("x")

View file

@ -56,10 +56,13 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if ax is None: if ax is None:
fig = pb.figure(num=fignum) fig = pb.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
X, Y = param_to_array(model.X, model.Y) if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): X_variance = model.X_variance X = model.X.mean
X_variance = param_to_array(model.X.variance)
else:
X = model.X
X, Y = param_to_array(X, model.Y)
if hasattr(model, 'Z'): Z = param_to_array(model.Z) if hasattr(model, 'Z'): Z = param_to_array(model.Z)
#work out what the inputs are for plotting (1D or 2D) #work out what the inputs are for plotting (1D or 2D)
@ -98,10 +101,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#add error bars for uncertain (if input uncertainty is being modelled) #add error bars for uncertain (if input uncertainty is being modelled)
#if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs(): if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
# ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(), ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
# xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()), xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
# ecolor='k', fmt=None, elinewidth=.5, alpha=.5) ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
#set the limits of the plot to some sensible values #set the limits of the plot to some sensible values

View file

@ -44,3 +44,48 @@ def plot(parameterized, fignum=None, ax=None, colors=None):
pb.draw() pb.draw()
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95)) fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
return fig return fig
def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
    """
    Plot a spike-and-slab variational posterior over the latent space X in 1D.

    For each latent dimension two stacked subplots are drawn: the variational
    mean with a +/- 2 std-dev band, and a bar plot of the binary (spike)
    probabilities gamma in [0, 1].

    :param parameterized: object exposing .mean, .variance and .binary_prob
        arrays of shape (N, input_dim) (e.g. a SpikeAndSlabPosterior).
    :param fignum: figure number to create the figure with (if ax is None).
    :param ax: NOTE(review): currently unused — if ax is not None, `fig` is
        never assigned and fig.add_subplot below raises NameError; confirm
        intended behavior before passing an axis.
    :param colors: iterable of colors, one per latent dimension; defaults to
        the current axes' color cycle.
    :returns: the matplotlib figure containing the subplots.
    """
    if ax is None:
        fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1]))))
    if colors is None:
        # default to matplotlib's color cycle of the current axes
        colors = pb.gca()._get_lines.color_cycle
        pb.clf()
    else:
        colors = iter(colors)
    plots = []
    # strip Param wrappers down to plain ndarrays for plotting
    means, variances, gamma = param_to_array(parameterized.mean, parameterized.variance, parameterized.binary_prob)
    x = np.arange(means.shape[0])
    for i in range(means.shape[1]):
        # mean and variance plot
        a = fig.add_subplot(means.shape[1]*2, 1, 2*i + 1)
        # faint background of all dimensions for context
        a.plot(means, c='k', alpha=.3)
        # NOTE: colors.next() is the Python 2 iterator protocol
        plots.extend(a.plot(x, means.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
        # +/- 2 standard deviations band around the mean
        a.fill_between(x,
                       means.T[i] - 2 * np.sqrt(variances.T[i]),
                       means.T[i] + 2 * np.sqrt(variances.T[i]),
                       facecolor=plots[-1].get_color(),
                       alpha=.3)
        a.legend(borderaxespad=0.)
        a.set_xlim(x.min(), x.max())
        if i < means.shape[1] - 1:
            # only the bottom-most subplot keeps its x tick labels
            a.set_xticklabels('')
        # binary prob plot
        a = fig.add_subplot(means.shape[1]*2, 1, 2*i + 2)
        a.bar(x,gamma[:,i],bottom=0.,linewidth=0,align='center')
        a.set_xlim(x.min(), x.max())
        a.set_ylim([0.,1.])
    pb.draw()
    fig.tight_layout(h_pad=.01)  # , rect=(0, 0, 1, .95))
    return fig

View file

@ -10,7 +10,7 @@ from functools import partial
#np.random.seed(300) #np.random.seed(300)
#np.random.seed(7) #np.random.seed(7)
np.seterr(divide='raise') #np.seterr(divide='raise')
def dparam_partial(inst_func, *args): def dparam_partial(inst_func, *args):
""" """
If we have a instance method that needs to be called but that doesn't If we have a instance method that needs to be called but that doesn't
@ -350,7 +350,7 @@ class TestNoiseModels(object):
def t_logpdf(self, model, Y, f): def t_logpdf(self, model, Y, f):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
print model._get_params() #print model._get_params()
np.testing.assert_almost_equal( np.testing.assert_almost_equal(
model.pdf(f.copy(), Y.copy()), model.pdf(f.copy(), Y.copy()),
np.exp(model.logpdf(f.copy(), Y.copy())) np.exp(model.logpdf(f.copy(), Y.copy()))
@ -664,7 +664,8 @@ class LaplaceTests(unittest.TestCase):
print m1 print m1
print m2 print m2
m2._set_params(m1._get_params()) m2.parameters_changed()
#m2._set_params(m1._get_params())
#Predict for training points to get posterior mean and variance #Predict for training points to get posterior mean and variance
post_mean, post_var, _, _ = m1.predict(X) post_mean, post_var, _, _ = m1.predict(X)
@ -700,7 +701,8 @@ class LaplaceTests(unittest.TestCase):
np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2) np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2)
#Check marginals are the same with random #Check marginals are the same with random
m1.randomize() m1.randomize()
m2._set_params(m1._get_params()) #m2._set_params(m1._get_params())
m2.parameters_changed()
np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2) np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2)
#Check they are checkgradding #Check they are checkgradding

View file

@ -0,0 +1,138 @@
'''
Created on 27 Feb 2014
@author: maxz
'''
import unittest
from GPy.core.parameterization.parameterized import Parameterized
from GPy.core.parameterization.param import Param
import numpy
# Observer counters start at -1 because one parameters_changed trigger
# fires during object initialization, bringing the count to 0.
_trigger_start = -1
class ParamTestParent(Parameterized):
    """Parent container that counts how often parameters_changed fires."""
    # incremented once per notification; starts at -1 so the trigger during
    # construction leaves it at 0
    parent_changed_count = _trigger_start
    def parameters_changed(self):
        # called by the parameterization framework on any child change
        self.parent_changed_count += 1
class ParameterizedTest(Parameterized):
    """Parameterized object under test; counts parameters_changed calls."""
    # One trigger after initialization
    params_changed_count = _trigger_start
    def parameters_changed(self):
        self.params_changed_count += 1
    def _set_params(self, params, trigger_parent=True):
        # NOTE(review): this override only forwards, but with
        # trigger_parent defaulting to True — presumably to ensure the
        # parent is always notified in these tests; confirm against the
        # base-class default.
        Parameterized._set_params(self, params, trigger_parent=trigger_parent)
class Test(unittest.TestCase):
    """Tests for the observer/notification machinery of Parameterized.

    Builds a small hierarchy: parent -> (par, par2), where par holds a
    (10, 3) Param plus two scalar Params, and checks that observers and
    parameters_changed counters fire the expected number of times.
    """
    def setUp(self):
        # hierarchy: self.parent contains self.par and self.par2
        self.parent = ParamTestParent('test parent')
        self.par = ParameterizedTest('test model')
        self.par2 = ParameterizedTest('test model 2')
        self.p = Param('test parameter', numpy.random.normal(1,2,(10,3)))
        self.par.add_parameter(self.p)
        self.par.add_parameter(Param('test1', numpy.random.normal(0,1,(1,))))
        self.par.add_parameter(Param('test2', numpy.random.normal(0,1,(1,))))
        self.par2.add_parameter(Param('par2 test1', numpy.random.normal(0,1,(1,))))
        self.par2.add_parameter(Param('par2 test2', numpy.random.normal(0,1,(1,))))
        self.parent.add_parameter(self.par)
        self.parent.add_parameter(self.par2)
        # bookkeeping for the observer callbacks below
        self._observer_triggered = None
        self._trigger_count = 0
        # _first/_second record the order in which callbacks fired
        self._first = None
        self._second = None
    def _trigger(self, which):
        # observer callback: records the payload and its firing order
        self._observer_triggered = float(which)
        self._trigger_count += 1
        if self._first is not None:
            self._second = self._trigger
        else:
            self._first = self._trigger
    def _trigger_priority(self, which):
        # higher-priority observer callback: only records firing order
        if self._first is not None:
            self._second = self._trigger_priority
        else:
            self._first = self._trigger_priority
    def test_observable(self):
        """Adding/removing an observer toggles whether writes to a Param
        reach it, while parameters_changed always fires."""
        self.par.add_observer(self, self._trigger, -1)
        self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet')
        self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
        self.p[0,1] = 3 # trigger observers
        self.assertEqual(self._observer_triggered, 3, 'observer should have triggered')
        self.assertEqual(self._trigger_count, 1, 'observer should have triggered once')
        self.assertEqual(self.par.params_changed_count, 1, 'params changed once')
        self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
        # after removal, writes must no longer reach the observer
        self.par.remove_observer(self)
        self.p[2,1] = 4
        self.assertEqual(self._observer_triggered, 3, 'observer should not have triggered')
        self.assertEqual(self._trigger_count, 1, 'observer should have triggered once')
        self.assertEqual(self.par.params_changed_count, 2, 'params changed second')
        self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
        # re-adding restores delivery
        self.par.add_observer(self, self._trigger, -1)
        self.p[2,1] = 4
        self.assertEqual(self._observer_triggered, 4, 'observer should have triggered')
        self.assertEqual(self._trigger_count, 2, 'observer should have triggered once')
        self.assertEqual(self.par.params_changed_count, 3, 'params changed second')
        self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
        # removal by (observer, callable) pair
        self.par.remove_observer(self, self._trigger)
        self.p[0,1] = 3
        self.assertEqual(self._observer_triggered, 4, 'observer should not have triggered')
        self.assertEqual(self._trigger_count, 2, 'observer should have triggered once')
        self.assertEqual(self.par.params_changed_count, 4, 'params changed second')
        self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
    def test_set_params(self):
        """Writing the raw parameter array and triggering manually fires
        parameters_changed on both the object and its parent."""
        self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet')
        self.par._param_array_[:] = 1
        self.par._trigger_params_changed()
        self.assertEqual(self.par.params_changed_count, 1, 'now params changed')
        self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
        self.par._param_array_[:] = 2
        self.par._trigger_params_changed()
        self.assertEqual(self.par.params_changed_count, 2, 'now params changed')
        self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
    def test_priority_notify(self):
        """Notifying with min_priority -inf reaches the object itself but
        not the parent (parent observer sits below that priority)."""
        self.assertEqual(self.par.params_changed_count, 0)
        self.par.notify_observers(0, None)
        self.assertEqual(self.par.params_changed_count, 1)
        self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
        self.par.notify_observers(0, -numpy.inf)
        self.assertEqual(self.par.params_changed_count, 2)
        self.assertEqual(self.parent.parent_changed_count, 1)
    def test_priority(self):
        """Observers fire in priority order regardless of insertion order."""
        self.par.add_observer(self, self._trigger, -1)
        self.par.add_observer(self, self._trigger_priority, 0)
        self.par.notify_observers(0)
        self.assertEqual(self._first, self._trigger_priority, 'priority should be first')
        self.assertEqual(self._second, self._trigger, 'priority should be first')
        self.par.remove_observer(self)
        self._first = self._second = None
        # swap priorities: now _trigger (priority 1) fires first
        self.par.add_observer(self, self._trigger, 1)
        self.par.add_observer(self, self._trigger_priority, 0)
        self.par.notify_observers(0)
        self.assertEqual(self._first, self._trigger, 'priority should be second')
        self.assertEqual(self._second, self._trigger_priority, 'priority should be second')
# Run the test suite when executed directly.
if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()

View file

@ -6,6 +6,7 @@ Created on Feb 13, 2014
import unittest import unittest
import GPy import GPy
import numpy as np import numpy as np
from GPy.core.parameterization.parameter_core import HierarchyError
class Test(unittest.TestCase): class Test(unittest.TestCase):
@ -21,6 +22,10 @@ class Test(unittest.TestCase):
self.test1.add_parameter(self.rbf, 0) self.test1.add_parameter(self.rbf, 0)
self.test1.add_parameter(self.param) self.test1.add_parameter(self.param)
x = np.linspace(-2,6,4)[:,None]
y = np.sin(x)
self.testmodel = GPy.models.GPRegression(x,y)
def test_add_parameter(self): def test_add_parameter(self):
self.assertEquals(self.rbf._parent_index_, 0) self.assertEquals(self.rbf._parent_index_, 0)
self.assertEquals(self.white._parent_index_, 1) self.assertEquals(self.white._parent_index_, 1)
@ -37,7 +42,6 @@ class Test(unittest.TestCase):
self.test1.add_parameter(self.white, 0) self.test1.add_parameter(self.white, 0)
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED]) self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED])
def test_remove_parameter(self): def test_remove_parameter(self):
from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
self.white.fix() self.white.fix()
@ -65,7 +69,7 @@ class Test(unittest.TestCase):
self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1]) self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1])
def test_add_parameter_already_in_hirarchy(self): def test_add_parameter_already_in_hirarchy(self):
self.test1.add_parameter(self.white._parameters_[0]) self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])
def test_default_constraints(self): def test_default_constraints(self):
self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops) self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)
@ -88,6 +92,18 @@ class Test(unittest.TestCase):
self.assertEqual(self.rbf.constraints._offset, 0) self.assertEqual(self.rbf.constraints._offset, 0)
self.assertEqual(self.param.constraints._offset, 3) self.assertEqual(self.param.constraints._offset, 3)
def test_fixing_randomize(self):
self.white.fix(warning=False)
val = float(self.test1.white.variance)
self.test1.randomize()
self.assertEqual(val, self.white.variance)
def test_fixing_optimize(self):
self.testmodel.kern.lengthscale.fix()
val = float(self.testmodel.kern.lengthscale)
self.testmodel.randomize()
self.assertEqual(val, self.testmodel.kern.lengthscale)
if __name__ == "__main__": if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_add_parameter'] #import sys;sys.argv = ['', 'Test.test_add_parameter']
unittest.main() unittest.main()

View file

@ -9,8 +9,8 @@ import numpy as np
from GPy import testing from GPy import testing
import sys import sys
import numpy import numpy
from GPy.kern.parts.rbf import RBF from GPy.kern import RBF
from GPy.kern.parts.linear import Linear from GPy.kern import Linear
from copy import deepcopy from copy import deepcopy
__test__ = lambda: 'deep' in sys.argv __test__ = lambda: 'deep' in sys.argv
@ -36,7 +36,7 @@ class Test(unittest.TestCase):
indices = numpy.cumsum(i_s_dim_list).tolist() indices = numpy.cumsum(i_s_dim_list).tolist()
input_slices = [slice(a,b) for a,b in zip([None]+indices, indices)] input_slices = [slice(a,b) for a,b in zip([None]+indices, indices)]
#input_slices[2] = deepcopy(input_slices[1]) #input_slices[2] = deepcopy(input_slices[1])
input_slice_kern = GPy.kern.kern(9, input_slice_kern = GPy.kern.kern(9,
[ [
RBF(i_s_dim_list[0], np.random.rand(), np.random.rand(i_s_dim_list[0]), ARD=True), RBF(i_s_dim_list[0], np.random.rand(), np.random.rand(i_s_dim_list[0]), ARD=True),
RBF(i_s_dim_list[1], np.random.rand(), np.random.rand(i_s_dim_list[1]), ARD=True), RBF(i_s_dim_list[1], np.random.rand(), np.random.rand(i_s_dim_list[1]), ARD=True),
@ -51,8 +51,8 @@ class Test(unittest.TestCase):
# GPy.kern.bias(self.input_dim) + # GPy.kern.bias(self.input_dim) +
# GPy.kern.white(self.input_dim)), # GPy.kern.white(self.input_dim)),
(#GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) (#GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) GPy.kern.Linear(self.input_dim, np.random.rand(self.input_dim), ARD=True)
+GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +GPy.kern.RBF(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
# +GPy.kern.bias(self.input_dim) # +GPy.kern.bias(self.input_dim)
# +GPy.kern.white(self.input_dim)), # +GPy.kern.white(self.input_dim)),
), ),

View file

@ -25,10 +25,10 @@ class PsiStatModel(Model):
self.kern = kernel self.kern = kernel
self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance) self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
self.add_parameters(self.X, self.X_variance, self.Z, self.kern) self.add_parameters(self.X, self.X_variance, self.Z, self.kern)
def log_likelihood(self): def log_likelihood(self):
return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum() return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
def parameters_changed(self): def parameters_changed(self):
psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance) psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
self.X.gradient = psimu self.X.gradient = psimu
@ -43,9 +43,9 @@ class PsiStatModel(Model):
if self.which == 'psi0': dL_dpsi0 += 1 if self.which == 'psi0': dL_dpsi0 += 1
if self.which == 'psi1': dL_dpsi1 += 1 if self.which == 'psi1': dL_dpsi1 += 1
if self.which == 'psi2': dL_dpsi2 += 1 if self.which == 'psi2': dL_dpsi2 += 1
self.kern.update_gradients_variational(numpy.zeros([1,1]), self.kern.update_gradients_variational(numpy.zeros([1,1]),
dL_dpsi0, dL_dpsi0,
dL_dpsi1, dL_dpsi1,
dL_dpsi2, self.X, self.X_variance, self.Z) dL_dpsi2, self.X, self.X_variance, self.Z)
class DPsiStatTest(unittest.TestCase): class DPsiStatTest(unittest.TestCase):
@ -57,14 +57,14 @@ class DPsiStatTest(unittest.TestCase):
X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1) X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
Z = numpy.random.permutation(X)[:num_inducing] Z = numpy.random.permutation(X)[:num_inducing]
Y = X.dot(numpy.random.randn(input_dim, input_dim)) Y = X.dot(numpy.random.randn(input_dim, input_dim))
# kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)] # kernels = [GPy.kern.Linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.RBF(input_dim, ARD=True), GPy.kern.Bias(input_dim)]
kernels = [ kernels = [
GPy.kern.linear(input_dim), GPy.kern.Linear(input_dim),
GPy.kern.rbf(input_dim), GPy.kern.RBF(input_dim),
#GPy.kern.bias(input_dim), #GPy.kern.Bias(input_dim),
#GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim), #GPy.kern.Linear(input_dim) + GPy.kern.Bias(input_dim),
#GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) #GPy.kern.RBF(input_dim) + GPy.kern.Bias(input_dim)
] ]
def testPsi0(self): def testPsi0(self):
@ -73,7 +73,7 @@ class DPsiStatTest(unittest.TestCase):
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k._parameters_))) assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi1(self): def testPsi1(self):
for k in self.kernels: for k in self.kernels:
m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
@ -119,11 +119,11 @@ if __name__ == "__main__":
if interactive: if interactive:
# N, num_inducing, input_dim, input_dim = 30, 5, 4, 30 # N, num_inducing, input_dim, input_dim = 30, 5, 4, 30
# X = numpy.random.rand(N, input_dim) # X = numpy.random.rand(N, input_dim)
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) # k = GPy.kern.Linear(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
# K = k.K(X) # K = k.K(X)
# Y = numpy.random.multivariate_normal(numpy.zeros(N), K, input_dim).T # Y = numpy.random.multivariate_normal(numpy.zeros(N), K, input_dim).T
# Y -= Y.mean(axis=0) # Y -= Y.mean(axis=0)
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) # k = GPy.kern.Linear(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
# m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) # m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
# m.randomize() # m.randomize()
# # self.assertTrue(m.checkgrad()) # # self.assertTrue(m.checkgrad())
@ -136,11 +136,11 @@ if __name__ == "__main__":
X_var = .5 * numpy.ones_like(X) + .1 * numpy.clip(numpy.random.randn(*X.shape), 0, 1) X_var = .5 * numpy.ones_like(X) + .1 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
Z = numpy.random.permutation(X)[:num_inducing] Z = numpy.random.permutation(X)[:num_inducing]
Y = X.dot(numpy.random.randn(input_dim, D)) Y = X.dot(numpy.random.randn(input_dim, D))
# kernel = GPy.kern.bias(input_dim) # kernel = GPy.kern.Bias(input_dim)
# #
# kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim), # kernels = [GPy.kern.Linear(input_dim), GPy.kern.RBF(input_dim), GPy.kern.Bias(input_dim),
# GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim), # GPy.kern.Linear(input_dim) + GPy.kern.Bias(input_dim),
# GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)] # GPy.kern.RBF(input_dim) + GPy.kern.Bias(input_dim)]
# for k in kernels: # for k in kernels:
# m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, # m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
@ -148,32 +148,32 @@ if __name__ == "__main__":
# assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts))) # assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
# #
m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z, m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim)+GPy.kern.bias(input_dim)) num_inducing=num_inducing, kernel=GPy.kern.RBF(input_dim)+GPy.kern.Bias(input_dim))
# m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, # m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# num_inducing=num_inducing, kernel=kernel) # num_inducing=num_inducing, kernel=kernel)
# m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, # m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# num_inducing=num_inducing, kernel=kernel) # num_inducing=num_inducing, kernel=kernel)
# m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, # m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
# num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim)) # num_inducing=num_inducing, kernel=GPy.kern.RBF(input_dim))
# m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, # m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
# num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim))) # num_inducing=num_inducing, kernel=GPy.kern.Linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)))
# + GPy.kern.bias(input_dim)) # + GPy.kern.Bias(input_dim))
# m = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, # m = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
# num_inducing=num_inducing, # num_inducing=num_inducing,
# kernel=( # kernel=(
# GPy.kern.rbf(input_dim, ARD=1) # GPy.kern.RBF(input_dim, ARD=1)
# +GPy.kern.linear(input_dim, ARD=1) # +GPy.kern.Linear(input_dim, ARD=1)
# +GPy.kern.bias(input_dim)) # +GPy.kern.Bias(input_dim))
# ) # )
# m.ensure_default_constraints() # m.ensure_default_constraints()
m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
num_inducing=num_inducing, kernel=( num_inducing=num_inducing, kernel=(
GPy.kern.rbf(input_dim, numpy.random.rand(), numpy.random.rand(input_dim), ARD=1) GPy.kern.RBF(input_dim, numpy.random.rand(), numpy.random.rand(input_dim), ARD=1)
#+GPy.kern.linear(input_dim, numpy.random.rand(input_dim), ARD=1) #+GPy.kern.Linear(input_dim, numpy.random.rand(input_dim), ARD=1)
#+GPy.kern.rbf(input_dim, numpy.random.rand(), numpy.random.rand(input_dim), ARD=1) #+GPy.kern.RBF(input_dim, numpy.random.rand(), numpy.random.rand(input_dim), ARD=1)
#+GPy.kern.rbf(input_dim, numpy.random.rand(), numpy.random.rand(), ARD=0) #+GPy.kern.RBF(input_dim, numpy.random.rand(), numpy.random.rand(), ARD=0)
+GPy.kern.bias(input_dim) +GPy.kern.Bias(input_dim)
+GPy.kern.white(input_dim) +GPy.kern.White(input_dim)
) )
) )
m2.ensure_default_constraints() m2.ensure_default_constraints()

View file

@ -10,10 +10,10 @@ class sparse_GPLVMTests(unittest.TestCase):
def test_bias_kern(self): def test_bias_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -21,10 +21,10 @@ class sparse_GPLVMTests(unittest.TestCase):
def test_linear_kern(self): def test_linear_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.Linear(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -32,10 +32,10 @@ class sparse_GPLVMTests(unittest.TestCase):
def test_rbf_kern(self): def test_rbf_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())

View file

@ -33,7 +33,7 @@ class GradientTests(unittest.TestCase):
# Get model type (GPRegression, SparseGPRegression, etc) # Get model type (GPRegression, SparseGPRegression, etc)
model_fit = getattr(GPy.models, model_type) model_fit = getattr(GPy.models, model_type)
# noise = GPy.kern.white(dimension) # noise = GPy.kern.White(dimension)
kern = kern # + noise kern = kern # + noise
if uncertain_inputs: if uncertain_inputs:
m = model_fit(X, Y, kernel=kern, X_variance=np.random.rand(X.shape[0], X.shape[1])) m = model_fit(X, Y, kernel=kern, X_variance=np.random.rand(X.shape[0], X.shape[1]))
@ -45,17 +45,17 @@ class GradientTests(unittest.TestCase):
def test_GPRegression_rbf_1d(self): def test_GPRegression_rbf_1d(self):
''' Testing the GP regression with rbf kernel with white kernel on 1d data ''' ''' Testing the GP regression with rbf kernel with white kernel on 1d data '''
rbf = GPy.kern.rbf(1) rbf = GPy.kern.RBF(1)
self.check_model(rbf, model_type='GPRegression', dimension=1) self.check_model(rbf, model_type='GPRegression', dimension=1)
def test_GPRegression_rbf_2D(self): def test_GPRegression_rbf_2D(self):
''' Testing the GP regression with rbf kernel on 2d data ''' ''' Testing the GP regression with rbf kernel on 2d data '''
rbf = GPy.kern.rbf(2) rbf = GPy.kern.RBF(2)
self.check_model(rbf, model_type='GPRegression', dimension=2) self.check_model(rbf, model_type='GPRegression', dimension=2)
def test_GPRegression_rbf_ARD_2D(self): def test_GPRegression_rbf_ARD_2D(self):
''' Testing the GP regression with rbf kernel on 2d data ''' ''' Testing the GP regression with rbf kernel on 2d data '''
k = GPy.kern.rbf(2, ARD=True) k = GPy.kern.RBF(2, ARD=True)
self.check_model(k, model_type='GPRegression', dimension=2) self.check_model(k, model_type='GPRegression', dimension=2)
def test_GPRegression_mlp_1d(self): def test_GPRegression_mlp_1d(self):
@ -65,7 +65,7 @@ class GradientTests(unittest.TestCase):
def test_GPRegression_poly_1d(self): def test_GPRegression_poly_1d(self):
''' Testing the GP regression with polynomial kernel with white kernel on 1d data ''' ''' Testing the GP regression with polynomial kernel with white kernel on 1d data '''
mlp = GPy.kern.poly(1, degree=5) mlp = GPy.kern.Poly(1, degree=5)
self.check_model(mlp, model_type='GPRegression', dimension=1) self.check_model(mlp, model_type='GPRegression', dimension=1)
def test_GPRegression_matern52_1D(self): def test_GPRegression_matern52_1D(self):
@ -100,80 +100,80 @@ class GradientTests(unittest.TestCase):
def test_GPRegression_exponential_1D(self): def test_GPRegression_exponential_1D(self):
''' Testing the GP regression with exponential kernel on 1d data ''' ''' Testing the GP regression with exponential kernel on 1d data '''
exponential = GPy.kern.exponential(1) exponential = GPy.kern.Exponential(1)
self.check_model(exponential, model_type='GPRegression', dimension=1) self.check_model(exponential, model_type='GPRegression', dimension=1)
def test_GPRegression_exponential_2D(self): def test_GPRegression_exponential_2D(self):
''' Testing the GP regression with exponential kernel on 2d data ''' ''' Testing the GP regression with exponential kernel on 2d data '''
exponential = GPy.kern.exponential(2) exponential = GPy.kern.Exponential(2)
self.check_model(exponential, model_type='GPRegression', dimension=2) self.check_model(exponential, model_type='GPRegression', dimension=2)
def test_GPRegression_exponential_ARD_2D(self): def test_GPRegression_exponential_ARD_2D(self):
''' Testing the GP regression with exponential kernel on 2d data ''' ''' Testing the GP regression with exponential kernel on 2d data '''
exponential = GPy.kern.exponential(2, ARD=True) exponential = GPy.kern.Exponential(2, ARD=True)
self.check_model(exponential, model_type='GPRegression', dimension=2) self.check_model(exponential, model_type='GPRegression', dimension=2)
def test_GPRegression_bias_kern_1D(self): def test_GPRegression_bias_kern_1D(self):
''' Testing the GP regression with bias kernel on 1d data ''' ''' Testing the GP regression with bias kernel on 1d data '''
bias = GPy.kern.bias(1) bias = GPy.kern.Bias(1)
self.check_model(bias, model_type='GPRegression', dimension=1) self.check_model(bias, model_type='GPRegression', dimension=1)
def test_GPRegression_bias_kern_2D(self): def test_GPRegression_bias_kern_2D(self):
''' Testing the GP regression with bias kernel on 2d data ''' ''' Testing the GP regression with bias kernel on 2d data '''
bias = GPy.kern.bias(2) bias = GPy.kern.Bias(2)
self.check_model(bias, model_type='GPRegression', dimension=2) self.check_model(bias, model_type='GPRegression', dimension=2)
def test_GPRegression_linear_kern_1D_ARD(self): def test_GPRegression_linear_kern_1D_ARD(self):
''' Testing the GP regression with linear kernel on 1d data ''' ''' Testing the GP regression with linear kernel on 1d data '''
linear = GPy.kern.linear(1, ARD=True) linear = GPy.kern.Linear(1, ARD=True)
self.check_model(linear, model_type='GPRegression', dimension=1) self.check_model(linear, model_type='GPRegression', dimension=1)
def test_GPRegression_linear_kern_2D_ARD(self): def test_GPRegression_linear_kern_2D_ARD(self):
''' Testing the GP regression with linear kernel on 2d data ''' ''' Testing the GP regression with linear kernel on 2d data '''
linear = GPy.kern.linear(2, ARD=True) linear = GPy.kern.Linear(2, ARD=True)
self.check_model(linear, model_type='GPRegression', dimension=2) self.check_model(linear, model_type='GPRegression', dimension=2)
def test_GPRegression_linear_kern_1D(self): def test_GPRegression_linear_kern_1D(self):
''' Testing the GP regression with linear kernel on 1d data ''' ''' Testing the GP regression with linear kernel on 1d data '''
linear = GPy.kern.linear(1) linear = GPy.kern.Linear(1)
self.check_model(linear, model_type='GPRegression', dimension=1) self.check_model(linear, model_type='GPRegression', dimension=1)
def test_GPRegression_linear_kern_2D(self): def test_GPRegression_linear_kern_2D(self):
''' Testing the GP regression with linear kernel on 2d data ''' ''' Testing the GP regression with linear kernel on 2d data '''
linear = GPy.kern.linear(2) linear = GPy.kern.Linear(2)
self.check_model(linear, model_type='GPRegression', dimension=2) self.check_model(linear, model_type='GPRegression', dimension=2)
def test_SparseGPRegression_rbf_white_kern_1d(self): def test_SparseGPRegression_rbf_white_kern_1d(self):
''' Testing the sparse GP regression with rbf kernel with white kernel on 1d data ''' ''' Testing the sparse GP regression with rbf kernel with white kernel on 1d data '''
rbf = GPy.kern.rbf(1) rbf = GPy.kern.RBF(1)
self.check_model(rbf, model_type='SparseGPRegression', dimension=1) self.check_model(rbf, model_type='SparseGPRegression', dimension=1)
def test_SparseGPRegression_rbf_white_kern_2D(self): def test_SparseGPRegression_rbf_white_kern_2D(self):
''' Testing the sparse GP regression with rbf kernel on 2d data ''' ''' Testing the sparse GP regression with rbf kernel on 2d data '''
rbf = GPy.kern.rbf(2) rbf = GPy.kern.RBF(2)
self.check_model(rbf, model_type='SparseGPRegression', dimension=2) self.check_model(rbf, model_type='SparseGPRegression', dimension=2)
def test_SparseGPRegression_rbf_linear_white_kern_1D(self): def test_SparseGPRegression_rbf_linear_white_kern_1D(self):
''' Testing the sparse GP regression with rbf kernel on 2d data ''' ''' Testing the sparse GP regression with rbf kernel on 2d data '''
rbflin = GPy.kern.rbf(1) + GPy.kern.linear(1) rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1)
self.check_model(rbflin, model_type='SparseGPRegression', dimension=1) self.check_model(rbflin, model_type='SparseGPRegression', dimension=1)
def test_SparseGPRegression_rbf_linear_white_kern_2D(self): def test_SparseGPRegression_rbf_linear_white_kern_2D(self):
''' Testing the sparse GP regression with rbf kernel on 2d data ''' ''' Testing the sparse GP regression with rbf kernel on 2d data '''
rbflin = GPy.kern.rbf(2) + GPy.kern.linear(2) rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2) self.check_model(rbflin, model_type='SparseGPRegression', dimension=2)
#@unittest.expectedFailure #@unittest.expectedFailure
def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self): def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs''' ''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs'''
rbflin = GPy.kern.rbf(2) + GPy.kern.linear(2) rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
raise unittest.SkipTest("This is not implemented yet!") raise unittest.SkipTest("This is not implemented yet!")
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1) self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1)
#@unittest.expectedFailure #@unittest.expectedFailure
def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self): def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs''' ''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs'''
rbflin = GPy.kern.rbf(1) + GPy.kern.linear(1) rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1)
raise unittest.SkipTest("This is not implemented yet!") raise unittest.SkipTest("This is not implemented yet!")
self.check_model(rbflin, model_type='SparseGPRegression', dimension=1, uncertain_inputs=1) self.check_model(rbflin, model_type='SparseGPRegression', dimension=1, uncertain_inputs=1)
@ -181,7 +181,7 @@ class GradientTests(unittest.TestCase):
""" Testing GPLVM with rbf + bias kernel """ """ Testing GPLVM with rbf + bias kernel """
N, input_dim, D = 50, 1, 2 N, input_dim, D = 50, 1, 2
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim, 0.5, 0.9 * np.ones((1,))) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05) k = GPy.kern.RBF(input_dim, 0.5, 0.9 * np.ones((1,))) + GPy.kern.Bias(input_dim, 0.1) + GPy.kern.White(input_dim, 0.05)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
m = GPy.models.GPLVM(Y, input_dim, kernel=k) m = GPy.models.GPLVM(Y, input_dim, kernel=k)
@ -191,7 +191,7 @@ class GradientTests(unittest.TestCase):
""" Testing GPLVM with rbf + bias kernel """ """ Testing GPLVM with rbf + bias kernel """
N, input_dim, D = 50, 1, 2 N, input_dim, D = 50, 1, 2
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05) k = GPy.kern.Linear(input_dim) + GPy.kern.Bias(input_dim, 0.1) + GPy.kern.White(input_dim, 0.05)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k) m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k)
@ -201,7 +201,7 @@ class GradientTests(unittest.TestCase):
N = 20 N = 20
X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None] X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None] Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
kernel = GPy.kern.rbf(1) kernel = GPy.kern.RBF(1)
m = GPy.models.GPClassification(X,Y,kernel=kernel) m = GPy.models.GPClassification(X,Y,kernel=kernel)
m.update_likelihood_approximation() m.update_likelihood_approximation()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -211,7 +211,7 @@ class GradientTests(unittest.TestCase):
X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None] X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None] Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
Z = np.linspace(0, 15, 4)[:, None] Z = np.linspace(0, 15, 4)[:, None]
kernel = GPy.kern.rbf(1) kernel = GPy.kern.RBF(1)
m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z) m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z)
#distribution = GPy.likelihoods.likelihood_functions.Bernoulli() #distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
#likelihood = GPy.likelihoods.EP(Y, distribution) #likelihood = GPy.likelihoods.EP(Y, distribution)
@ -223,7 +223,7 @@ class GradientTests(unittest.TestCase):
def test_generalized_FITC(self): def test_generalized_FITC(self):
N = 20 N = 20
X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None] X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
k = GPy.kern.rbf(1) + GPy.kern.white(1) k = GPy.kern.RBF(1) + GPy.kern.White(1)
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None] Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
m = GPy.models.FITCClassification(X, Y, kernel = k) m = GPy.models.FITCClassification(X, Y, kernel = k)
m.update_likelihood_approximation() m.update_likelihood_approximation()
@ -237,7 +237,7 @@ class GradientTests(unittest.TestCase):
Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
Y = np.vstack((Y1, Y2)) Y = np.vstack((Y1, Y2))
k1 = GPy.kern.rbf(1) k1 = GPy.kern.RBF(1)
m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1]) m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
m.constrain_fixed('.*rbf_var', 1.) m.constrain_fixed('.*rbf_var', 1.)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -250,7 +250,7 @@ class GradientTests(unittest.TestCase):
Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
Y = np.vstack((Y1, Y2)) Y = np.vstack((Y1, Y2))
k1 = GPy.kern.rbf(1) k1 = GPy.kern.RBF(1)
m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1]) m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
m.constrain_fixed('.*rbf_var', 1.) m.constrain_fixed('.*rbf_var', 1.)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())

View file

@ -1,4 +1,5 @@
from ..core.parameterization.parameter_core import Observable from ..core.parameterization.parameter_core import Observable
import itertools
class Cacher(object): class Cacher(object):
""" """
@ -38,8 +39,11 @@ class Cacher(object):
if not all([isinstance(arg, Observable) for arg in observable_args]): if not all([isinstance(arg, Observable) for arg in observable_args]):
return self.operation(*args) return self.operation(*args)
# TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
# return self.operation(*args)
#if the result is cached, return the cached computation #if the result is cached, return the cached computation
state = [all(a is b for a, b in zip(args, cached_i)) for cached_i in self.cached_inputs] state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
if any(state): if any(state):
i = state.index(True) i = state.index(True)
if self.inputs_changed[i]: if self.inputs_changed[i]:

View file

@ -78,24 +78,24 @@ def force_F_ordered(A):
# return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1) # return jitchol(A+np.eye(A.shape[0])*jitter, maxtries-1)
def jitchol(A, maxtries=5): def jitchol(A, maxtries=5):
A = np.ascontiguousarray(A) A = np.ascontiguousarray(A)
L, info = lapack.dpotrf(A, lower=1) L, info = lapack.dpotrf(A, lower=1)
if info == 0: if info == 0:
return L return L
else: else:
diagA = np.diag(A) diagA = np.diag(A)
if np.any(diagA <= 0.): if np.any(diagA <= 0.):
raise linalg.LinAlgError, "not pd: non-positive diagonal elements" raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
jitter = diagA.mean() * 1e-6 jitter = diagA.mean() * 1e-6
while maxtries > 0 and np.isfinite(jitter): while maxtries > 0 and np.isfinite(jitter):
print 'Warning: adding jitter of {:.10e}'.format(jitter) print 'Warning: adding jitter of {:.10e}'.format(jitter)
try: try:
return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True) return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
except: except:
jitter *= 10 jitter *= 10
finally: finally:
maxtries -= 1 maxtries -= 1
raise linalg.LinAlgError, "not positive definite, even with jitter." raise linalg.LinAlgError, "not positive definite, even with jitter."