dumb merge conflict in a comment

James Hensman 2014-02-11 13:44:18 +00:00
commit 103aaebb29
60 changed files with 916 additions and 826 deletions

View file

@@ -260,17 +260,18 @@ class Model(Parameterized):
         """
         positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
         # param_names = self._get_param_names()
-        for s in positive_strings:
-            paramlist = self.grep_param_names(".*"+s)
-            for param in paramlist:
-                for p in param.flattened_parameters:
-                    rav_i = set(self._raveled_index_for(p))
-                    for constraint in self.constraints.iter_properties():
-                        rav_i -= set(self._constraint_indices(p, constraint))
-                    rav_i -= set(np.nonzero(self._fixes_for(p)!=UNFIXED)[0])
-                    ind = self._backtranslate_index(p, np.array(list(rav_i), dtype=int))
-                    if ind.size != 0:
-                        p[np.unravel_index(ind, p.shape)].constrain_positive()
+        # for s in positive_strings:
+        #     paramlist = self.grep_param_names(".*"+s)
+        #     for param in paramlist:
+        #         for p in param.flattened_parameters:
+        #             rav_i = set(self._raveled_index_for(p))
+        #             for constraint in self.constraints.iter_properties():
+        #                 rav_i -= set(self._constraint_indices(p, constraint))
+        #             rav_i -= set(np.nonzero(self._fixes_for(p)!=UNFIXED)[0])
+        #             ind = self._backtranslate_index(p, np.array(list(rav_i), dtype=int))
+        #             if ind.size != 0:
+        #                 p[np.unravel_index(ind, p.shape)].constrain_positive(warning=warning)
         # if paramlist:
         #     self.__getitem__(None, paramlist).constrain_positive(warning=warning)
         # currently_constrained = self.all_constrained_indices()
@@ -380,7 +381,7 @@ class Model(Parameterized):
         sgd.run()
         self.optimization_runs.append(sgd)

-    def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
+    def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
         """
         Check the gradient of the model by comparing to a numerical
         estimate. If the verbose flag is passed, individual
@@ -405,8 +406,8 @@ class Model(Parameterized):
             dx = step * np.sign(np.random.uniform(-1, 1, x.size))
             # evaluate around the point x
-            f1, g1 = self.objective_and_gradients(x + dx)
-            f2, g2 = self.objective_and_gradients(x - dx)
+            f1 = self.objective_function(x + dx)
+            f2 = self.objective_function(x - dx)
             gradient = self.objective_function_gradients(x)
             numerical_gradient = (f1 - f2) / (2 * dx)
@@ -416,7 +417,7 @@ class Model(Parameterized):
         else:
             # check the gradient of each parameter individually, and do some pretty printing
             try:
-                names = self._get_param_names_transformed()
+                names = self._get_param_names()
             except NotImplementedError:
                 names = ['Variable %i' % i for i in range(len(x))]
             # Prepare for pretty-printing
@@ -430,38 +431,45 @@ class Model(Parameterized):
             header_string = map(lambda x: '|'.join(x), [header_string])
             separator = '-' * len(header_string[0])
             print '\n'.join([header_string[0], separator])

             if target_param is None:
                 param_list = range(len(x))
             else:
-                param_list = self.grep_param_names(target_param, transformed=True, search=True)
-            if not np.any(param_list):
+                param_list = self._raveled_index_for(target_param)
+            if self._has_fixes():
+                param_list = np.intersect1d(param_list, np.r_[:self.size][self._fixes_], True)
+            if param_list.size == 0:
                 print "No free parameters to check"
                 return

-            for i in param_list:
+            gradient = self.objective_function_gradients(x)
+            np.where(gradient==0, 1e-312, gradient)
+            ret = True
+            for i, ind in enumerate(param_list):
                 xx = x.copy()
-                xx[i] += step
-                f1, g1 = self.objective_and_gradients(xx)
-                xx[i] -= 2.*step
-                f2, g2 = self.objective_and_gradients(xx)
-                gradient = self.objective_function_gradients(x)[i]
+                xx[ind] += step
+                f1 = self.objective_function(xx)
+                xx[ind] -= 2.*step
+                f2 = self.objective_function(xx)
                 numerical_gradient = (f1 - f2) / (2 * step)
-                ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient))
-                difference = np.abs((f1 - f2) / 2 / step - gradient)
+                ratio = (f1 - f2) / (2 * step * gradient[ind])
+                difference = np.abs((f1 - f2) / 2 / step - gradient[ind])
                 if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
-                    formatted_name = "\033[92m {0} \033[0m".format(names[i])
+                    formatted_name = "\033[92m {0} \033[0m".format(names[ind])
+                    ret &= True
                 else:
-                    formatted_name = "\033[91m {0} \033[0m".format(names[i])
+                    formatted_name = "\033[91m {0} \033[0m".format(names[ind])
+                    ret &= False
                 r = '%.6f' % float(ratio)
                 d = '%.6f' % float(difference)
-                g = '%.6f' % gradient
+                g = '%.6f' % gradient[ind]
                 ng = '%.6f' % float(numerical_gradient)
                 grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
                 print grad_string
+            return ret

     def input_sensitivity(self):
         """

View file

@@ -15,8 +15,36 @@ class ListArray(np.ndarray):
     def __new__(cls, input_array):
         obj = np.asanyarray(input_array).view(cls)
         return obj
-    def __eq__(self, other):
-        return other is self
+    #def __eq__(self, other):
+    #    return other is self
+
+class ParamList(list):
+    def __contains__(self, other):
+        for el in self:
+            if el is other:
+                return True
+        return False
+    pass
+
+class C(np.ndarray):
+    __array_priority__ = 1.
+    def __new__(cls, array):
+        obj = array.view(cls)
+        return obj
+    #def __array_finalize__(self, obj):
+    #    #print 'finalize'
+    #    return obj
+    def __array_prepare__(self, out_arr, context):
+        #print 'prepare'
+        while type(out_arr) is C:
+            out_arr = out_arr.base
+        return out_arr
+    def __array_wrap__(self, out_arr, context):
+        #print 'wrap', type(self), type(out_arr), context
+        while type(out_arr) is C:
+            out_arr = out_arr.base
+        return out_arr

 class ObservableArray(ListArray, Observable):
     """
@@ -25,7 +53,7 @@ class ObservableArray(ListArray, Observable):
     will be called every time this array changes. The callable
     takes exactly one argument, which is this array itself.
     """
-    __array_priority__ = 0 # Never give back Param
+    __array_priority__ = -1 # Never give back ObservableArray
     def __new__(cls, input_array):
         cls.__name__ = "ObservableArray\n "
         obj = super(ObservableArray, cls).__new__(cls, input_array).view(cls)
@@ -36,16 +64,19 @@ class ObservableArray(ListArray, Observable):
         if obj is None: return
         self._observers_ = getattr(obj, '_observers_', None)

     def __setitem__(self, s, val, update=True):
-        if self.ndim:
-            if not np.all(np.equal(self[s], val)):
-                super(ObservableArray, self).__setitem__(s, val)
-                if update:
-                    self._notify_observers()
-        else:
-            if not np.all(np.equal(self, val)):
-                super(ObservableArray, self).__setitem__(Ellipsis, val)
-                if update:
-                    self._notify_observers()
+        super(ObservableArray, self).__setitem__(s, val)
+        if update:
+            self._notify_observers()
+        # if self.ndim:
+        #     if not np.all(np.equal(self[s], val)):
+        #         super(ObservableArray, self).__setitem__(s, val)
+        #         if update:
+        #             self._notify_observers()
+        # else:
+        #     if not np.all(np.equal(self, val)):
+        #         super(ObservableArray, self).__setitem__(Ellipsis, val)
+        #         if update:
+        #             self._notify_observers()

     def __getslice__(self, start, stop):
         return self.__getitem__(slice(start, stop))
     def __setslice__(self, start, stop, val):
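
Aside (not part of the commit): the simplified __setitem__ above writes the value and then notifies observers unconditionally, instead of first testing for equality. A minimal sketch of that observer pattern on an ndarray subclass, with invented names (add_observer, report) rather than GPy's:

    import numpy as np

    class MiniObservableArray(np.ndarray):
        def __new__(cls, input_array):
            obj = np.asanyarray(input_array).view(cls)
            obj._observers_ = []
            return obj
        def __array_finalize__(self, obj):
            if obj is None: return
            self._observers_ = getattr(obj, '_observers_', [])
        def add_observer(self, callback):
            self._observers_.append(callback)
        def __setitem__(self, s, val):
            super(MiniObservableArray, self).__setitem__(s, val)
            for callback in self._observers_:
                callback(self)

    def report(arr):
        print("changed: %s" % np.asarray(arr))

    a = MiniObservableArray(np.zeros(3))
    a.add_observer(report)
    a[1] = 5.0   # triggers report(a)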

View file

@@ -90,11 +90,6 @@ class ParameterIndexOperations(object):
         return self._properties.values()

     def properties_for(self, index):
-        # already_seen = dict()
-        # for ni in index:
-        #     if ni not in already_seen:
-        #         already_seen[ni] = [prop for prop in self.iter_properties() if ni in self._properties[prop]]
-        #     yield already_seen[ni]
         return vectorize(lambda i: [prop for prop in self.iter_properties() if i in self._properties[prop]], otypes=[list])(index)

     def add(self, prop, indices):
@@ -111,70 +106,11 @@ class ParameterIndexOperations(object):
                 self._properties[prop] = diff
             else:
                 del self._properties[prop]
-            #[self._reverse[i].remove(prop) for i in removed if prop in self._reverse[i]]
             return removed.astype(int)
-#         else:
-#             for a in self.properties():
-#                 if numpy.all(a==prop) and a._parent_index_ == prop._parent_index_:
-#                     ind = create_raveled_indices(indices, shape, offset)
-#                     diff = remove_indices(self[a], ind)
-#                     removed = numpy.intersect1d(self[a], ind, True)
-#                     if not index_empty(diff):
-#                         self._properties[a] = diff
-#                     else:
-#                         del self._properties[a]
-#                     [self._reverse[i].remove(a) for i in removed if a in self._reverse[i]]
-#                     return removed.astype(int)
         return numpy.array([]).astype(int)

     def __getitem__(self, prop):
         return self._properties[prop]
-
-# class TieIndexOperations(object):
-#     def __init__(self, params):
-#         self.params = params
-#         self.tied_from = ParameterIndexOperations()
-#         self.tied_to = ParameterIndexOperations()
-#     def add(self, tied_from, tied_to):
-#         rav_from = self.params._raveled_index_for(tied_from)
-#         rav_to = self.params._raveled_index_for(tied_to)
-#         self.tied_from.add(tied_to, rav_from)
-#         self.tied_to.add(tied_to, rav_to)
-#         return rav_from, rav_to
-#     def remove(self, tied_from, tied_to):
-#         rav_from = self.params._raveled_index_for(tied_from)
-#         rav_to = self.params._raveled_index_for(tied_to)
-#         rem_from = self.tied_from.remove(tied_to, rav_from)
-#         rem_to = self.tied_to.remove(tied_to, rav_to)
-#         left_from = self.tied_from._properties.pop(tied_to)
-#         left_to = self.tied_to._properties.pop(tied_to)
-#         self.tied_from[numpy.delete(tied_to, rem_from)] = left_from
-#         self.tied_to[numpy.delete(tied_to, rem_to)] = left_to
-#         return rav_from, rav_to
-#     def from_to_for(self, index):
-#         return self.tied_from.properties_for(index), self.tied_to.properties_for(index)
-#     def iter_from_to_indices(self):
-#         for k, f in self.tied_from.iteritems():
-#             yield f, self.tied_to[k]
-#     def iter_to_indices(self):
-#         return self.tied_to.iterindices()
-#     def iter_from_indices(self):
-#         return self.tied_from.iterindices()
-#     def iter_from_items(self):
-#         for f, i in self.tied_from.iteritems():
-#             yield f, i
-#     def iter_properties(self):
-#         return self.tied_from.iter_properties()
-#     def properties(self):
-#         return self.tied_from.properties()
-#     def from_to_indices(self, param):
-#         return self.tied_from[param], self.tied_to[param]
-#
-# # def create_raveled_indices(index, shape, offset=0):
-# #     if isinstance(index, (tuple, list)): i = [slice(None)] + list(index)
-# #     else: i = [slice(None), index]
-# #     ind = numpy.array(numpy.ravel_multi_index(numpy.indices(shape)[i], shape)).flat + numpy.int_(offset)
-# #     return ind

 def combine_indices(arr1, arr2):
     return numpy.union1d(arr1, arr2)
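
Aside (not part of the commit): ParameterIndexOperations keeps, per property (constraint), the raveled parameter indices it applies to, so add/remove reduce to set operations on index arrays and properties_for inverts the mapping. A rough standalone sketch of that bookkeeping with plain numpy set routines (class and method names here are illustrative only):

    import numpy as np

    class MiniIndexOps(object):
        def __init__(self):
            self._properties = {}  # property -> sorted int array of raveled indices
        def add(self, prop, indices):
            current = self._properties.get(prop, np.array([], dtype=int))
            self._properties[prop] = np.union1d(current, indices)
        def remove(self, prop, indices):
            if prop not in self._properties:
                return np.array([], dtype=int)
            removed = np.intersect1d(self._properties[prop], indices)
            left = np.setdiff1d(self._properties[prop], indices)
            if left.size:
                self._properties[prop] = left
            else:
                del self._properties[prop]
            return removed
        def properties_for(self, index):
            # for every queried raveled index, list the properties that mention it
            return [[p for p, ind in self._properties.items() if i in ind]
                    for i in np.atleast_1d(index)]

    ops = MiniIndexOps()
    ops.add('positive', np.arange(5))
    ops.add('fixed', np.array([2, 3]))
    print(ops.properties_for([2, 7]))  # e.g. [['positive', 'fixed'], []]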

View file

@@ -3,8 +3,8 @@
 import itertools
 import numpy

-from parameter_core import Constrainable, adjust_name_for_printing
-from array_core import ObservableArray
+from parameter_core import Constrainable, Gradcheckable, adjust_name_for_printing
+from array_core import ObservableArray, ParamList

 ###### printing
 __constraints_name__ = "Constraint"
@@ -20,12 +20,15 @@ class Float(numpy.float64, Constrainable):
         self._base = base

-class Param(ObservableArray, Constrainable):
+class Param(ObservableArray, Constrainable, Gradcheckable):
     """
     Parameter object for GPy models.

-    :param name: name of the parameter to be printed
+    :param str name: name of the parameter to be printed
     :param input_array: array which this parameter handles
+    :type input_array: numpy.ndarray
+    :param default_constraint: The default constraint for this parameter
+    :type default_constraint:

     You can add/remove constraints by calling constrain on the parameter itself, e.g:
@@ -40,19 +43,10 @@ class Param(ObservableArray, Constrainable):
     See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.

-    This ndarray can be stored in lists and checked if it is in.
-    >>> import numpy as np
-    >>> x = np.random.normal(size=(10,3))
-    >>> x in [[1], x, [3]]
-    True
-
-    WARNING: This overrides the functionality of x==y!!!
-    Use numpy.equal(x,y) for element-wise equality testing.
     """
-    __array_priority__ = 0 # Never give back Param
+    __array_priority__ = -1 # Never give back Param
     _fixes_ = None
-    def __new__(cls, name, input_array, *args, **kwargs):
+    def __new__(cls, name, input_array, default_constraint=None):
         obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
         obj._current_slice_ = (slice(obj.shape[0]),)
         obj._realshape_ = obj.shape
@@ -66,8 +60,8 @@ class Param(ObservableArray, Constrainable):
         obj.gradient = None
         return obj

-    def __init__(self, name, input_array):
-        super(Param, self).__init__(name=name)
+    def __init__(self, name, input_array, default_constraint=None):
+        super(Param, self).__init__(name=name, default_constraint=default_constraint)

     def __array_finalize__(self, obj):
         # see InfoArray.__array_finalize__ for comments
@@ -75,7 +69,7 @@ class Param(ObservableArray, Constrainable):
         super(Param, self).__array_finalize__(obj)
         self._direct_parent_ = getattr(obj, '_direct_parent_', None)
         self._parent_index_ = getattr(obj, '_parent_index_', None)
-        self._highest_parent_ = getattr(obj, '_highest_parent_', None)
+        self._default_constraint_ = getattr(obj, '_default_constraint_', None)
         self._current_slice_ = getattr(obj, '_current_slice_', None)
         self._tied_to_me_ = getattr(obj, '_tied_to_me_', None)
         self._tied_to_ = getattr(obj, '_tied_to_', None)
@@ -98,7 +92,7 @@ class Param(ObservableArray, Constrainable):
                 (self.name,
                  self._direct_parent_,
                  self._parent_index_,
-                 self._highest_parent_,
+                 self._default_constraint_,
                  self._current_slice_,
                  self._realshape_,
                  self._realsize_,
@@ -119,7 +113,7 @@ class Param(ObservableArray, Constrainable):
         self._realsize_ = state.pop()
         self._realshape_ = state.pop()
         self._current_slice_ = state.pop()
-        self._highest_parent_ = state.pop()
+        self._default_constraint_ = state.pop()
         self._parent_index_ = state.pop()
         self._direct_parent_ = state.pop()
         self.name = state.pop()
@@ -153,28 +147,9 @@ class Param(ObservableArray, Constrainable):
     @property
     def _parameters_(self):
         return []
-    def _connect_highest_parent(self, highest_parent):
-        self._highest_parent_ = highest_parent

     def _collect_gradient(self, target):
         target[:] = self.gradient.flat
     #===========================================================================
-    # Fixing Parameters:
-    #===========================================================================
-    def constrain_fixed(self, warning=True):
-        """
-        Constrain this paramter to be fixed to the current value it carries.
-
-        :param warning: print a warning for overwriting constraints.
-        """
-        self._highest_parent_._fix(self,warning)
-    fix = constrain_fixed
-    def unconstrain_fixed(self):
-        """
-        This parameter will no longer be fixed.
-        """
-        self._highest_parent_._unfix(self)
-    unfix = unconstrain_fixed
-    #===========================================================================
     # Tying operations -> bugged, TODO
     #===========================================================================
     def tie_to(self, param):
@@ -195,7 +170,7 @@ class Param(ObservableArray, Constrainable):
        # this tie will be created to the parameter the param is tied
        # to.

-        assert isinstance(param, Param), "Argument {1} not of type {0}".format(Param,param.__class__)
+        assert isinstance(param, Param), "Argument {1} not of type {0}".format(Param, param.__class__)
         param = numpy.atleast_1d(param)
         if param.size != 1:
             raise NotImplementedError, "Broadcast tying is not implemented yet"
@@ -248,7 +223,7 @@ class Param(ObservableArray, Constrainable):
                 t_rav_i = t._raveled_index()
                 tr_rav_i = tied_to_me._raveled_index()
                 new_index = list(set(t_rav_i) | set(tr_rav_i))
-                tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index,t._realshape_)]
+                tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index, t._realshape_)]
                 self._tied_to_me_[tmp] = self._tied_to_me_[t] | set(self._raveled_index())
                 del self._tied_to_me_[t]
                 return
@@ -261,7 +236,7 @@ class Param(ObservableArray, Constrainable):
                 import ipdb;ipdb.set_trace()
             new_index = list(set(t_rav_i) - set(tr_rav_i))
             if new_index:
-                tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index,t._realshape_)]
+                tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index, t._realshape_)]
                 self._tied_to_me_[tmp] = self._tied_to_me_[t]
                 del self._tied_to_me_[t]
                 if len(self._tied_to_me_[tmp]) == 0:
@@ -269,12 +244,12 @@ class Param(ObservableArray, Constrainable):
         else:
             del self._tied_to_me_[t]

     def _on_tied_parameter_changed(self, val, ind):
-        if not self._updated_: #not fast_array_equal(self, val[ind]):
+        if not self._updated_: # not fast_array_equal(self, val[ind]):
             val = numpy.atleast_1d(val)
             self._updated_ = True
             if self._original_:
                 self.__setitem__(slice(None), val[ind], update=False)
             else: # this happens when indexing created a copy of the array
                 self._direct_parent_._get_original(self).__setitem__(self._current_slice_, val[ind], update=False)
             self._notify_tied_parameters()
             self._updated_ = False
@@ -303,11 +278,11 @@ class Param(ObservableArray, Constrainable):
     def __getitem__(self, s, *args, **kwargs):
         if not isinstance(s, tuple):
             s = (s,)
-        if not reduce(lambda a,b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
+        if not reduce(lambda a, b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
             s += (Ellipsis,)
         new_arr = super(Param, self).__getitem__(s, *args, **kwargs)
         try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base
-        except AttributeError: pass# returning 0d array or float, double etc
+        except AttributeError: pass # returning 0d array or float, double etc
         return new_arr
     def __setitem__(self, s, val, update=True):
         super(Param, self).__setitem__(s, val, update=update)
@@ -328,8 +303,8 @@ class Param(ObservableArray, Constrainable):
            elif isinstance(si, (list,numpy.ndarray,tuple)):
                a = si[0]
            else: a = si
-           if a<0:
-               a = self._realshape_[i]+a
+           if a < 0:
+               a = self._realshape_[i] + a
            internal_offset += a * extended_realshape[i]
        return internal_offset
     def _raveled_index(self, slice_index=None):
@@ -337,8 +312,8 @@ class Param(ObservableArray, Constrainable):
         # of this object
         extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1]
         ind = self._indices(slice_index)
-        if ind.ndim < 2: ind=ind[:,None]
-        return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape*x), 1, ind), dtype=int)
+        if ind.ndim < 2: ind = ind[:, None]
+        return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int)
     def _expand_index(self, slice_index=None):
         # this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes
         # it basically translates slices to their respective index arrays and turns negative indices around
@@ -351,11 +326,11 @@ class Param(ObservableArray, Constrainable):
                if isinstance(a, slice):
                    start, stop, step = a.indices(b)
                    return numpy.r_[start:stop:step]
-               elif isinstance(a, (list,numpy.ndarray,tuple)):
+               elif isinstance(a, (list, numpy.ndarray, tuple)):
                    a = numpy.asarray(a, dtype=int)
-                   a[a<0] = b + a[a<0]
-               elif a<0:
-                   a = b+a
+                   a[a < 0] = b + a[a < 0]
+               elif a < 0:
+                   a = b + a
                return numpy.r_[a]
            return numpy.r_[:b]
        return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size)))
@@ -379,7 +354,7 @@ class Param(ObservableArray, Constrainable):
     #===========================================================================
     @property
     def _description_str(self):
-        if self.size <= 1: return ["%f"%self]
+        if self.size <= 1: return ["%f" % self]
         else: return [str(self.shape)]
     def _parameter_names(self, add_name):
         return [self.name]
@@ -391,31 +366,31 @@ class Param(ObservableArray, Constrainable):
         return [self.shape]
     @property
     def _constraints_str(self):
-        return [' '.join(map(lambda c: str(c[0]) if c[1].size==self._realsize_ else "{"+str(c[0])+"}", self._highest_parent_._constraints_iter_items(self)))]
+        return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self._highest_parent_._constraints_iter_items(self)))]
     @property
     def _ties_str(self):
         return [t._short() for t in self._tied_to_] or ['']
     @property
     def name_hirarchical(self):
         if self.has_parent():
-            return self._direct_parent_.hirarchy_name()+adjust_name_for_printing(self.name)
+            return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
         return adjust_name_for_printing(self.name)
     def __repr__(self, *args, **kwargs):
         name = "\033[1m{x:s}\033[0;0m:\n".format(
             x=self.name_hirarchical)
-        return name + super(Param, self).__repr__(*args,**kwargs)
+        return name + super(Param, self).__repr__(*args, **kwargs)
     def _ties_for(self, rav_index):
-        #size = sum(p.size for p in self._tied_to_)
+        # size = sum(p.size for p in self._tied_to_)
         ties = numpy.empty(shape=(len(self._tied_to_), numpy.size(rav_index)), dtype=Param)
         for i, tied_to in enumerate(self._tied_to_):
             for t, ind in tied_to._tied_to_me_.iteritems():
                 if t._parent_index_ == self._parent_index_:
-                    matches = numpy.where(rav_index[:,None] == t._raveled_index()[None, :])
+                    matches = numpy.where(rav_index[:, None] == t._raveled_index()[None, :])
                     tt_rav_index = tied_to._raveled_index()
                     ind_rav_matches = numpy.where(tt_rav_index == numpy.array(list(ind)))[0]
                     if len(ind) != 1: ties[i, matches[0][ind_rav_matches]] = numpy.take(tt_rav_index, matches[1], mode='wrap')[ind_rav_matches]
                     else: ties[i, matches[0]] = numpy.take(tt_rav_index, matches[1], mode='wrap')
-        return map(lambda a: sum(a,[]), zip(*[[[tie.flatten()] if tx!=None else [] for tx in t] for t,tie in zip(ties,self._tied_to_)]))
+        return map(lambda a: sum(a, []), zip(*[[[tie.flatten()] if tx != None else [] for tx in t] for t, tie in zip(ties, self._tied_to_)]))
     def _constraints_for(self, rav_index):
         return self._highest_parent_._constraints_for(self, rav_index)
     def _indices(self, slice_index=None):
@@ -425,12 +400,12 @@ class Param(ObservableArray, Constrainable):
         if isinstance(slice_index, (tuple, list)):
             clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)]
             if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
-                and len(set(map(len,clean_curr_slice))) <= 1):
+                and len(set(map(len, clean_curr_slice))) <= 1):
                 return numpy.fromiter(itertools.izip(*clean_curr_slice),
-                    dtype=[('',int)]*self._realndim_,count=len(clean_curr_slice[0])).view((int, self._realndim_))
+                    dtype=[('', int)] * self._realndim_, count=len(clean_curr_slice[0])).view((int, self._realndim_))
         expanded_index = list(self._expand_index(slice_index))
         return numpy.fromiter(itertools.product(*expanded_index),
-            dtype=[('',int)]*self._realndim_,count=reduce(lambda a,b: a*b.size,expanded_index,1)).view((int, self._realndim_))
+            dtype=[('', int)] * self._realndim_, count=reduce(lambda a, b: a * b.size, expanded_index, 1)).view((int, self._realndim_))
     def _max_len_names(self, gen, header):
         return reduce(lambda a, b:max(a, len(b)), gen, len(header))
     def _max_len_values(self):
@@ -443,9 +418,9 @@ class Param(ObservableArray, Constrainable):
         if self._realsize_ < 2:
             return name
         ind = self._indices()
-        if ind.size > 4: indstr = ','.join(map(str,ind[:2])) + "..." + ','.join(map(str,ind[-2:]))
-        else: indstr = ','.join(map(str,ind))
-        return name+'['+indstr+']'
+        if ind.size > 4: indstr = ','.join(map(str, ind[:2])) + "..." + ','.join(map(str, ind[-2:]))
+        else: indstr = ','.join(map(str, ind))
+        return name + '[' + indstr + ']'
     def __str__(self, constr_matrix=None, indices=None, ties=None, lc=None, lx=None, li=None, lt=None):
         filter_ = self._current_slice_
         vals = self.flat
@@ -458,10 +433,10 @@ class Param(ObservableArray, Constrainable):
         if lx is None: lx = self._max_len_values()
         if li is None: li = self._max_len_index(indices)
         if lt is None: lt = self._max_len_names(ties, __tie_name__)
-        header = " {i:^{2}s} | \033[1m{x:^{1}s}\033[0;0m | {c:^{0}s} | {t:^{3}s}".format(lc,lx,li,lt, x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__) # nice header for printing
+        header = " {i:^{2}s} | \033[1m{x:^{1}s}\033[0;0m | {c:^{0}s} | {t:^{3}s}".format(lc, lx, li, lt, x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__) # nice header for printing
         if not ties: ties = itertools.cycle([''])
-        return "\n".join([header]+[" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {t:^{4}s} ".format(lc,lx,__precision__,li,lt, x=x, c=" ".join(map(str,c)), t=(t or ''), i=i) for i,x,c,t in itertools.izip(indices,vals,constr_matrix,ties)]) # return all the constraints with right indices
-        #except: return super(Param, self).__str__()
+        return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, x=x, c=" ".join(map(str, c)), t=(t or ''), i=i) for i, x, c, t in itertools.izip(indices, vals, constr_matrix, ties)]) # return all the constraints with right indices
+        # except: return super(Param, self).__str__()

 class ParamConcatenation(object):
     def __init__(self, params):
@@ -472,8 +447,8 @@ class ParamConcatenation(object):
         See :py:class:`GPy.core.parameter.Param` for more details on constraining.
         """
-        #self.params = params
-        self.params = []
+        # self.params = params
+        self.params = ParamList([])
         for p in params:
             for p in p.flattened_parameters:
                 if p not in self.params:
@@ -493,7 +468,7 @@ class ParamConcatenation(object):
         ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
         vals = self._vals(); vals[s] = val; del val
         [numpy.place(p, ind[ps], vals[ps]) and p._notify_tied_parameters()
             for p, ps in zip(self.params, self._param_slices_)]
         if update:
             self.params[0]._highest_parent_.parameters_changed()
     def _vals(self):
@@ -501,46 +476,68 @@ class ParamConcatenation(object):
     #===========================================================================
     # parameter operations:
     #===========================================================================
+    def update_all_params(self):
+        self.params[0]._highest_parent_.parameters_changed()
     def constrain(self, constraint, warning=True):
-        [param.constrain(constraint) for param in self.params]
+        [param.constrain(constraint, update=False) for param in self.params]
+        self.update_all_params()
     constrain.__doc__ = Param.constrain.__doc__
     def constrain_positive(self, warning=True):
-        [param.constrain_positive(warning) for param in self.params]
+        [param.constrain_positive(warning, update=False) for param in self.params]
+        self.update_all_params()
     constrain_positive.__doc__ = Param.constrain_positive.__doc__
     def constrain_fixed(self, warning=True):
         [param.constrain_fixed(warning) for param in self.params]
     constrain_fixed.__doc__ = Param.constrain_fixed.__doc__
     fix = constrain_fixed
     def constrain_negative(self, warning=True):
-        [param.constrain_negative(warning) for param in self.params]
+        [param.constrain_negative(warning, update=False) for param in self.params]
+        self.update_all_params()
     constrain_negative.__doc__ = Param.constrain_negative.__doc__
     def constrain_bounded(self, lower, upper, warning=True):
-        [param.constrain_bounded(lower, upper, warning) for param in self.params]
+        [param.constrain_bounded(lower, upper, warning, update=False) for param in self.params]
+        self.update_all_params()
     constrain_bounded.__doc__ = Param.constrain_bounded.__doc__
     def unconstrain(self, *constraints):
         [param.unconstrain(*constraints) for param in self.params]
     unconstrain.__doc__ = Param.unconstrain.__doc__
     def unconstrain_negative(self):
         [param.unconstrain_negative() for param in self.params]
     unconstrain_negative.__doc__ = Param.unconstrain_negative.__doc__
     def unconstrain_positive(self):
         [param.unconstrain_positive() for param in self.params]
     unconstrain_positive.__doc__ = Param.unconstrain_positive.__doc__
     def unconstrain_fixed(self):
         [param.unconstrain_fixed() for param in self.params]
     unconstrain_fixed.__doc__ = Param.unconstrain_fixed.__doc__
     unfix = unconstrain_fixed
     def unconstrain_bounded(self, lower, upper):
         [param.unconstrain_bounded(lower, upper) for param in self.params]
     unconstrain_bounded.__doc__ = Param.unconstrain_bounded.__doc__
     def untie(self, *ties):
         [param.untie(*ties) for param in self.params]
-    __lt__ = lambda self, val: self._vals()<val
-    __le__ = lambda self, val: self._vals()<=val
-    __eq__ = lambda self, val: self._vals()==val
-    __ne__ = lambda self, val: self._vals()!=val
-    __gt__ = lambda self, val: self._vals()>val
-    __ge__ = lambda self, val: self._vals()>=val
+
+    def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
+        return self.params[0]._highest_parent_._checkgrad(self, verbose, step, tolerance)
+    #checkgrad.__doc__ = Gradcheckable.checkgrad.__doc__
+
+    __lt__ = lambda self, val: self._vals() < val
+    __le__ = lambda self, val: self._vals() <= val
+    __eq__ = lambda self, val: self._vals() == val
+    __ne__ = lambda self, val: self._vals() != val
+    __gt__ = lambda self, val: self._vals() > val
+    __ge__ = lambda self, val: self._vals() >= val

     def __str__(self, *args, **kwargs):
         def f(p):
             ind = p._raveled_index()
@@ -569,7 +566,7 @@ if __name__ == '__main__':
     print "random done"
     p = Param("q_mean", X)
     p1 = Param("q_variance", numpy.random.rand(*p.shape))
-    p2 = Param("Y", numpy.random.randn(p.shape[0],1))
+    p2 = Param("Y", numpy.random.randn(p.shape[0], 1))
     p3 = Param("variance", numpy.random.rand())
    p4 = Param("lengthscale", numpy.random.rand(2))
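
Aside (not part of the commit): much of the churn in Param is about instance attributes (_direct_parent_, _default_constraint_, the current slice) that must be re-attached in __array_finalize__, because slicing or viewing an ndarray subclass creates a new instance that skips __new__. A minimal sketch of that mechanism, using an invented tag attribute rather than GPy's real ones:

    import numpy as np

    class Tagged(np.ndarray):
        """ndarray subclass that carries a .tag through views and slices."""
        def __new__(cls, input_array, tag=None):
            obj = np.asanyarray(input_array).view(cls)
            obj.tag = tag
            return obj
        def __array_finalize__(self, obj):
            # called for explicit construction, views and templates alike
            if obj is None:
                return
            self.tag = getattr(obj, 'tag', None)

    x = Tagged(np.arange(6).reshape(2, 3), tag="lengthscale")
    print(x[0, 1:].tag)  # views keep the tag: prints "lengthscale"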

View file

@@ -48,19 +48,24 @@ class Pickleable(object):
 #===============================================================================
 class Parentable(object):
-    def __init__(self, direct_parent=None, highest_parent=None, parent_index=None):
+    def __init__(self, direct_parent=None, parent_index=None):
         super(Parentable,self).__init__()
         self._direct_parent_ = direct_parent
         self._parent_index_ = parent_index
-        self._highest_parent_ = highest_parent

     def has_parent(self):
-        return self._direct_parent_ is not None and self._highest_parent_ is not None
+        return self._direct_parent_ is not None
+
+    @property
+    def _highest_parent_(self):
+        if self._direct_parent_ is None:
+            return self
+        return self._direct_parent_._highest_parent_

 class Nameable(Parentable):
     _name = None
-    def __init__(self, name, direct_parent=None, highest_parent=None, parent_index=None):
-        super(Nameable,self).__init__(direct_parent, highest_parent, parent_index)
+    def __init__(self, name, direct_parent=None, parent_index=None):
+        super(Nameable,self).__init__(direct_parent, parent_index)
         self._name = name or self.__class__.__name__

     @property
@@ -73,9 +78,41 @@ class Nameable(Parentable):
         if self.has_parent():
             self._direct_parent_._name_changed(self, from_name)

+class Gradcheckable(Parentable):
+    #===========================================================================
+    # Gradchecking
+    #===========================================================================
+    def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
+        if self.has_parent():
+            return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
+        return self._checkgrad(self[''], verbose=verbose, step=step, tolerance=tolerance)
+    def _checkgrad(self, param):
+        raise NotImplementedError, "Need log likelihood to check gradient against"
+
 class Constrainable(Nameable):
-    def __init__(self, name):
+    def __init__(self, name, default_constraint=None):
         super(Constrainable,self).__init__(name)
+        self._default_constraint_ = default_constraint
+
+    #===========================================================================
+    # Fixing Parameters:
+    #===========================================================================
+    def constrain_fixed(self, value=None, warning=True):
+        """
+        Constrain this parameter to be fixed to the current value it carries.
+
+        :param warning: print a warning for overwriting constraints.
+        """
+        if value is not None:
+            self[:] = value
+        self._highest_parent_._fix(self,warning)
+    fix = constrain_fixed
+
+    def unconstrain_fixed(self):
+        """
+        This parameter will no longer be fixed.
+        """
+        self._highest_parent_._unfix(self)
+    unfix = unconstrain_fixed
     #===========================================================================
     # Constrain operations -> done
     #===========================================================================
@@ -98,30 +135,30 @@ class Constrainable(Nameable):
         if update:
             self.parameters_changed()

-    def constrain_positive(self, warning=True):
+    def constrain_positive(self, warning=True, update=True):
         """
         :param warning: print a warning if re-constraining parameters.

         Constrain this parameter to the default positive constraint.
         """
-        self.constrain(Logexp(), warning)
+        self.constrain(Logexp(), warning=warning, update=update)

-    def constrain_negative(self, warning=True):
+    def constrain_negative(self, warning=True, update=True):
         """
         :param warning: print a warning if re-constraining parameters.

         Constrain this parameter to the default negative constraint.
         """
-        self.constrain(NegativeLogexp(), warning)
+        self.constrain(NegativeLogexp(), warning=warning, update=update)

-    def constrain_bounded(self, lower, upper, warning=True):
+    def constrain_bounded(self, lower, upper, warning=True, update=True):
         """
         :param lower, upper: the limits to bound this parameter to
         :param warning: print a warning if re-constraining parameters.

         Constrain this parameter to lie within the given range.
         """
-        self.constrain(Logistic(lower, upper), warning)
+        self.constrain(Logistic(lower, upper), warning=warning, update=update)

     def unconstrain(self, *transforms):
         """

View file

@@ -8,9 +8,10 @@
 import itertools
 from re import compile, _pattern_type
 from param import ParamConcatenation, Param
-from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing
+from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing, Gradcheckable
 from index_operations import ParameterIndexOperations,\
     index_empty
+from array_core import ParamList

 #===============================================================================
 # Printing:
@@ -23,7 +24,7 @@ FIXED = False
 UNFIXED = True

 #===============================================================================
-class Parameterized(Constrainable, Pickleable, Observable):
+class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
     """
     Parameterized class
@@ -69,7 +70,7 @@ class Parameterized(Constrainable, Pickleable, Observable):
         super(Parameterized, self).__init__(name=name)
         self._in_init_ = True
         self._constraints_ = None#ParameterIndexOperations()
-        self._parameters_ = []
+        self._parameters_ = ParamList()
         self.size = sum(p.size for p in self._parameters_)
         if not self._has_fixes():
             self._fixes_ = None
@@ -170,6 +171,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
             for t, ind in param._constraints_.iteritems():
                 self.constraints.add(t, ind+self._offset_for(param))
             param._constraints_.clear()
+        if param._default_constraint_ is not None:
+            self._add_constrain(param, param._default_constraint_, False)
         if self._has_fixes() and np.all(self._fixes_): # ==UNFIXED
             self._fixes_= None
@@ -188,10 +191,10 @@ class Parameterized(Constrainable, Pickleable, Observable):
         note: if it is a string object it will not (!) be regexp-matched
               automatically.
         """
-        self._parameters_ = [p for p in self._parameters_
+        self._parameters_ = ParamList([p for p in self._parameters_
                              if not (p._parent_index_ in names_params_indices
                                      or p.name in names_params_indices
-                                     or p in names_params_indices)]
+                                     or p in names_params_indices)])
         self._connect_parameters()

     def parameters_changed(self):
@@ -216,7 +219,6 @@ class Parameterized(Constrainable, Pickleable, Observable):
         for i,p in enumerate(self._parameters_):
             p._direct_parent_ = self
             p._parent_index_ = i
-            p._connect_highest_parent(self)
             not_unique = []
             sizes.append(p.size+sizes[-1])
             self._param_slices_.append(slice(sizes[-2], sizes[-1]))
@@ -231,14 +233,6 @@ class Parameterized(Constrainable, Pickleable, Observable):
                 self.__dict__[pname] = p
                 self._added_names_.add(pname)

-    def _connect_highest_parent(self, highest_parent):
-        self._highest_parent_ = highest_parent
-        if not hasattr(self, "_parameters_") or len(self._parameters_) < 1:
-            # no parameters for this class
-            return
-        for p in self._parameters_:
-            p._connect_highest_parent(highest_parent)
-
     #===========================================================================
     # Pickling operations
     #===========================================================================
@@ -256,6 +250,16 @@ class Parameterized(Constrainable, Pickleable, Observable):
             cPickle.dump(self, f, protocol)

     def copy(self):
         """Returns a (deep) copy of the current model """
+        #dc = dict()
+        #for k, v in self.__dict__.iteritems():
+        #    if k not in ['_highest_parent_', '_direct_parent_']:
+        #        dc[k] = copy.deepcopy(v)
+        #dc = copy.deepcopy(self.__dict__)
+        #dc['_highest_parent_'] = None
+        #dc['_direct_parent_'] = None
+        #s = self.__class__.new()
+        #s.__dict__ = dc
         return copy.deepcopy(self)

     def __getstate__(self):
         if self._has_get_set_state():
@@ -310,8 +314,11 @@ class Parameterized(Constrainable, Pickleable, Observable):
     #===========================================================================
     # Optimization handles:
     #===========================================================================
-    def _get_param_names_transformed(self):
+    def _get_param_names(self):
         n = numpy.array([p.name_hirarchical+'['+str(i)+']' for p in self.flattened_parameters for i in p._indices()])
+        return n
+
+    def _get_param_names_transformed(self):
+        n = self._get_param_names()
         if self._has_fixes():
             return n[self._fixes_]
         return n
@@ -372,6 +379,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
         that is an int array, containing the indexes for the flattened
         param inside this parameterized logic.
         """
+        if isinstance(param, ParamConcatenation):
+            return numpy.hstack((self._raveled_index_for(p) for p in param.params))
         return param._raveled_index() + self._offset_for(param)

     def _raveled_index(self):
@@ -381,7 +390,7 @@ class Parameterized(Constrainable, Pickleable, Observable):
         """
         return numpy.r_[:self.size]
     #===========================================================================
-    # Handle ties:
+    # Fixing parameters:
     #===========================================================================
     def _set_fixed(self, param_or_index):
         if not self._has_fixes(): self._fixes_ = numpy.ones(self.size, dtype=bool)
@@ -406,9 +415,6 @@ class Parameterized(Constrainable, Pickleable, Observable):
         if self._has_fixes():
             return self._fixes_[self._raveled_index_for(param)]
         return numpy.ones(self.size, dtype=bool)[self._raveled_index_for(param)]
-    #===========================================================================
-    # Fixing parameters:
-    #===========================================================================
     def _fix(self, param, warning=True):
         f = self._add_constrain(param, __fixed__, warning)
         self._set_fixed(f)
@@ -419,6 +425,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
     #===========================================================================
     # Convenience for fixed, tied checking of param:
     #===========================================================================
+    def fixed_indices(self):
+        return np.array([x.is_fixed for x in self._parameters_])
     def _is_fixed(self, param):
         # returns if the whole param is fixed
         if not self._has_fixes():
@@ -448,7 +456,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
         # if removing constraints before adding new is not wanted, just delete the above line!
         self.constraints.add(transform, rav_i)
         param = self._get_original(param)
-        param._set_params(transform.initialize(param._get_params()))
+        if not (transform == __fixed__):
+            param._set_params(transform.initialize(param._get_params()), update=False)
         if warning and any(reconstrained):
             # if you want to print the whole params object, which was reconstrained use:
             # m = str(param[self._backtranslate_index(param, reconstrained)])

View file

@@ -162,7 +162,9 @@ class Logistic(Transformation):
     def initialize(self, f):
         if np.any(np.logical_or(f < self.lower, f > self.upper)):
             print "Warning: changing parameters to satisfy constraints"
-        return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
+        #return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
+        #FIXME: Max, zeros_like right?
+        return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(np.zeros_like(f)), f)
     def __str__(self):
         return '{},{}'.format(self.lower, self.upper)
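
Aside (not part of the commit): assuming Logistic is the usual scaled sigmoid f(x) = lower + (upper - lower) / (1 + exp(-x)), self.f(np.zeros_like(f)) evaluates the transform at zero, i.e. the midpoint of (lower, upper), so out-of-range values are reset to the middle of the allowed interval. A small sketch under that assumption:

    import numpy as np

    lower, upper = 1.0, 3.0
    transform = lambda x: lower + (upper - lower) / (1. + np.exp(-x))

    values = np.array([0.5, 2.5, 10.0])  # first and last violate the bounds
    bad = np.logical_or(values < lower, values > upper)
    reset = np.where(bad, transform(np.zeros_like(values)), values)
    print(reset)  # [2.  2.5 2. ]  offending entries moved to the midpoint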

View file

@@ -3,10 +3,8 @@ Created on 6 Nov 2013
 @author: maxz
 '''
-import numpy as np

 from parameterized import Parameterized
 from param import Param
-from ...util.misc import param_to_array

 class Normal(Parameterized):
     '''
@@ -26,6 +24,7 @@ class Normal(Parameterized):
         See GPy.plotting.matplot_dep.variational_plots
         """
+        import sys
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
-        from ..plotting.matplot_dep import variational_plots
+        from ...plotting.matplot_dep import variational_plots
         return variational_plots.plot(self,*args)

View file

@@ -154,13 +154,13 @@ class SVIGP(GP):
         self.psi2 = None

     def dL_dtheta(self):
-        dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
+        dL_dtheta = self.kern._param_grad_helper(self.dL_dKmm, self.Z)
         if self.has_uncertain_inputs:
             dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X_batch, self.X_variance_batch)
             dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X_batch, self.X_variance_batch)
             dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X_batch, self.X_variance_batch)
         else:
-            dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X_batch, self.Z)
+            dL_dtheta += self.kern._param_grad_helper(self.dL_dpsi1, self.X_batch, self.Z)
             dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X_batch)
         return dL_dtheta


@ -15,54 +15,54 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False):
num_inducing = 5 num_inducing = 5
if plot: if plot:
output_dim = 1 output_dim = 1
input_dim = 2 input_dim = 3
else: else:
input_dim = 2 input_dim = 1
output_dim = 25 output_dim = 25
# generate GPLVM-like data # generate GPLVM-like data
X = _np.random.rand(num_inputs, input_dim) X = _np.random.rand(num_inputs, input_dim)
lengthscales = _np.random.rand(input_dim) lengthscales = _np.random.rand(input_dim)
k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True) k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
+ GPy.kern.white(input_dim, 0.01)) #+ GPy.kern.white(input_dim, 0.01)
)
K = k.K(X) K = k.K(X)
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, output_dim).T Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, output_dim).T
lik = Gaussian(Y, normalize=True)
k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) # k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
# k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0) # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing) m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
#=========================================================================== #===========================================================================
# randomly obstruct data with percentage p # randomly obstruct data with percentage p
p = .8 p = .8
Y_obstruct = Y.copy() Y_obstruct = Y.copy()
Y_obstruct[_np.random.uniform(size=(Y.shape)) < p] = _np.nan Y_obstruct[_np.random.uniform(size=(Y.shape)) < p] = _np.nan
#=========================================================================== #===========================================================================
m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing) #m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
m.lengthscales = lengthscales m.lengthscales = lengthscales
if plot: if plot:
import matplotlib.pyplot as pb import matplotlib.pyplot as pb
m.plot() m.plot()
pb.title('PCA initialisation') pb.title('PCA initialisation')
m2.plot() #m2.plot()
pb.title('PCA initialisation') #pb.title('PCA initialisation')
if optimize: if optimize:
m.optimize('scg', messages=verbose) m.optimize('scg', messages=verbose)
m2.optimize('scg', messages=verbose) #m2.optimize('scg', messages=verbose)
if plot: if plot:
m.plot() m.plot()
pb.title('After optimisation') pb.title('After optimisation')
m2.plot() #m2.plot()
pb.title('After optimisation') #pb.title('After optimisation')
return m, m2 return m
def gplvm_oil_100(optimize=True, verbose=1, plot=True): def gplvm_oil_100(optimize=True, verbose=1, plot=True):
import GPy import GPy
@ -264,16 +264,16 @@ def bgplvm_simulation(optimize=True, verbose=1,
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10 D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
m['noise'] = Y.var() / 100. m.Gaussian_noise = Y.var() / 100.
if optimize: if optimize:
print "Optimizing model:" print "Optimizing model:"
m.optimize('scg', messages=verbose, max_iters=max_iters, m.optimize('scg', messages=verbose, max_iters=max_iters,
gtol=.05) gtol=.05)
if plot: if plot:
m.plot_X_1d("BGPLVM Latent Space 1D") m.q.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
return m return m


@ -37,39 +37,43 @@ def student_t_approx(optimize=True, plot=True):
# Kernel object # Kernel object
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
kernel2 = kernel1.copy() kernel2 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
kernel3 = kernel1.copy() kernel3 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
kernel4 = kernel1.copy() kernel4 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
#Gaussian GP model on clean data #Gaussian GP model on clean data
m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1) m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
# optimize # optimize
m1.ensure_default_constraints() m1.ensure_default_constraints()
m1.constrain_fixed('white', 1e-5) m1['white'] = 1e-5
m1['white'].constrain_fixed()
m1.randomize() m1.randomize()
#Gaussian GP model on corrupt data #Gaussian GP model on corrupt data
m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2) m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2)
m2.ensure_default_constraints() m2.ensure_default_constraints()
m2.constrain_fixed('white', 1e-5) m2['white'] = 1e-5
m2['white'].constrain_fixed()
m2.randomize() m2.randomize()
#Student t GP model on clean data #Student t GP model on clean data
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution) laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
m3 = GPy.models.GPRegression(X, Y.copy(), kernel3, likelihood=stu_t_likelihood) m3 = GPy.core.GP(X, Y.copy(), kernel3, likelihood=t_distribution, inference_method=laplace_inf)
m3.ensure_default_constraints() m3.ensure_default_constraints()
m3.constrain_bounded('t_noise', 1e-6, 10.) m3['t_noise'].constrain_bounded(1e-6, 10.)
m3.constrain_fixed('white', 1e-5) m3['white'] = 1e-5
m3['white'].constrain_fixed()
m3.randomize() m3.randomize()
#Student t GP model on corrupt data #Student t GP model on corrupt data
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution) laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
m4 = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood) m4 = GPy.core.GP(X, Yc.copy(), kernel4, likelihood=t_distribution, inference_method=laplace_inf)
m4.ensure_default_constraints() m4.ensure_default_constraints()
m4.constrain_bounded('t_noise', 1e-6, 10.) m4['t_noise'].constrain_bounded(1e-6, 10.)
m4.constrain_fixed('white', 1e-5) m4['white'] = 1e-5
m4['white'].constrain_fixed()
m4.randomize() m4.randomize()
if optimize: if optimize:
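Both Student-t models above hand deg_free and sigma2 straight to the StudentT likelihood, which presumably implements the standard Student-t observation density with \nu degrees of freedom and scale \sigma^2:

    p(y_i \mid f_i) = \frac{\Gamma\!\left(\frac{\nu+1}{2}\right)}{\Gamma\!\left(\frac{\nu}{2}\right)\sqrt{\nu\pi\sigma^2}} \left(1 + \frac{(y_i - f_i)^2}{\nu\sigma^2}\right)^{-\frac{\nu+1}{2}}

The non-Gaussian likelihood is why an approximate inference method (here the Laplace approximation) is needed at all, and its heavy tails are what give m3 and m4 their robustness to the corrupted observations.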


@ -281,11 +281,12 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X)) f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None] Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]
noise_model = GPy.likelihoods.poisson() kern = GPy.kern.rbf(1)
likelihood = GPy.likelihoods.Laplace(Y,noise_model) poisson_lik = GPy.likelihoods.Poisson()
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(X, Y, likelihood=likelihood) m = GPy.core.GP(X, Y, kernel=kern, likelihood=poisson_lik, inference_method=laplace_inf)
if optimize: if optimize:
m.optimize(optimizer) m.optimize(optimizer)
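The toy data above follow a log-Gaussian Cox-style construction, a latent function drawn from an RBF-kernel GP and pushed through an exponential link:

    f \sim \mathcal{N}\!\left(0, K_{\mathrm{rbf}}\right), \qquad y_i \mid f_i \sim \mathrm{Poisson}\!\left(\exp(f_i)\right)

which is exactly the model the replacement code, GPy.core.GP with a Poisson likelihood and Laplace inference, is set up to invert.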


@ -97,10 +97,10 @@ class VarDTC(object):
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T) K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:] _dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z) self._dpsi1_dtheta += self.kern._param_grad_helper(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z) self._dKmm_dtheta += self.kern._param_grad_helper(_dKmm,self.Z)
self._dKmm_dX += self.kern.dK_dX(_dKmm ,self.Z) self._dKmm_dX += self.kern.gradients_X(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:]) self._dpsi1_dX += self.kern.gradients_X(_dpsi1.T,self.Z,self.X[i:i+1,:])
# the partial derivative vector for the likelihood # the partial derivative vector for the likelihood
if self.likelihood.num_params == 0: if self.likelihood.num_params == 0:
@ -144,15 +144,15 @@ class VarDTC(object):
def dL_dtheta(self): def dL_dtheta(self):
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X) dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X)
dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z) dL_dtheta += self.kern._param_grad_helper(self._dL_dpsi1,self.X,self.Z)
dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z) dL_dtheta += self.kern._param_grad_helper(self._dL_dKmm,X=self.Z)
dL_dtheta += self._dKmm_dtheta dL_dtheta += self._dKmm_dtheta
dL_dtheta += self._dpsi1_dtheta dL_dtheta += self._dpsi1_dtheta
return dL_dtheta return dL_dtheta
def dL_dZ(self): def dL_dZ(self):
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X) dL_dZ = self.kern.gradients_X(self._dL_dpsi1.T,self.Z,self.X)
dL_dZ += self.kern.dK_dX(self._dL_dKmm,X=self.Z) dL_dZ += self.kern.gradients_X(self._dL_dKmm,X=self.Z)
dL_dZ += self._dpsi1_dX dL_dZ += self._dpsi1_dX
dL_dZ += self._dKmm_dX dL_dZ += self._dKmm_dX
return dL_dZ return dL_dZ
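The mechanical part of this hunk, and of most kernel hunks below, is a pair of renames; the call sites are otherwise unchanged:

    # old name (removed)                 new name (this commit)
    # kern.dK_dtheta(dL_dK, X, X2)  ->   kern._param_grad_helper(dL_dK, X, X2)
    # kern.dK_dX(dL_dK, X, X2)      ->   kern.gradients_X(dL_dK, X, X2)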


@ -11,9 +11,8 @@
#http://gaussianprocess.org/gpml/code. #http://gaussianprocess.org/gpml/code.
import numpy as np import numpy as np
from ...util.linalg import mdot, jitchol, pddet, dpotrs, dtrtrs, dpotri, symmetrify from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify
from ...util.misc import param_to_array from ...util.misc import param_to_array
from functools import partial as partial_func
from posterior import Posterior from posterior import Posterior
import warnings import warnings
from scipy import optimize from scipy import optimize
@ -32,7 +31,7 @@ class LaplaceInference(object):
self._mode_finding_tolerance = 1e-7 self._mode_finding_tolerance = 1e-7
self._mode_finding_max_iter = 40 self._mode_finding_max_iter = 40
self.bad_fhat = True self.bad_fhat = True
self._previous_Ki_fhat = None
def inference(self, kern, X, likelihood, Y, Y_metadata=None): def inference(self, kern, X, likelihood, Y, Y_metadata=None):
""" """
@ -50,16 +49,17 @@ class LaplaceInference(object):
Ki_f_init = np.zeros_like(Y) Ki_f_init = np.zeros_like(Y)
else: else:
Ki_f_init = self._previous_Ki_fhat Ki_f_init = self._previous_Ki_fhat
f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata) f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
#Compute hessian and other variables at mode #Compute hessian and other variables at mode
log_marginal, Ki_W_i, K_Wi_i, dL_dK, woodbury_vector = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, Y_metadata) log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
#likelihood.gradient = self.likelihood_gradients()
kern.update_gradients_full(dL_dK, X) kern.update_gradients_full(dL_dK, X)
likelihood.update_gradients(dL_dthetaL)
self._previous_Ki_fhat = Ki_fhat.copy() self._previous_Ki_fhat = Ki_fhat.copy()
return Posterior(woodbury_vector=woodbury_vector, woodbury_inv = K_Wi_i, K=K), log_marginal, {'dL_dK':dL_dK} return Posterior(woodbury_vector=woodbury_vector, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK}
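The returned Posterior carries the two quantities needed for prediction. In GPy's woodbury parameterisation the predictive equations at test inputs X_* are presumably

    \mu_* = K_{*f}\,\alpha, \qquad \Sigma_* = K_{**} - K_{*f}\, W_i\, K_{f*}

with \alpha the woodbury_vector and W_i the woodbury_inv. At the Laplace mode \alpha = \nabla_f \log p(y \mid \hat f) = K^{-1}\hat f, so the posterior mean at the training inputs recovers \hat f itself.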
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None): def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None):
""" """
@ -84,7 +84,6 @@ class LaplaceInference(object):
Ki_f = Ki_f_init.copy() Ki_f = Ki_f_init.copy()
f = np.dot(K, Ki_f) f = np.dot(K, Ki_f)
#define the objective function (to be maximised) #define the objective function (to be maximised)
def obj(Ki_f, f): def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, extra_data=Y_metadata) return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, extra_data=Y_metadata)
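In the notation of Rasmussen and Williams (2006, ch. 3), obj is the unnormalised log posterior over the latent function that rasm_mode climbs,

    \Psi(f) = \log p(y \mid f) - \tfrac{1}{2} f^{\top} K^{-1} f

evaluated here through the pair (f, K^{-1}f) so that K never has to be inverted explicitly inside the loop.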
@ -133,13 +132,15 @@ class LaplaceInference(object):
return f, Ki_f return f, Ki_f
def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, kern, Y_metadata):
def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, Y_metadata):
""" """
At the mode, compute the hessian and effective covariance matrix. At the mode, compute the hessian and effective covariance matrix.
returns: logZ : approximation to the marginal likelihood returns: logZ : approximation to the marginal likelihood
Cov : the approximation to the covariance matrix woodbury_vector : variable required for calculating the approximation to the covariance matrix
woodbury_inv : variable required for calculating the approximation to the covariance matrix
dL_dthetaL : array of derivatives (1 x num_kernel_params)
dL_dthetaL : array of derivatives (1 x num_likelihood_params)
""" """
#At this point get the hessian matrix (or vector as W is diagonal) #At this point get the hessian matrix (or vector as W is diagonal)
W = -likelihood.d2logpdf_df2(f_hat, Y, extra_data=Y_metadata) W = -likelihood.d2logpdf_df2(f_hat, Y, extra_data=Y_metadata)
@ -153,45 +154,54 @@ class LaplaceInference(object):
#compute the log marginal #compute the log marginal
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, extra_data=Y_metadata) - np.sum(np.log(np.diag(L))) log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, extra_data=Y_metadata) - np.sum(np.log(np.diag(L)))
#compute dL_dK #Compute vital matrices for derivatives
explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i) dW_df = -likelihood.d3logpdf_df3(f_hat, Y, extra_data=Y_metadata) # -d3lik_d3fhat
#Implicit
d3lik_d3fhat = likelihood.d3logpdf_df3(f_hat, Y, extra_data=Y_metadata)
dL_dfhat = 0.5*(np.diag(Ki_W_i)[:, None]*d3lik_d3fhat) #why isn't this -0.5? s2 in R&W p126 line 9.
woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, extra_data=Y_metadata) woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, extra_data=Y_metadata)
implicit_part = np.dot(woodbury_vector, dL_dfhat.T).dot(np.eye(Y.shape[0]) - np.dot(K, K_Wi_i)) dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9.
#BiK, _ = dpotrs(L, K, lower=1)
#dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i)
dL_dK = explicit_part + implicit_part ####################
#compute dL_dK#
return log_marginal, Ki_W_i, K_Wi_i, dL_dK, woodbury_vector ####################
if kern.size > 0 and not kern.is_fixed:
def likelihood_gradients(self):
"""
Gradients with respect to likelihood parameters (dL_dthetaL)
:rtype: array of derivatives (1 x num_likelihood_params)
"""
dL_dfhat, I_KW_i = self._shared_gradients_components()
dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(self.f_hat, self.data, extra_data=self.extra_data)
num_params = len(self._get_param_names())
# make space for one derivative for each likelihood parameter
dL_dthetaL = np.zeros(num_params)
for thetaL_i in range(num_params):
#Explicit #Explicit
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[:, thetaL_i]) explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)
#- 0.5*np.trace(mdot(self.Ki_W_i, (self.K, np.diagflat(dlik_hess_dthetaL[thetaL_i]))))
+ np.dot(0.5*np.diag(self.Ki_W_i)[:,None].T, dlik_hess_dthetaL[:, thetaL_i])
)
#Implicit #Implicit
dfhat_dthetaL = mdot(I_KW_i, self.K, dlik_grad_dthetaL[:, thetaL_i]) implicit_part = np.dot(woodbury_vector, dL_dfhat.T).dot(I_KW_i)
dL_dthetaL_imp = np.dot(dL_dfhat, dfhat_dthetaL)
dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
return dL_dthetaL dL_dK = explicit_part + implicit_part
else:
dL_dK = np.zeros(likelihood.size)
####################
#compute dL_dthetaL#
####################
if likelihood.size > 0 and not likelihood.is_fixed:
dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, extra_data=Y_metadata)
num_params = likelihood.size
# make space for one derivative for each likelihood parameter
dL_dthetaL = np.zeros(num_params)
for thetaL_i in range(num_params):
#Explicit
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i])
# The + comes from the fact that dlik_hess_dthetaL == -dW_dthetaL
+ 0.5*np.sum(np.diag(Ki_W_i).flatten()*dlik_hess_dthetaL[:, thetaL_i].flatten())
)
#Implicit
dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[:, thetaL_i])
#dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[:, thetaL_i])
dL_dthetaL_imp = np.dot(dL_dfhat.T, dfhat_dthetaL)
dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
else:
dL_dthetaL = np.zeros(likelihood.size)
return log_marginal, woodbury_vector, K_Wi_i, dL_dK, dL_dthetaL
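The explicit/implicit split used for both dL_dK and dL_dthetaL above is the usual Laplace chain rule (Rasmussen and Williams 2006, sec. 5.5.1): the mode \hat f itself moves when the parameters move, so

    \frac{dL}{d\theta} = \underbrace{\frac{\partial L}{\partial \theta}}_{\text{explicit}} + \sum_i \frac{\partial L}{\partial \hat f_i}\,\frac{\partial \hat f_i}{\partial \theta}

with the second, implicit term assembled from dL_dfhat, I_KW_i and the likelihood gradients exactly as in the loop above.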
def _compute_B_statistics(self, K, W, log_concave): def _compute_B_statistics(self, K, W, log_concave):
""" """
@ -225,6 +235,5 @@ class LaplaceInference(object):
#K_Wi_i_2 , _= dpotri(L2) #K_Wi_i_2 , _= dpotri(L2)
#symmetrify(K_Wi_i_2) #symmetrify(K_Wi_i_2)
return K_Wi_i, L, LiW12 return K_Wi_i, L, LiW12


@ -80,7 +80,7 @@ class VarDTC(object):
# no backsubstitution because of bound explosion on tr(A) if not... # no backsubstitution because of bound explosion on tr(A) if not...
LmInv, _ = dtrtri(Lm, lower=1) LmInv, _ = dtrtri(Lm, lower=1)
A = LmInv.T.dot(psi2_beta.dot(LmInv)) A = LmInv.T.dot(psi2_beta.dot(LmInv))
print A.sum() #print A.sum()
else: else:
if het_noise: if het_noise:
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))


@ -8,6 +8,7 @@ from parts.prod import Prod as prod
from parts.linear import Linear from parts.linear import Linear
from parts.kernpart import Kernpart from parts.kernpart import Kernpart
from ..core.parameterization import Parameterized from ..core.parameterization import Parameterized
from GPy.core.parameterization.param import Param
class kern(Parameterized): class kern(Parameterized):
def __init__(self, input_dim, parts=[], input_slices=None): def __init__(self, input_dim, parts=[], input_slices=None):
@ -84,7 +85,7 @@ class kern(Parameterized):
# represents the gradient in the transformed space (i.e. that given by # represents the gradient in the transformed space (i.e. that given by
# get_params_transformed()) # get_params_transformed())
# #
# :param g: the gradient vector for the current model, usually created by dK_dtheta # :param g: the gradient vector for the current model, usually created by _param_grad_helper
# """ # """
# x = self._get_params() # x = self._get_params()
# [np.place(g, index, g[index] * constraint.gradfactor(x[index])) # [np.place(g, index, g[index] * constraint.gradfactor(x[index]))
@ -160,6 +161,9 @@ class kern(Parameterized):
return newkern return newkern
def __call__(self, X, X2=None):
return self.K(X, X2)
def __mul__(self, other): def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information""" """ Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other) return self.prod(other)
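With the __call__ added above, a kern instance can be evaluated like a plain covariance function; the two lines below are equivalent (k, X and X2 stand for any kernel and inputs):

    K = k.K(X, X2)
    K = k(X, X2)   # new shorthand introduced in this commit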
@ -291,7 +295,7 @@ class kern(Parameterized):
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
[p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
def dK_dtheta(self, dL_dK, X, X2=None): def _param_grad_helper(self, dL_dK, X, X2=None):
""" """
Compute the gradient of the covariance function with respect to the parameters. Compute the gradient of the covariance function with respect to the parameters.
@ -307,9 +311,9 @@ class kern(Parameterized):
assert X.shape[1] == self.input_dim assert X.shape[1] == self.input_dim
target = np.zeros(self.size) target = np.zeros(self.size)
if X2 is None: if X2 is None:
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] [p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
else: else:
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] [p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
return self._transform_gradients(target) return self._transform_gradients(target)
@ -481,6 +485,7 @@ from GPy.core.model import Model
class Kern_check_model(Model): class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel.""" """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Model.__init__(self, 'kernel_test_model')
num_samples = 20 num_samples = 20
num_samples2 = 10 num_samples2 = 10
if kernel==None: if kernel==None:
@ -494,12 +499,10 @@ class Kern_check_model(Model):
dL_dK = np.ones((X.shape[0], X2.shape[0])) dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel self.kernel=kernel
self.add_parameter(kernel)
self.X = X self.X = X
self.X2 = X2 self.X2 = X2
self.dL_dK = dL_dK self.dL_dK = dL_dK
#self.constrained_indices=[]
#self.constraints=[]
Model.__init__(self, 'kernel_test_model')
def is_positive_definite(self): def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0] v = np.linalg.eig(self.kernel.K(self.X))[0]
@ -508,15 +511,6 @@ class Kern_check_model(Model):
else: else:
return True return True
def _get_params(self):
return self.kernel._get_params()
def _get_param_names(self):
return self.kernel._get_param_names()
def _set_params(self, x):
self.kernel._set_params(x)
def log_likelihood(self): def log_likelihood(self):
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum() return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
@ -529,7 +523,7 @@ class Kern_check_dK_dtheta(Kern_check_model):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
return self.kernel.dK_dtheta(self.dL_dK, self.X, self.X2) return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model): class Kern_check_dKdiag_dtheta(Kern_check_model):
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters.""" """This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
@ -537,6 +531,8 @@ class Kern_check_dKdiag_dtheta(Kern_check_model):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None: if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0])) self.dL_dK = np.ones((self.X.shape[0]))
def parameters_changed(self):
self.kernel.update_gradients_full(self.dL_dK, self.X)
def log_likelihood(self): def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum() return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
@ -548,23 +544,16 @@ class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """ """This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.remove_parameter(kernel)
self.X = Param('X', self.X)
self.add_parameter(self.X)
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
return self.kernel.dK_dX(self.dL_dK, self.X, self.X2).flatten() return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
def _get_param_names(self): class Kern_check_dKdiag_dX(Kern_check_dK_dX):
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
def _get_params(self):
return self.X.flatten()
def _set_params(self, x):
self.X=x.reshape(self.X.shape)
class Kern_check_dKdiag_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """ """This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None: if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0])) self.dL_dK = np.ones((self.X.shape[0]))
@ -574,15 +563,6 @@ class Kern_check_dKdiag_dX(Kern_check_model):
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten() return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
def _get_param_names(self):
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
def _get_params(self):
return self.X.flatten()
def _set_params(self, x):
self.X=x.reshape(self.X.shape)
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False): def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
""" """
This function runs on kernels to check the correctness of their This function runs on kernels to check the correctness of their
@ -657,7 +637,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
except NotImplementedError: except NotImplementedError:
result=True result=True
if verbose: if verbose:
print("dK_dX not implemented for " + kern.name) print("gradients_X not implemented for " + kern.name)
if result and verbose: if result and verbose:
print("Check passed.") print("Check passed.")
if not result: if not result:
@ -673,7 +653,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
except NotImplementedError: except NotImplementedError:
result=True result=True
if verbose: if verbose:
print("dK_dX not implemented for " + kern.name) print("gradients_X not implemented for " + kern.name)
if result and verbose: if result and verbose:
print("Check passed.") print("Check passed.")
if not result: if not result:
@ -689,7 +669,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
except NotImplementedError: except NotImplementedError:
result=True result=True
if verbose: if verbose:
print("dK_dX not implemented for " + kern.name) print("gradients_X not implemented for " + kern.name)
if result and verbose: if result and verbose:
print("Check passed.") print("Check passed.")
if not result: if not result:
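A hypothetical invocation of the checker defined above (the kernel choice and the import path are illustrative only); it builds the dummy models for each implemented gradient and prints which checks pass when verbose is set:

    import numpy as np
    import GPy
    from GPy.kern.kern import kern_test   # assumed location of this module

    k = GPy.kern.rbf(2) + GPy.kern.white(2)
    X = np.random.randn(20, 2)
    kern_test(k, X=X, verbose=True)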


@ -43,7 +43,7 @@ class Brownian(Kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
target += self.variance*X.flatten() target += self.variance*X.flatten()
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
if X2 is None: if X2 is None:
X2 = X X2 = X
target += np.sum(np.fmin(X,X2.T)*dL_dK) target += np.sum(np.fmin(X,X2.T)*dL_dK)
@ -51,7 +51,7 @@ class Brownian(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.dot(X.flatten(), dL_dKdiag) target += np.dot(X.flatten(), dL_dKdiag)
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
raise NotImplementedError, "TODO" raise NotImplementedError, "TODO"
#target += self.variance #target += self.variance
#target -= self.variance*theta(X-X2.T) #target -= self.variance*theta(X-X2.T)


@ -74,7 +74,7 @@ class Matern32(Kernpart):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
np.add(target, self.variance, target) np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
@ -96,7 +96,7 @@ class Matern32(Kernpart):
"""derivative of the diagonal of the covariance matrix with respect to the parameters.""" """derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(dL_dKdiag) target[0] += np.sum(dL_dKdiag)
def dK_dX(self, dL_dK, X, X2, target): def gradients_X(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None] dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
@ -105,8 +105,8 @@ class Matern32(Kernpart):
else: else:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None] dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf) ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2)) gradients_X = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
pass pass


@ -74,7 +74,7 @@ class Matern52(Kernpart):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
np.add(target,self.variance,target) np.add(target,self.variance,target)
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
@ -96,7 +96,7 @@ class Matern52(Kernpart):
"""derivative of the diagonal of the covariance matrix with respect to the parameters.""" """derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(dL_dKdiag) target[0] += np.sum(dL_dKdiag)
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None] dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
@ -104,8 +104,8 @@ class Matern52(Kernpart):
else: else:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2)) gradients_X = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*dL_dK.T[:,:,None],0) target += np.sum(gradients_X*dL_dK.T[:,:,None],0)
def dKdiag_dX(self,dL_dKdiag,X,target): def dKdiag_dX(self,dL_dKdiag,X,target):
pass pass


@ -90,7 +90,7 @@ class ODE_1(Kernpart):
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target) np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.abs(X - X2.T) dist = np.abs(X - X2.T)


@ -124,7 +124,7 @@ class Eq_ode1(Kernpart):
#target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()] #target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
pass pass
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
# First extract times and indices. # First extract times and indices.
self._extract_t_indices(X, X2, dL_dK=dL_dK) self._extract_t_indices(X, X2, dL_dK=dL_dK)
@ -193,7 +193,7 @@ class Eq_ode1(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,index,target): def dKdiag_dtheta(self,dL_dKdiag,index,target):
pass pass
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
pass pass
def _extract_t_indices(self, X, X2=None, dL_dK=None): def _extract_t_indices(self, X, X2=None, dL_dK=None):


@ -75,7 +75,7 @@ class Exponential(Kernpart):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
np.add(target, self.variance, target) np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
@ -95,13 +95,13 @@ class Exponential(Kernpart):
# NB: derivative of diagonal elements wrt lengthscale is 0 # NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(dL_dKdiag) target[0] += np.sum(dL_dKdiag)
def dK_dX(self, dL_dK, X, X2, target): def gradients_X(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None] dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf) ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2)) gradients_X = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
pass pass


@ -50,7 +50,7 @@ class FiniteDimensional(Kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
product = np.diag(self.K(X, X)) product = np.diag(self.K(X, X))
np.add(target,product,target) np.add(target,product,target)
def dK_dtheta(self,X,X2,target): def _param_grad_helper(self,X,X2,target):
"""Return shape is NxMx(Ntheta)""" """Return shape is NxMx(Ntheta)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = np.column_stack([f(X) for f in self.F]) FX = np.column_stack([f(X) for f in self.F])


@ -31,10 +31,10 @@ class Fixed(Kernpart):
def K(self, X, X2, target): def K(self, X, X2, target):
target += self.variance * self.fixed_K target += self.variance * self.fixed_K
def dK_dtheta(self, partial, X, X2, target): def _param_grad_helper(self, partial, X, X2, target):
target += (partial * self.fixed_K).sum() target += (partial * self.fixed_K).sum()
def dK_dX(self, partial, X, X2, target): def gradients_X(self, partial, X, X2, target):
pass pass
def dKdiag_dX(self, partial, X, target): def dKdiag_dX(self, partial, X, target):


@ -85,7 +85,7 @@ class Gibbs(Kernpart):
"""Compute the diagonal of the covariance matrix for X.""" """Compute the diagonal of the covariance matrix for X."""
np.add(target, self.variance, target) np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters.""" """Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2) self._K_computations(X, X2)
self._dK_computations(dL_dK) self._dK_computations(dL_dK)
@ -97,7 +97,7 @@ class Gibbs(Kernpart):
target+= np.hstack([(dL_dK*self._K_dvar).sum(), gmapping]) target+= np.hstack([(dL_dK*self._K_dvar).sum(), gmapping])
def dK_dX(self, dL_dK, X, X2, target): def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X.""" """Derivative of the covariance matrix with respect to X."""
# First account for gradients arising from presence of X in exponent. # First account for gradients arising from presence of X in exponent.
self._K_computations(X, X2) self._K_computations(X, X2)
@ -105,8 +105,8 @@ class Gibbs(Kernpart):
_K_dist = 2*(X[:, None, :] - X[None, :, :]) _K_dist = 2*(X[:, None, :] - X[None, :, :])
else: else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co
dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2)) gradients_X = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
target += np.sum(dK_dX*dL_dK.T[:, :, None], 0) target += np.sum(gradients_X*dL_dK.T[:, :, None], 0)
# Now account for gradients arising from presence of X in lengthscale. # Now account for gradients arising from presence of X in lengthscale.
self._dK_computations(dL_dK) self._dK_computations(dL_dK)
if X2 is None: if X2 is None:


@ -80,7 +80,7 @@ class Hetero(Kernpart):
"""Helper function for computing the diagonal elements of the covariance.""" """Helper function for computing the diagonal elements of the covariance."""
return self.mapping.f(X).flatten()**2 return self.mapping.f(X).flatten()**2
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters.""" """Derivative of the covariance with respect to the parameters."""
if (X2 is None) or (X2 is X): if (X2 is None) or (X2 is X):
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1] dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
@ -90,7 +90,7 @@ class Hetero(Kernpart):
"""Gradient of diagonal of covariance with respect to parameters.""" """Gradient of diagonal of covariance with respect to parameters."""
target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X) target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)
def dK_dX(self, dL_dK, X, X2, target): def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X.""" """Derivative of the covariance matrix with respect to X."""
if X2==None or X2 is X: if X2==None or X2 is X:
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1] dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]


@ -50,19 +50,19 @@ class Hierarchical(Kernpart):
#X,slices = X[:,:-1],index_to_slices(X[:,-1]) #X,slices = X[:,:-1],index_to_slices(X[:,-1])
#[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices] #[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
X, X2, slices, slices2 = self._sort_slices(X,X2) X, X2, slices, slices2 = self._sort_slices(X,X2)
[[[[k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)] [[[[k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)]
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
raise NotImplementedError raise NotImplementedError
#X,slices = X[:,:-1],index_to_slices(X[:,-1]) #X,slices = X[:,:-1],index_to_slices(X[:,-1])
#if X2 is None: #if X2 is None:
#X2,slices2 = X,slices #X2,slices2 = X,slices
#else: #else:
#X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) #X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
#[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] #[[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
# #
def dKdiag_dX(self,dL_dKdiag,X,target): def dKdiag_dX(self,dL_dKdiag,X,target):
raise NotImplementedError raise NotImplementedError


@ -70,22 +70,22 @@ class IndependentOutputs(Kernpart):
X,slices = X[:,:-1],index_to_slices(X[:,-1]) X,slices = X[:,:-1],index_to_slices(X[:,-1])
[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices] [[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1]) X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None: if X2 is None:
X2,slices2 = X,slices X2,slices2 = X,slices
else: else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] [[[self.k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1]) X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None: if X2 is None:
X2,slices2 = X,slices X2,slices2 = X,slices
else: else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] [[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
def dKdiag_dX(self,dL_dKdiag,X,target): def dKdiag_dX(self,dL_dKdiag,X,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1]) X,slices = X[:,:-1],index_to_slices(X[:,-1])


@ -20,7 +20,6 @@ class Kernpart(Parameterized):
# the number of optimisable parameters # the number of optimisable parameters
# the name of the covariance function. # the name of the covariance function.
# link to parameterized objects # link to parameterized objects
self._parameters_ = []
#self._X = None #self._X = None
def connect_input(self, X): def connect_input(self, X):
@ -76,14 +75,14 @@ class Kernpart(Parameterized):
raise NotImplementedError raise NotImplementedError
def Kdiag(self,X,target): def Kdiag(self,X,target):
raise NotImplementedError raise NotImplementedError
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
raise NotImplementedError raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
# In the base case compute this by calling dK_dtheta. Need to # In the base case compute this by calling _param_grad_helper. Need to
# override for stationary covariances (for example) to save # override for stationary covariances (for example) to save
# time. # time.
for i in range(X.shape[0]): for i in range(X.shape[0]):
self.dK_dtheta(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target) self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
def psi0(self,Z,mu,S,target): def psi0(self,Z,mu,S,target):
raise NotImplementedError raise NotImplementedError
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
@ -106,12 +105,19 @@ class Kernpart(Parameterized):
raise NotImplementedError raise NotImplementedError
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
raise NotImplementedError raise NotImplementedError
def dK_dX(self, dL_dK, X, X2, target): def gradients_X(self, dL_dK, X, X2, target):
raise NotImplementedError raise NotImplementedError
def dKdiag_dX(self, dL_dK, X, target): def dKdiag_dX(self, dL_dK, X, target):
raise NotImplementedError raise NotImplementedError
def update_gradients_full(self, dL_dK, X):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
"""Set the gradients of all parameters when doing sparse (M) inference."""
raise NotImplementedError
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
raise NotImplementedError
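These three hooks define the new contract for kernel parts: instead of returning gradient vectors, a part writes its gradients straight onto its Param objects. A hypothetical minimal subclass in the spirit of the Fixed part earlier in this diff (the class name and the precomputed base_K matrix are illustrative, not part of the commit; variance is assumed to be a Param added in __init__):

    class ScaledFixed(Kernpart):
        """Hypothetical part: variance times a fixed, precomputed matrix base_K."""
        def K(self, X, X2, target):
            target += self.variance * self.base_K
        def update_gradients_full(self, dL_dK, X):
            # write the gradient straight onto the Param, as the new contract requires
            self.variance.gradient = np.sum(dL_dK * self.base_K)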
class Kernpart_stationary(Kernpart): class Kernpart_stationary(Kernpart):
def __init__(self, input_dim, lengthscale=None, ARD=False): def __init__(self, input_dim, lengthscale=None, ARD=False):


@ -8,6 +8,7 @@ from kernpart import Kernpart
from ...util.linalg import tdot from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Linear(Kernpart): class Linear(Kernpart):
""" """
@ -43,8 +44,9 @@ class Linear(Kernpart):
else: else:
variances = np.ones(self.input_dim) variances = np.ones(self.input_dim)
self.variances = Param('variances', variances) self.variances = Param('variances', variances, Logexp())
self.add_parameters(self.variances) self.variances.gradient = np.zeros(self.variances.shape)
self.add_parameter(self.variances)
self.variances.add_observer(self, self.update_variance) self.variances.add_observer(self, self.update_variance)
# initialize cache # initialize cache
@ -57,20 +59,34 @@ class Linear(Kernpart):
def on_input_change(self, X): def on_input_change(self, X):
self._K_computations(X, None) self._K_computations(X, None)
# def _get_params(self): def update_gradients_full(self, dL_dK, X):
# return self.variances #self.variances.gradient[:] = 0
# self._param_grad_helper(dL_dK, X, self.variances.gradient)
# def _set_params(self, x):
# assert x.size == (self.num_params) def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
# self.variances = x tmp = dL_dKdiag[:, None] * X ** 2
#def parameters_changed(self): if self.ARD:
# self.variances2 = np.square(self.variances) self.variances.gradient = tmp.sum(0)
# else:
# def _get_param_names(self): self.variances.gradient = tmp.sum()
# if self.num_params == 1: self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
# return ['variance'] self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)
# else:
# return ['variance_%i' % i for i in range(self.variances.size)] def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self._psi_computations(Z, mu, S)
# psi0:
tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD: self.variances.gradient[:] = tmp.sum(0)
else: self.variances.gradient[:] = tmp.sum()
#psi1
self._param_grad_helper(dL_dpsi1, mu, Z, self.variances.gradient)
#psi2
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
else: self.variances.gradient += tmp.sum()
#from Kmm
self._K_computations(Z, None)
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
def K(self, X, X2, target): def K(self, X, X2, target):
if self.ARD: if self.ARD:
@ -88,7 +104,7 @@ class Linear(Kernpart):
def Kdiag(self, X, target): def Kdiag(self, X, target):
np.add(target, np.sum(self.variances * np.square(X), -1), target) np.add(target, np.sum(self.variances * np.square(X), -1), target)
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
if self.ARD: if self.ARD:
if X2 is None: if X2 is None:
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)] [np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)]
@ -100,14 +116,7 @@ class Linear(Kernpart):
self._K_computations(X, X2) self._K_computations(X, X2)
target += np.sum(self._dot_product * dL_dK) target += np.sum(self._dot_product * dL_dK)
def dKdiag_dtheta(self, dL_dKdiag, X, target): def gradients_X(self, dL_dK, X, X2, target):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
target += tmp.sum(0)
else:
target += tmp.sum()
def dK_dX(self, dL_dK, X, X2, target):
if X2 is None: if X2 is None:
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
else: else:
@ -124,14 +133,6 @@ class Linear(Kernpart):
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
target += np.sum(self.variances * self.mu2_S, 1) target += np.sum(self.variances * self.mu2_S, 1)
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD:
target += tmp.sum(0)
else:
target += tmp.sum()
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances) target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
target_S += dL_dpsi0[:, None] * self.variances target_S += dL_dpsi0[:, None] * self.variances
@ -140,17 +141,13 @@ class Linear(Kernpart):
"""the variance, it does nothing""" """the variance, it does nothing"""
self._psi1 = self.K(mu, Z, target) self._psi1 = self.K(mu, Z, target)
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
"""the variance, it does nothing"""
self.dK_dtheta(dL_dpsi1, mu, Z, target)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
"""Do nothing for S, it does not affect psi1""" """Do nothing for S, it does not affect psi1"""
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self.dK_dX(dL_dpsi1.T, Z, mu, target) self.gradients_X(dL_dpsi1.T, Z, mu, target)
def psi2(self, Z, mu, S, target): def psi2(self, Z, mu, S, target):
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
@ -164,25 +161,17 @@ class Linear(Kernpart):
def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target): def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target):
tmp = np.zeros((mu.shape[0], Z.shape[0])) tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp) self.K(mu,Z,tmp)
self.dK_dtheta(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target) self._param_grad_helper(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target)
result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0) result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0)
if self.ARD: if self.ARD:
target += result.sum(0).sum(0).sum(0) target += result.sum(0).sum(0).sum(0)
else: else:
target += result.sum() target += result.sum()
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
if self.ARD:
target += tmp.sum(0).sum(0).sum(0)
else:
target += tmp.sum()
def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S): def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
tmp = np.zeros((mu.shape[0], Z.shape[0])) tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp) self.K(mu,Z,tmp)
self.dK_dX(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu) self.gradients_X(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu)
Zs = Z*self.variances Zs = Z*self.variances
Zs_sq = Zs[:,None,:]*Zs[None,:,:] Zs_sq = Zs[:,None,:]*Zs[None,:,:]
@ -288,11 +277,11 @@ class Linear(Kernpart):
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)): if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
self._X = X.copy() self._X = X.copy()
if X2 is None: if X2 is None:
self._dot_product = tdot(X) self._dot_product = tdot(param_to_array(X))
self._X2 = None self._X2 = None
else: else:
self._X2 = X2.copy() self._X2 = X2.copy()
self._dot_product = np.dot(X, X2.T) self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
def _psi_computations(self, Z, mu, S): def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2 # here are the "statistics" for psi1 and psi2


@ -77,7 +77,7 @@ class MLP(Kernpart):
self._K_diag_computations(X) self._K_diag_computations(X)
target+= self.variance*self._K_diag_dvar target+= self.variance*self._K_diag_dvar
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters.""" """Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2) self._K_computations(X, X2)
denom3 = self._K_denom*self._K_denom*self._K_denom denom3 = self._K_denom*self._K_denom*self._K_denom
@ -107,7 +107,7 @@ class MLP(Kernpart):
target[0] += np.sum(self._K_dvar*dL_dK) target[0] += np.sum(self._K_dvar*dL_dK)
def dK_dX(self, dL_dK, X, X2, target): def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X""" """Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2) self._K_computations(X, X2)
arg = self._K_asin_arg arg = self._K_asin_arg
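The same renaming recurs in the kernel parts below (dK_dtheta becomes _param_grad_helper, dK_dX becomes gradients_X). A hedged sketch of the calling convention, assuming a Kernpart instance whose gradients_X(dL_dK, X, X2, target) accumulates into a preallocated target array as in these hunks:

import numpy as np

def accumulate_gradients_X(kernpart, dL_dK, X, X2=None):
    # allocate a target matching X and let the part add its contribution into it
    target = np.zeros_like(X)
    kernpart.gradients_X(dL_dK, X, X2, target)
    return target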
View file
@ -112,7 +112,7 @@ class PeriodicMatern32(Kernpart):
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
@silence_errors @silence_errors
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)""" """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
View file
@ -114,7 +114,7 @@ class PeriodicMatern52(Kernpart):
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
@silence_errors @silence_errors
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)""" """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
View file
@ -110,7 +110,7 @@ class PeriodicExponential(Kernpart):
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
@silence_errors @silence_errors
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)""" """derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
View file
@ -86,7 +86,7 @@ class POLY(Kernpart):
self._K_diag_computations(X) self._K_diag_computations(X)
target+= self.variance*self._K_diag_dvar target+= self.variance*self._K_diag_dvar
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters.""" """Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2) self._K_computations(X, X2)
base = self.variance*self.degree*self._K_poly_arg**(self.degree-1) base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
@ -99,7 +99,7 @@ class POLY(Kernpart):
target[2] += base_cov_grad.sum() target[2] += base_cov_grad.sum()
def dK_dX(self, dL_dK, X, X2, target): def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X""" """Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2) self._K_computations(X, X2)
arg = self._K_poly_arg arg = self._K_poly_arg
View file
@ -54,15 +54,15 @@ class Prod(Kernpart):
self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1]) self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2]) self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""Derivative of the covariance matrix with respect to the parameters.""" """Derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2) self._K_computations(X,X2)
if X2 is None: if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params]) self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:]) self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
else: else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params]) self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:]) self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
def Kdiag(self,X,target): def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
@ -81,21 +81,21 @@ class Prod(Kernpart):
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params]) self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:]) self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2) self._K_computations(X,X2)
if X2 is None: if X2 is None:
if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize): if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize):
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1]) self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2]) self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize): else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize):
#NOTE The indices column in the inputs makes the ki.dK_dX fail when passing None instead of X[:,self.slicei] #NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei]
X2 = X X2 = X
self.k1.dK_dX(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
else: else:
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0]) K1 = np.zeros(X.shape[0])
@ -103,8 +103,8 @@ class Prod(Kernpart):
self.k1.Kdiag(X[:,self.slice1],K1) self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2) self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1]) self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2]) self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
def _K_computations(self,X,X2): def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
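The product-kernel gradients above are the usual product rule (the derivative with respect to k1's parameters weights dL_dK by K2, and vice versa). A small finite-difference check of that identity in plain NumPy, independent of the Kernpart API:

import numpy as np

K1 = np.random.rand(4, 4)
K2 = np.random.rand(4, 4)
dL_dK = np.random.rand(4, 4)
eps = 1e-6
i, j = 1, 2
K1p = K1.copy()
K1p[i, j] += eps
# d sum(dL_dK * K1 * K2) / dK1[i, j] should equal (dL_dK * K2)[i, j]
numeric = (np.sum(dL_dK * K1p * K2) - np.sum(dL_dK * K1 * K2)) / eps
assert np.isclose(numeric, (dL_dK * K2)[i, j], atol=1e-4)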
View file
@ -41,15 +41,15 @@ class prod_orthogonal(Kernpart):
self._K_computations(X,X2) self._K_computations(X,X2)
target += self._K1 * self._K2 target += self._K1 * self._K2
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2) self._K_computations(X,X2)
if X2 is None: if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params]) self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:]) self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
else: else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params]) self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:]) self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])
def Kdiag(self,X,target): def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
@ -67,11 +67,11 @@ class prod_orthogonal(Kernpart):
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params]) self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:]) self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:])
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2) self._K_computations(X,X2)
self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target) self.k1.gradients_X(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target)
self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target) self.k2.gradients_X(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0]) K1 = np.zeros(X.shape[0])
@ -79,8 +79,8 @@ class prod_orthogonal(Kernpart):
self.k1.Kdiag(X[:,0:self.k1.input_dim],K1) self.k1.Kdiag(X[:,0:self.k1.input_dim],K1)
self.k2.Kdiag(X[:,self.k1.input_dim:],K2) self.k2.Kdiag(X[:,self.k1.input_dim:],K2)
self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.input_dim], target) self.k1.gradients_X(dL_dKdiag*K2, X[:,:self.k1.input_dim], target)
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.input_dim:], target) self.k2.gradients_X(dL_dKdiag*K1, X[:,self.k1.input_dim:], target)
def _K_computations(self,X,X2): def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
View file
@ -52,7 +52,7 @@ class RationalQuadratic(Kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
target += self.variance target += self.variance
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
if X2 is None: X2 = X if X2 is None: X2 = X
dist2 = np.square((X-X2.T)/self.lengthscale) dist2 = np.square((X-X2.T)/self.lengthscale)
@ -68,7 +68,7 @@ class RationalQuadratic(Kernpart):
target[0] += np.sum(dL_dKdiag) target[0] += np.sum(dL_dKdiag)
# here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged # here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: if X2 is None:
dist2 = np.square((X-X.T)/self.lengthscale) dist2 = np.square((X-X.T)/self.lengthscale)
View file
@ -8,6 +8,7 @@ from kernpart import Kernpart
from ...util.linalg import tdot from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class RBF(Kernpart): class RBF(Kernpart):
""" """
@ -50,9 +51,12 @@ class RBF(Kernpart):
else: else:
lengthscale = np.ones(self.input_dim) lengthscale = np.ones(self.input_dim)
self.variance = Param('variance', variance) self.variance = Param('variance', variance, Logexp())
self.lengthscale = Param('lengthscale', lengthscale) self.lengthscale = Param('lengthscale', lengthscale)
self.lengthscale.add_observer(self, self.update_lengthscale) self.lengthscale.add_observer(self, self.update_lengthscale)
self.update_lengthscale(self.lengthscale)
self.add_parameters(self.variance, self.lengthscale) self.add_parameters(self.variance, self.lengthscale)
self.parameters_changed() # initializes cache self.parameters_changed() # initializes cache
@ -114,7 +118,7 @@ class RBF(Kernpart):
self._K_computations(X, Z) self._K_computations(X, Z)
self.variance.gradient += np.sum(dL_dKnm * self._K_dvar) self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
if self.ARD: if self.ARD:
self.lengthscales.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z) self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
else: else:
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm) self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
@ -123,7 +127,7 @@ class RBF(Kernpart):
self._K_computations(Z, None) self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD: if self.ARD:
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else: else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
@ -138,7 +142,7 @@ class RBF(Kernpart):
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale) d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None] dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD: if not self.ARD:
self.lengthscale.gradeint = dpsi1_dlength.sum() self.lengthscale.gradient = dpsi1_dlength.sum()
else: else:
self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0) self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0)
@ -157,7 +161,7 @@ class RBF(Kernpart):
self._K_computations(Z, None) self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD: if self.ARD:
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else: else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
@ -168,8 +172,8 @@ class RBF(Kernpart):
_K_dist = 2*(X[:, None, :] - X[None, :, :]) _K_dist = 2*(X[:, None, :] - X[None, :, :])
else: else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
pass pass
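The variance above is now constructed with a Logexp transform to keep it positive. A hedged pure-NumPy sketch of that transform, assuming Logexp is the usual softplus map log(1 + exp(x)) between the unconstrained and the positive space:

import numpy as np

def logexp(x):
    # unconstrained -> positive
    return np.log1p(np.exp(x))

def logexp_inverse(f):
    # positive -> unconstrained
    return np.log(np.expm1(f))

x = 0.3
assert np.isclose(logexp_inverse(logexp(x)), x)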
View file
@ -97,7 +97,7 @@ class RBFInv(RBF):
# return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)] # return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
# TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale) # TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
def dK_dtheta(self, dL_dK, X, X2, target): def _param_grad_helper(self, dL_dK, X, X2, target):
self._K_computations(X, X2) self._K_computations(X, X2)
target[0] += np.sum(self._K_dvar * dL_dK) target[0] += np.sum(self._K_dvar * dL_dK)
if self.ARD: if self.ARD:
@ -142,14 +142,14 @@ class RBFInv(RBF):
else: else:
target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) * (-self.lengthscale2) target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) * (-self.lengthscale2)
def dK_dX(self, dL_dK, X, X2, target): def gradients_X(self, dL_dK, X, X2, target):
self._K_computations(X, X2) self._K_computations(X, X2)
if X2 is None: if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :]) _K_dist = 2*(X[:, None, :] - X[None, :, :])
else: else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) gradients_X = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
pass pass
View file
@ -73,7 +73,7 @@ class RBFCos(Kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
np.add(target,self.variance,target) np.add(target,self.variance,target)
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
self._K_computations(X,X2) self._K_computations(X,X2)
target[0] += np.sum(dL_dK*self._dvar) target[0] += np.sum(dL_dK*self._dvar)
if self.ARD: if self.ARD:
@ -88,7 +88,7 @@ class RBFCos(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
target[0] += np.sum(dL_dKdiag) target[0] += np.sum(dL_dKdiag)
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
#TODO!!! #TODO!!!
raise NotImplementedError raise NotImplementedError
View file
@ -50,7 +50,7 @@ class Spline(Kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
target += self.variance*X.flatten()**3/3. target += self.variance*X.flatten()**3/3.
def dK_dtheta(self,X,X2,target): def _param_grad_helper(self,X,X2,target):
target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6. target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.
def dKdiag_dtheta(self,X,target): def dKdiag_dtheta(self,X,target):
View file
@ -144,8 +144,8 @@ class SS_RBF(Kernpart):
_K_dist = 2*(X[:, None, :] - X[None, :, :]) _K_dist = 2*(X[:, None, :] - X[None, :, :])
else: else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
pass pass
View file
@ -40,7 +40,7 @@ class Symmetric(Kernpart):
self.k.K(X,AX2,target) self.k.K(X,AX2,target)
self.k.K(AX,AX2,target) self.k.K(AX,AX2,target)
def dK_dtheta(self,dL_dK,X,X2,target): def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
AX = np.dot(X,self.transform) AX = np.dot(X,self.transform)
if X2 is None: if X2 is None:
@ -48,13 +48,13 @@ class Symmetric(Kernpart):
ZX2 = AX ZX2 = AX
else: else:
AX2 = np.dot(X2, self.transform) AX2 = np.dot(X2, self.transform)
self.k.dK_dtheta(dL_dK,X,X2,target) self.k._param_grad_helper(dL_dK,X,X2,target)
self.k.dK_dtheta(dL_dK,AX,X2,target) self.k._param_grad_helper(dL_dK,AX,X2,target)
self.k.dK_dtheta(dL_dK,X,AX2,target) self.k._param_grad_helper(dL_dK,X,AX2,target)
self.k.dK_dtheta(dL_dK,AX,AX2,target) self.k._param_grad_helper(dL_dK,AX,AX2,target)
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
AX = np.dot(X,self.transform) AX = np.dot(X,self.transform)
if X2 is None: if X2 is None:
@ -62,10 +62,10 @@ class Symmetric(Kernpart):
ZX2 = AX ZX2 = AX
else: else:
AX2 = np.dot(X2, self.transform) AX2 = np.dot(X2, self.transform)
self.k.dK_dX(dL_dK, X, X2, target) self.k.gradients_X(dL_dK, X, X2, target)
self.k.dK_dX(dL_dK, AX, X2, target) self.k.gradients_X(dL_dK, AX, X2, target)
self.k.dK_dX(dL_dK, X, AX2, target) self.k.gradients_X(dL_dK, X, AX2, target)
self.k.dK_dX(dL_dK, AX ,AX2, target) self.k.gradients_X(dL_dK, AX ,AX2, target)
def Kdiag(self,X,target): def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
View file
@ -348,7 +348,7 @@ class spkern(Kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
self._weave_inline(self._Kdiag_code, X, target) self._weave_inline(self._Kdiag_code, X, target)
def dK_dtheta(self,partial,X,Z,target): def _param_grad_helper(self,partial,X,Z,target):
if Z is None: if Z is None:
self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial) self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
else: else:
@ -357,7 +357,7 @@ class spkern(Kernpart):
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,partial,X,target):
self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial) self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial)
def dK_dX(self,partial,X,Z,target): def gradients_X(self,partial,X,Z,target):
if Z is None: if Z is None:
self._weave_inline(self._dK_dX_code_X, X, target, Z, partial) self._weave_inline(self._dK_dX_code_X, X, target, Z, partial)
else: else:
View file
@ -130,7 +130,10 @@ class Gaussian(Likelihood):
:rtype: float :rtype: float
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape assert np.asarray(link_f).shape == np.asarray(y).shape
return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi)) N = y.shape[0]
ln_det_cov = N*np.log(self.variance)
return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))
def dlogpdf_dlink(self, link_f, y, extra_data=None): def dlogpdf_dlink(self, link_f, y, extra_data=None):
""" """
@ -175,7 +178,8 @@ class Gaussian(Likelihood):
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape assert np.asarray(link_f).shape == np.asarray(y).shape
hess = -(1.0/self.variance)*np.ones((self.N, 1)) N = y.shape[0]
hess = -(1.0/self.variance)*np.ones((N, 1))
return hess return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None): def d3logpdf_dlink3(self, link_f, y, extra_data=None):
@ -194,7 +198,8 @@ class Gaussian(Likelihood):
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape assert np.asarray(link_f).shape == np.asarray(y).shape
d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None] N = y.shape[0]
d3logpdf_dlink3 = np.zeros((N,1))
return d3logpdf_dlink3 return d3logpdf_dlink3
def dlogpdf_link_dvar(self, link_f, y, extra_data=None): def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
@ -215,7 +220,8 @@ class Gaussian(Likelihood):
assert np.asarray(link_f).shape == np.asarray(y).shape assert np.asarray(link_f).shape == np.asarray(y).shape
e = y - link_f e = y - link_f
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.sum(np.square(e)) N = y.shape[0]
dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
return np.sum(dlik_dsigma) # Sure about this sum? return np.sum(dlik_dsigma) # Sure about this sum?
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None): def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
@ -255,7 +261,8 @@ class Gaussian(Likelihood):
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None] N = y.shape[0]
d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
return d2logpdf_dlink2_dvar return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, extra_data=None): def dlogpdf_link_dtheta(self, f, y, extra_data=None):
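The hunks above drop the cached self.N, self.ln_det_K and self.I in favour of recomputing from y on each call. A standalone restatement of the resulting Gaussian log-density, assuming link_f and y are N x 1 arrays and variance is a scalar:

import numpy as np
from scipy.stats import norm

def gaussian_logpdf(link_f, y, variance):
    N = y.shape[0]
    ln_det_cov = N * np.log(variance)
    return -0.5 * (np.sum((y - link_f)**2 / variance) + ln_det_cov + N * np.log(2. * np.pi))

y = np.random.randn(10, 1)
f = np.zeros_like(y)
# agrees with the sum of independent normal log-densities
assert np.isclose(gaussian_logpdf(f, y, 2.0), norm.logpdf(y, 0, np.sqrt(2.0)).sum())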
View file
@ -44,6 +44,10 @@ class Likelihood(Parameterized):
def _gradients(self,partial): def _gradients(self,partial):
return np.zeros(0) return np.zeros(0)
def update_gradients(self, partial):
if self.size > 0:
raise NotImplementedError('Must be implemented for likelihoods with parameters to be optimized')
def _preprocess_values(self,Y): def _preprocess_values(self,Y):
""" """
In case it is needed, this function assess the output values or makes any pertinent transformation on them. In case it is needed, this function assess the output values or makes any pertinent transformation on them.
@ -303,31 +307,31 @@ class Likelihood(Parameterized):
""" """
TODO: Doc strings TODO: Doc strings
""" """
if len(self._get_param_names()) > 0: if self.size > 0:
link_f = self.gp_link.transf(f) link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data) return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
else: else:
#There are no parameters, so return an empty array for the derivatives #There are no parameters, so return an empty array for the derivatives
return np.empty([1, 0]) return np.zeros([1, 0])
def dlogpdf_df_dtheta(self, f, y, extra_data=None): def dlogpdf_df_dtheta(self, f, y, extra_data=None):
""" """
TODO: Doc strings TODO: Doc strings
""" """
if len(self._get_param_names()) > 0: if self.size > 0:
link_f = self.gp_link.transf(f) link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data) dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
return chain_1(dlogpdf_dlink_dtheta, dlink_df) return chain_1(dlogpdf_dlink_dtheta, dlink_df)
else: else:
#There are no parameters, so return an empty array for the derivatives #There are no parameters, so return an empty array for the derivatives
return np.empty([f.shape[0], 0]) return np.zeros([f.shape[0], 0])
def d2logpdf_df2_dtheta(self, f, y, extra_data=None): def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
""" """
TODO: Doc strings TODO: Doc strings
""" """
if len(self._get_param_names()) > 0: if self.size > 0:
link_f = self.gp_link.transf(f) link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
@ -336,7 +340,7 @@ class Likelihood(Parameterized):
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2) return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
else: else:
#There are no parameters, so return an empty array for the derivatives #There are no parameters, so return an empty array for the derivatives
return np.empty([f.shape[0], 0]) return np.zeros([f.shape[0], 0])
def _laplace_gradients(self, f, y, extra_data=None): def _laplace_gradients(self, f, y, extra_data=None):
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data) dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
@ -345,9 +349,12 @@ class Likelihood(Parameterized):
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names' #Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
# ensure we have gradients for every parameter we want to optimize # ensure we have gradients for every parameter we want to optimize
assert dlogpdf_dtheta.shape[1] == len(self._get_param_names()) try:
assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names()) assert len(dlogpdf_dtheta) == self.size #1 x num_param array
assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names()) assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
except Exception as e:
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
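A quick check of the zero-parameter branch above: a (N, 0) gradient block is a no-op under hstack, which is why np.zeros([f.shape[0], 0]) is a safe return value for likelihoods with no tunable parameters (plain NumPy, nothing assumed about the Likelihood class itself):

import numpy as np

N = 4
dlogpdf_df = np.random.randn(N, 1)
dlogpdf_df_dtheta = np.zeros((N, 0))   # no parameters -> zero-width gradient block
stacked = np.hstack((dlogpdf_df, dlogpdf_df_dtheta))
assert stacked.shape == (N, 1)         # the empty block contributes nothing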
View file
@ -19,8 +19,11 @@ class Poisson(Likelihood):
.. Note:: .. Note::
Y is expected to take values in {0,1,2,...} Y is expected to take values in {0,1,2,...}
""" """
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False): def __init__(self, gp_link=None):
super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance) if gp_link is None:
gp_link = link_functions.Log_ex_1()
super(Poisson, self).__init__(gp_link, name='Poisson')
def _preprocess_values(self,Y): def _preprocess_values(self,Y):
return Y return Y
View file
@ -8,6 +8,7 @@ import link_functions
from scipy import stats, integrate from scipy import stats, integrate
from scipy.special import gammaln, gamma from scipy.special import gammaln, gamma
from likelihood import Likelihood from likelihood import Likelihood
from ..core.parameterization import Param
class StudentT(Likelihood): class StudentT(Likelihood):
""" """
@ -19,26 +20,30 @@ class StudentT(Likelihood):
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}} p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
""" """
def __init__(self,gp_link=None,analytical_mean=True,analytical_variance=True, deg_free=5, sigma2=2): def __init__(self,gp_link=None, deg_free=5, sigma2=2):
self.v = deg_free if gp_link is None:
self.sigma2 = sigma2 gp_link = link_functions.Identity()
super(StudentT, self).__init__(gp_link, name='Student_T')
self.sigma2 = Param('t_noise', float(sigma2))
self.v = Param('deg_free', float(deg_free))
self.add_parameter(self.sigma2)
self.add_parameter(self.v)
self.v.constrain_fixed()
self._set_params(np.asarray(sigma2))
super(StudentT, self).__init__(gp_link,analytical_mean,analytical_variance)
self.log_concave = False self.log_concave = False
def _get_params(self): def parameters_changed(self):
return np.asarray(self.sigma2) self.variance = (self.v / float(self.v - 2)) * self.sigma2
def _get_param_names(self): def update_gradients(self, partial):
return ["t_noise_std2"] """
Pull out the gradients, be careful as the order must match the order
def _set_params(self, x): in which the parameters are added
self.sigma2 = float(x) """
self.sigma2.gradient = partial[0]
@property self.v.gradient = partial[1]
def variance(self, extra_data=None):
return (self.v / float(self.v - 2)) * self.sigma2
def pdf_link(self, link_f, y, extra_data=None): def pdf_link(self, link_f, y, extra_data=None):
""" """
@ -82,10 +87,14 @@ class StudentT(Likelihood):
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f e = y - link_f
#FIXME:
#Why does np.log(1 + (1/self.v)*((y-link_f)**2)/self.sigma2) suppress the divide by zero?!
#But np.log(1 + (1/float(self.v))*((y-link_f)**2)/self.sigma2) throws it correctly
#print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
objective = (+ gammaln((self.v + 1) * 0.5) objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5) - gammaln(self.v * 0.5)
- 0.5*np.log(self.sigma2 * self.v * np.pi) - 0.5*np.log(self.sigma2 * self.v * np.pi)
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
) )
return np.sum(objective) return np.sum(objective)
@ -222,17 +231,20 @@ class StudentT(Likelihood):
def dlogpdf_link_dtheta(self, f, y, extra_data=None): def dlogpdf_link_dtheta(self, f, y, extra_data=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data) dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
return np.asarray([[dlogpdf_dvar]]) dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
return np.hstack((dlogpdf_dvar, dlogpdf_dv))
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None): def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data) dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
return dlogpdf_dlink_dvar dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None): def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data) d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
return d2logpdf_dlink2_dvar d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None): def predictive_variance(self, mu, sigma, predictive_mean=None):
""" """
Compute predictive variance of student_t*normal p(y*|f*)p(f*) Compute predictive variance of student_t*normal p(y*|f*)p(f*)
@ -252,7 +264,7 @@ class StudentT(Likelihood):
return true_var return true_var
def _predictive_mean_analytical(self, mu, sigma): def predictive_mean(self, mu, sigma):
""" """
Compute mean of the prediction Compute mean of the prediction
""" """
View file
@ -57,4 +57,4 @@ class Kernel(Mapping):
return np.hstack((self._df_dA.flatten(), self._df_dbias)) return np.hstack((self._df_dA.flatten(), self._df_dbias))
def df_dX(self, dL_df, X): def df_dX(self, dL_df, X):
return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X) return self.kern.gradients_X((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
View file
@ -2,7 +2,6 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
import itertools
from gplvm import GPLVM from gplvm import GPLVM
from .. import kern from .. import kern
from ..core import SparseGP from ..core import SparseGP
@ -23,15 +22,10 @@ class BayesianGPLVM(SparseGP, GPLVM):
:type init: 'PCA'|'random' :type init: 'PCA'|'random'
""" """
def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, name='bayesian gplvm', **kwargs): Z=None, kernel=None, inference_method=None, likelihood=Gaussian(), name='bayesian gplvm', **kwargs):
if type(likelihood_or_Y) is np.ndarray:
likelihood = Gaussian(likelihood_or_Y)
else:
likelihood = likelihood_or_Y
if X == None: if X == None:
X = self.initialise_latent(init, input_dim, likelihood.Y) X = self.initialise_latent(init, input_dim, Y)
self.init = init self.init = init
if X_variance is None: if X_variance is None:
@ -44,9 +38,9 @@ class BayesianGPLVM(SparseGP, GPLVM):
if kernel is None: if kernel is None:
kernel = kern.rbf(input_dim) # + kern.white(input_dim) kernel = kern.rbf(input_dim) # + kern.white(input_dim)
SparseGP.__init__(self, X=X, likelihood=likelihood, kernel=kernel, Z=Z, X_variance=X_variance, name=name, **kwargs) self.q = Normal(X, X_variance)
self.q = Normal(self.X, self.X_variance) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs)
self.add_parameter(self.q, gradient=self._dbound_dmuS, index=0) self.add_parameter(self.q, index=0)
self.ensure_default_constraints() self.ensure_default_constraints()
def _getstate(self): def _getstate(self):
@ -94,9 +88,9 @@ class BayesianGPLVM(SparseGP, GPLVM):
return dKL_dmu, dKL_dS return dKL_dmu, dKL_dS
def dL_dmuS(self): def dL_dmuS(self):
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance) dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance) dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance) dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2 dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2 dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
@ -107,10 +101,25 @@ class BayesianGPLVM(SparseGP, GPLVM):
var_S = np.sum(self.X_variance - np.log(self.X_variance)) var_S = np.sum(self.X_variance - np.log(self.X_variance))
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
def log_likelihood(self): def parameters_changed(self):
ll = SparseGP.log_likelihood(self) self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
kl = self.KL_divergence() self._log_marginal_likelihood -= self.KL_divergence()
return ll - kl
#The derivative of the bound wrt the inducing inputs Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
dL_dmu, dL_dS = self.dL_dmuS()
dKL_dmu, dKL_dS = self.dKL_dmuS()
self.q.means.gradient = dL_dmu - dKL_dmu
self.q.variances.gradient = dL_dS - dKL_dS
# def log_likelihood(self):
# ll = SparseGP.log_likelihood(self)
# kl = self.KL_divergence()
# return ll - kl
def _dbound_dmuS(self): def _dbound_dmuS(self):
dKL_dmu, dKL_dS = self.dKL_dmuS() dKL_dmu, dKL_dS = self.dKL_dmuS()
@ -181,18 +190,18 @@ class BayesianGPLVM(SparseGP, GPLVM):
""" """
dmu_dX = np.zeros_like(Xnew) dmu_dX = np.zeros_like(Xnew)
for i in range(self.Z.shape[0]): for i in range(self.Z.shape[0]):
dmu_dX += self.kern.dK_dX(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :]) dmu_dX += self.kern.gradients_X(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
return dmu_dX return dmu_dX
def dmu_dXnew(self, Xnew): def dmu_dXnew(self, Xnew):
""" """
Individual gradient of prediction at Xnew w.r.t. each sample in Xnew Individual gradient of prediction at Xnew w.r.t. each sample in Xnew
""" """
dK_dX = np.zeros((Xnew.shape[0], self.num_inducing)) gradients_X = np.zeros((Xnew.shape[0], self.num_inducing))
ones = np.ones((1, 1)) ones = np.ones((1, 1))
for i in range(self.Z.shape[0]): for i in range(self.Z.shape[0]):
dK_dX[:, i] = self.kern.dK_dX(ones, Xnew, self.Z[i:i + 1, :]).sum(-1) gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
return np.dot(dK_dX, self.Cpsi1Vf) return np.dot(gradients_X, self.Cpsi1Vf)
def plot_steepest_gradient_map(self, *args, ** kwargs): def plot_steepest_gradient_map(self, *args, ** kwargs):
""" """
View file
@ -44,7 +44,7 @@ class BCGPLVM(GPLVM):
GP._set_params(self, x[self.mapping.num_params:]) GP._set_params(self, x[self.mapping.num_params:])
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
dL_df = self.kern.dK_dX(self.dL_dK, self.X) dL_df = self.kern.gradients_X(self.dL_dK, self.X)
dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y) dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self))) return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))
View file
@ -60,7 +60,7 @@ class GPLVM(GP):
def jacobian(self,X): def jacobian(self,X):
target = np.zeros((X.shape[0],X.shape[1],self.output_dim)) target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
for i in range(self.output_dim): for i in range(self.output_dim):
target[:,:,i]=self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X) target[:,:,i]=self.kern.gradients_X(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
return target return target
def magnification(self,X): def magnification(self,X):
View file
@ -1,9 +1,10 @@
# ## Copyright (c) 2012, GPy authors (see AUTHORS.txt). # ## Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from GPy.core.model import Model from ..core.model import Model
import itertools import itertools
import numpy import numpy
from ..core.parameterization import Param
def get_shape(x): def get_shape(x):
if isinstance(x, numpy.ndarray): if isinstance(x, numpy.ndarray):
@ -59,7 +60,7 @@ class GradientChecker(Model):
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
""" """
Model.__init__(self) Model.__init__(self, 'GradientChecker')
if isinstance(x0, (list, tuple)) and names is None: if isinstance(x0, (list, tuple)) and names is None:
self.shapes = [get_shape(xi) for xi in x0] self.shapes = [get_shape(xi) for xi in x0]
self.names = ['X{i}'.format(i=i) for i in range(len(x0))] self.names = ['X{i}'.format(i=i) for i in range(len(x0))]
@ -72,8 +73,10 @@ class GradientChecker(Model):
else: else:
self.names = names self.names = names
self.shapes = [get_shape(x0)] self.shapes = [get_shape(x0)]
for name, xi in zip(self.names, at_least_one_element(x0)): for name, xi in zip(self.names, at_least_one_element(x0)):
self.__setattr__(name, xi) self.__setattr__(name, Param(name, xi))
self.add_parameter(self.__getattribute__(name))
# self._param_names = [] # self._param_names = []
# for name, shape in zip(self.names, self.shapes): # for name, shape in zip(self.names, self.shapes):
# self._param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape)))))) # self._param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
@ -93,20 +96,18 @@ class GradientChecker(Model):
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
return numpy.atleast_1d(self.df(*self._get_x(), **self.kwargs)).flatten() return numpy.atleast_1d(self.df(*self._get_x(), **self.kwargs)).flatten()
#def _get_params(self):
#return numpy.atleast_1d(numpy.hstack(map(lambda name: flatten_if_needed(self.__getattribute__(name)), self.names)))
def _get_params(self): #def _set_params(self, x):
return numpy.atleast_1d(numpy.hstack(map(lambda name: flatten_if_needed(self.__getattribute__(name)), self.names))) #current_index = 0
#for name, shape in zip(self.names, self.shapes):
#current_size = numpy.prod(shape)
#self.__setattr__(name, x[current_index:current_index + current_size].reshape(shape))
#current_index += current_size
#def _get_param_names(self):
def _set_params(self, x): #_param_names = []
current_index = 0 #for name, shape in zip(self.names, self.shapes):
for name, shape in zip(self.names, self.shapes): #_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
current_size = numpy.prod(shape) #return _param_names
self.__setattr__(name, x[current_index:current_index + current_size].reshape(shape))
current_index += current_size
def _get_param_names(self):
_param_names = []
for name, shape in zip(self.names, self.shapes):
_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
return _param_names
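A hedged usage sketch of the Param-backed GradientChecker above, following the pattern in its own docstring (construct with f, df, x0 and a parameter name, then call checkgrad), and assuming the GPy.models import used in the tests below:

import numpy as np
from GPy.models import GradientChecker

def f(x):
    return np.sum(np.square(x))   # scalar objective

def df(x):
    return 2 * x                  # its analytic gradient

grad = GradientChecker(f, df, np.random.randn(3), ['x'])
grad.checkgrad(verbose=1)         # compares df against a numerical estimate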
View file
@ -52,7 +52,7 @@ class SparseGPLVM(SparseGPRegression, GPLVM):
def dL_dX(self): def dL_dX(self):
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X) dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X)
dL_dX += self.kern.dK_dX(self.dL_dpsi1, self.X, self.Z) dL_dX += self.kern.gradients_X(self.dL_dpsi1, self.X, self.Z)
return dL_dX return dL_dX
View file
@ -1,4 +1,5 @@
import pylab as pb import pylab as pb, numpy as np
from ...util.misc import param_to_array
def plot(parameterized, fignum=None, ax=None, colors=None): def plot(parameterized, fignum=None, ax=None, colors=None):
""" """
View file
@ -5,7 +5,7 @@ import unittest
import numpy as np import numpy as np
import GPy import GPy
verbose = False verbose = True
try: try:
import sympy import sympy
View file
@ -4,10 +4,11 @@ import GPy
from GPy.models import GradientChecker from GPy.models import GradientChecker
import functools import functools
import inspect import inspect
from GPy.likelihoods.noise_models import gp_transformations from GPy.likelihoods import link_functions
from ..core.parameterization import Param
from functools import partial from functools import partial
#np.random.seed(300) #np.random.seed(300)
np.random.seed(7) #np.random.seed(7)
def dparam_partial(inst_func, *args): def dparam_partial(inst_func, *args):
""" """
@ -22,12 +23,14 @@ def dparam_partial(inst_func, *args):
the f or Y that are being used in the function whilst we tweak the the f or Y that are being used in the function whilst we tweak the
param param
""" """
def param_func(param, inst_func, args): def param_func(param_val, param_name, inst_func, args):
inst_func.im_self._set_params(param) #inst_func.im_self._set_params(param)
#inst_func.im_self.add_parameter(Param(param_name, param_val))
inst_func.im_self[param_name] = param_val
return inst_func(*args) return inst_func(*args)
return functools.partial(param_func, inst_func=inst_func, args=args) return functools.partial(param_func, inst_func=inst_func, args=args)
def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=False, verbose=False): def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None, randomize=False, verbose=False):
""" """
checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N
However if we are holding other parameters fixed and moving something else However if we are holding other parameters fixed and moving something else
@ -38,27 +41,34 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
The number of parameters and N is the number of data The number of parameters and N is the number of data
Need to take a slice out from f and a slice out of df Need to take a slice out from f and a slice out of df
""" """
#print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__, print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__,
#func.__name__, dfunc.__name__) func.__name__, dfunc.__name__)
partial_f = dparam_partial(func, *args) partial_f = dparam_partial(func, *args)
partial_df = dparam_partial(dfunc, *args) partial_df = dparam_partial(dfunc, *args)
gradchecking = True gradchecking = True
for param in params: zipped_params = zip(params, params_names)
fnum = np.atleast_1d(partial_f(param)).shape[0] for param_ind, (param_val, param_name) in enumerate(zipped_params):
dfnum = np.atleast_1d(partial_df(param)).shape[0] #Check one parameter at a time, make sure it is 2d (as some gradients only return arrays) then strip out the parameter
fnum = np.atleast_2d(partial_f(param_val, param_name))[:, param_ind].shape[0]
dfnum = np.atleast_2d(partial_df(param_val, param_name))[:, param_ind].shape[0]
for fixed_val in range(dfnum): for fixed_val in range(dfnum):
#dlik and dlik_dvar gives back 1 value for each #dlik and dlik_dvar gives back 1 value for each
f_ind = min(fnum, fixed_val+1) - 1 f_ind = min(fnum, fixed_val+1) - 1
print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val) print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val)
#Make grad checker with this param moving, note that set_params is NOT being called #Make grad checker with this param moving, note that set_params is NOT being called
#The parameter is being set directly with __setattr__ #The parameter is being set directly with __setattr__
grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind], #Check only the parameter and function value we wish to check at a time
lambda x : np.atleast_1d(partial_df(x))[fixed_val], grad = GradientChecker(lambda p_val: np.atleast_2d(partial_f(p_val, param_name))[f_ind, param_ind],
param, 'p') lambda p_val: np.atleast_2d(partial_df(p_val, param_name))[fixed_val, param_ind],
#This is not general for more than one param... param_val, [param_name])
if constraints is not None: if constraints is not None:
for constraint in constraints: for constrain_param, constraint in constraints:
constraint('p', grad) if grad.grep_param_names(constrain_param):
constraint(constrain_param, grad)
else:
print "parameter didn't exist"
print constrain_param, " ", constraint
if randomize: if randomize:
grad.randomize() grad.randomize()
if verbose: if verbose:
@ -76,7 +86,7 @@ class TestNoiseModels(object):
Generic model checker Generic model checker
""" """
def setUp(self): def setUp(self):
self.N = 5 self.N = 15
self.D = 3 self.D = 3
self.X = np.random.rand(self.N, self.D)*10 self.X = np.random.rand(self.N, self.D)*10
@ -94,7 +104,7 @@ class TestNoiseModels(object):
self.var = np.random.rand(1) self.var = np.random.rand(1)
#Make a bigger step as lower bound can be quite curved #Make a bigger step as lower bound can be quite curved
self.step = 1e-3 self.step = 1e-4
def tearDown(self): def tearDown(self):
self.Y = None self.Y = None
@ -107,17 +117,20 @@ class TestNoiseModels(object):
#################################################### ####################################################
# Constraint wrappers so we can just list them off # # Constraint wrappers so we can just list them off #
#################################################### ####################################################
def constrain_fixed(regex, model):
model[regex].constrain_fixed()
def constrain_negative(regex, model): def constrain_negative(regex, model):
model.constrain_negative(regex) model[regex].constrain_negative()
def constrain_positive(regex, model): def constrain_positive(regex, model):
model.constrain_positive(regex) model[regex].constrain_positive()
def constrain_bounded(regex, model, lower, upper): def constrain_bounded(regex, model, lower, upper):
""" """
Used like: partial(constrain_bounded, lower=0, upper=1) Used like: partial(constrain_bounded, lower=0, upper=1)
""" """
model.constrain_bounded(regex, lower, upper) model[regex].constrain_bounded(lower, upper)
""" """
Dictionary where we nest models we would like to check Dictionary where we nest models we would like to check
@ -134,71 +147,81 @@ class TestNoiseModels(object):
} }
""" """
noise_models = {"Student_t_default": { noise_models = {"Student_t_default": {
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": { "grad_params": {
"names": ["t_noise"], "names": ["t_noise"],
"vals": [self.var], "vals": [self.var],
"constraints": [constrain_positive] "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
#"constraints": [("t_noise", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))]
}, },
"laplace": True "laplace": True
}, },
"Student_t_1_var": { "Student_t_1_var": {
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": { "grad_params": {
"names": ["t_noise"], "names": ["t_noise"],
"vals": [1.0], "vals": [1.0],
"constraints": [constrain_positive] "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
},
"laplace": True
},
"Student_t_small_deg_free": {
"model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
"grad_params": {
"names": ["t_noise"],
"vals": [self.var],
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_small_var": { "Student_t_small_var": {
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": { "grad_params": {
"names": ["t_noise"], "names": ["t_noise"],
"vals": [0.01], "vals": [0.0001],
"constraints": [constrain_positive] "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_large_var": { "Student_t_large_var": {
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": { "grad_params": {
"names": ["t_noise"], "names": ["t_noise"],
"vals": [10.0], "vals": [10.0],
"constraints": [constrain_positive] "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_approx_gauss": { "Student_t_approx_gauss": {
"model": GPy.likelihoods.student_t(deg_free=1000, sigma2=self.var), "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
"grad_params": { "grad_params": {
"names": ["t_noise"], "names": ["t_noise"],
"vals": [self.var], "vals": [self.var],
"constraints": [constrain_positive] "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
}, },
"laplace": True "laplace": True
}, },
"Student_t_log": { "Student_t_log": {
"model": GPy.likelihoods.student_t(gp_link=gp_transformations.Log(), deg_free=5, sigma2=self.var), "model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
"grad_params": { "grad_params": {
"names": ["t_noise"], "names": ["t_noise"],
"vals": [self.var], "vals": [self.var],
"constraints": [constrain_positive] "constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
}, },
"laplace": True "laplace": True
}, },
"Gaussian_default": { "Gaussian_default": {
"model": GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N), "model": GPy.likelihoods.Gaussian(variance=self.var),
"grad_params": { "grad_params": {
"names": ["noise_model_variance"], "names": ["variance"],
"vals": [self.var], "vals": [self.var],
"constraints": [constrain_positive] "constraints": [("variance", constrain_positive)]
}, },
"laplace": True, "laplace": True,
"ep": True "ep": True
}, },
#"Gaussian_log": { #"Gaussian_log": {
#"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N), #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log(), variance=self.var, D=self.D, N=self.N),
#"grad_params": { #"grad_params": {
#"names": ["noise_model_variance"], #"names": ["noise_model_variance"],
#"vals": [self.var], #"vals": [self.var],
@ -207,7 +230,7 @@ class TestNoiseModels(object):
#"laplace": True #"laplace": True
#}, #},
#"Gaussian_probit": { #"Gaussian_probit": {
#"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Probit(), variance=self.var, D=self.D, N=self.N), #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
#"grad_params": { #"grad_params": {
#"names": ["noise_model_variance"], #"names": ["noise_model_variance"],
#"vals": [self.var], #"vals": [self.var],
@ -216,7 +239,7 @@ class TestNoiseModels(object):
#"laplace": True #"laplace": True
#}, #},
#"Gaussian_log_ex": { #"Gaussian_log_ex": {
#"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log_ex_1(), variance=self.var, D=self.D, N=self.N), #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
#"grad_params": { #"grad_params": {
#"names": ["noise_model_variance"], #"names": ["noise_model_variance"],
#"vals": [self.var], #"vals": [self.var],
@ -225,31 +248,31 @@ class TestNoiseModels(object):
#"laplace": True #"laplace": True
#}, #},
"Bernoulli_default": { "Bernoulli_default": {
"model": GPy.likelihoods.bernoulli(), "model": GPy.likelihoods.Bernoulli(),
"link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)], "link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
"laplace": True, "laplace": True,
"Y": self.binary_Y, "Y": self.binary_Y,
"ep": True "ep": True
}, },
"Exponential_default": { #"Exponential_default": {
"model": GPy.likelihoods.exponential(), #"model": GPy.likelihoods.exponential(),
"link_f_constraints": [constrain_positive], #"link_f_constraints": [constrain_positive],
"Y": self.positive_Y, #"Y": self.positive_Y,
"laplace": True, #"laplace": True,
}, #},
"Poisson_default": { #"Poisson_default": {
"model": GPy.likelihoods.poisson(), #"model": GPy.likelihoods.poisson(),
"link_f_constraints": [constrain_positive], #"link_f_constraints": [constrain_positive],
"Y": self.integer_Y, #"Y": self.integer_Y,
"laplace": True, #"laplace": True,
"ep": False #Should work though... #"ep": False #Should work though...
}, #},
"Gamma_default": { #"Gamma_default": {
"model": GPy.likelihoods.gamma(), #"model": GPy.likelihoods.gamma(),
"link_f_constraints": [constrain_positive], #"link_f_constraints": [constrain_positive],
"Y": self.positive_Y, #"Y": self.positive_Y,
"laplace": True #"laplace": True
} #}
} }
for name, attributes in noise_models.iteritems(): for name, attributes in noise_models.iteritems():
@ -286,8 +309,8 @@ class TestNoiseModels(object):
else: else:
ep = False ep = False
if len(param_vals) > 1: #if len(param_vals) > 1:
raise NotImplementedError("Cannot support multiple params in likelihood yet!") #raise NotImplementedError("Cannot support multiple params in likelihood yet!")
#Required by all #Required by all
#Normal derivatives #Normal derivatives
@ -302,13 +325,13 @@ class TestNoiseModels(object):
yield self.t_d3logpdf_df3, model, Y, f yield self.t_d3logpdf_df3, model, Y, f
yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints
#Params #Params
yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_constraints yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_constraints yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_constraints yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_names, param_constraints
#Link params #Link params
yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_constraints yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_constraints yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_constraints yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_names, param_constraints
#laplace likelihood gradcheck #laplace likelihood gradcheck
yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
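These yield statements follow nose's generator-test convention: each yielded tuple is a callable followed by its arguments, and nose runs every tuple as an individual test case, wrapped by the with_setup decorators on the target methods. A minimal sketch of the pattern itself (illustrative names, not part of this suite):
class TestArithmetic(object):
    def test_sums(self):
        # nose treats each yielded (callable, *args) tuple as a separate test
        for a, b, expected in [(1, 1, 2), (2, 3, 5)]:
            yield self.check_sum, a, b, expected

    def check_sum(self, a, b, expected):
        assert a + b == expected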
@ -370,33 +393,33 @@ class TestNoiseModels(object):
# df_dparams # # df_dparams #
############## ##############
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints): def t_dlogpdf_dparams(self, model, Y, f, params, params_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta, dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
params, args=(f, Y), constraints=param_constraints, params, params_names, args=(f, Y), constraints=param_constraints,
randomize=True, verbose=True) randomize=False, verbose=True)
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints): def t_dlogpdf_df_dparams(self, model, Y, f, params, params_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta, dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
params, args=(f, Y), constraints=param_constraints, params, params_names, args=(f, Y), constraints=param_constraints,
randomize=True, verbose=True) randomize=False, verbose=True)
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints): def t_d2logpdf2_df2_dparams(self, model, Y, f, params, params_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta, dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
params, args=(f, Y), constraints=param_constraints, params, params_names, args=(f, Y), constraints=param_constraints,
randomize=True, verbose=True) randomize=False, verbose=True)
) )
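The dparam_checkgrad helper (defined elsewhere in the test utilities) asserts that the analytic parameter gradients agree with numerical ones. Its exact implementation is not shown in this diff; the sketch below only illustrates the underlying central-difference idea, with the constraint handling and re-setting of parameters on the likelihood omitted:
import numpy as np

def central_difference_agrees(func, grad, theta, step=1e-6, tolerance=1e-4):
    # func(theta) -> scalar objective, grad(theta) -> analytic gradient w.r.t. theta
    theta = np.asarray(theta, dtype=float)
    numeric = np.zeros_like(theta)
    for i in range(theta.size):
        dt = np.zeros_like(theta)
        dt[i] = step
        numeric[i] = (func(theta + dt) - func(theta - dt)) / (2.0 * step)
    analytic = np.asarray(grad(theta), dtype=float).ravel()
    return np.allclose(numeric, analytic, rtol=tolerance, atol=tolerance)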
################ ################
@ -454,32 +477,32 @@ class TestNoiseModels(object):
# dlink_dparams # # dlink_dparams #
################# #################
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_link_dparams(self, model, Y, f, params, param_constraints): def t_dlogpdf_link_dparams(self, model, Y, f, params, param_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta, dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta,
params, args=(f, Y), constraints=param_constraints, params, param_names, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_constraints): def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta, dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta,
params, args=(f, Y), constraints=param_constraints, params, param_names, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@with_setup(setUp, tearDown) @with_setup(setUp, tearDown)
def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_constraints): def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
print model print model
assert ( assert (
dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta, dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta,
params, args=(f, Y), constraints=param_constraints, params, param_names, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True) randomize=False, verbose=True)
) )
@ -493,18 +516,23 @@ class TestNoiseModels(object):
Y = Y/Y.max() Y = Y/Y.max()
white_var = 1e-6 white_var = 1e-6
kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), model) laplace_likelihood = GPy.inference.latent_function_inference.LaplaceInference()
m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=laplace_likelihood) m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood)
m.ensure_default_constraints() m.ensure_default_constraints()
m.constrain_fixed('white', white_var) m['white'].constrain_fixed(white_var)
for param_num in range(len(param_names)): #Set constraints
name = param_names[param_num] for constrain_param, constraint in constraints:
m[name] = param_vals[param_num] constraint(constrain_param, m)
constraints[param_num](name, m)
print m print m
m.randomize() m.randomize()
#Set params
for param_num in range(len(param_names)):
name = param_names[param_num]
m[name] = param_vals[param_num]
#m.optimize(max_iters=8) #m.optimize(max_iters=8)
print m print m
m.checkgrad(verbose=1, step=step) m.checkgrad(verbose=1, step=step)
@ -525,10 +553,10 @@ class TestNoiseModels(object):
Y = Y/Y.max() Y = Y/Y.max()
white_var = 1e-6 white_var = 1e-6
kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
ep_likelihood = GPy.likelihoods.EP(Y.copy(), model) ep_inf = GPy.inference.latent_function_inference.EP()
m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=ep_likelihood) m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf)
m.ensure_default_constraints() m.ensure_default_constraints()
m.constrain_fixed('white', white_var) m['white'].constrain_fixed(white_var)
for param_num in range(len(param_names)): for param_num in range(len(param_names)):
name = param_names[param_num] name = param_names[param_num]
@ -559,8 +587,8 @@ class LaplaceTests(unittest.TestCase):
self.var = 0.2 self.var = 0.2
self.var = np.random.rand(1) self.var = np.random.rand(1)
self.stu_t = GPy.likelihoods.student_t(deg_free=5, sigma2=self.var) self.stu_t = GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var)
self.gauss = GPy.likelihoods.gaussian(gp_transformations.Log(), variance=self.var, D=self.D, N=self.N) self.gauss = GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var)
#Make a bigger step as lower bound can be quite curved #Make a bigger step as lower bound can be quite curved
self.step = 1e-6 self.step = 1e-6
@ -584,7 +612,7 @@ class LaplaceTests(unittest.TestCase):
noise = np.random.randn(*self.X.shape)*self.real_std noise = np.random.randn(*self.X.shape)*self.real_std
self.Y = np.sin(self.X*2*np.pi) + noise self.Y = np.sin(self.X*2*np.pi) + noise
self.f = np.random.rand(self.N, 1) self.f = np.random.rand(self.N, 1)
self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N) self.gauss = GPy.likelihoods.Gaussian(variance=self.var)
dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y) dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y)
d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y) d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y)
@ -605,23 +633,27 @@ class LaplaceTests(unittest.TestCase):
#Yc = Y.copy() #Yc = Y.copy()
#Yc[75:80] += 1 #Yc[75:80] += 1
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
kernel2 = kernel1.copy() #FIXME: Make sure you can copy kernels when params are fixed
#kernel2 = kernel1.copy()
kernel2 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1) gauss_distr1 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
m1.constrain_fixed('white', 1e-6) exact_inf = GPy.inference.latent_function_inference.ExactGaussianInference()
m1['noise'] = initial_var_guess m1 = GPy.core.GP(X, Y.copy(), kernel=kernel1, likelihood=gauss_distr1, inference_method=exact_inf)
m1.constrain_bounded('noise', 1e-4, 10) m1['white'].constrain_fixed(1e-6)
m1.constrain_bounded('rbf', 1e-4, 10) m1['variance'] = initial_var_guess
m1['variance'].constrain_bounded(1e-4, 10)
m1['rbf'].constrain_bounded(1e-4, 10)
m1.ensure_default_constraints() m1.ensure_default_constraints()
m1.randomize() m1.randomize()
gauss_distr = GPy.likelihoods.gaussian(variance=initial_var_guess, D=1, N=Y.shape[0]) gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), gauss_distr) laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
m2 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel2, likelihood=laplace_likelihood) m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf)
m2.ensure_default_constraints() m2.ensure_default_constraints()
m2.constrain_fixed('white', 1e-6) m2['white'].constrain_fixed(1e-6)
m2.constrain_bounded('rbf', 1e-4, 10) m2['rbf'].constrain_bounded(1e-4, 10)
m2.constrain_bounded('noise', 1e-4, 10) m2['variance'].constrain_bounded(1e-4, 10)
m2.randomize() m2.randomize()
if debug: if debug:
@ -667,7 +699,7 @@ class LaplaceTests(unittest.TestCase):
#Check Y's are the same #Check Y's are the same
np.testing.assert_almost_equal(Y, m2.likelihood.Y, decimal=5) np.testing.assert_almost_equal(m1.Y, m2.Y, decimal=5)
#Check marginals are the same #Check marginals are the same
np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2) np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2)
#Check marginals are the same with random #Check marginals are the same with random
View file
@ -9,42 +9,44 @@ import numpy
import GPy import GPy
import itertools import itertools
from GPy.core import Model from GPy.core import Model
from GPy.core.parameterization.param import Param
from GPy.core.parameterization.transformations import Logexp
class PsiStatModel(Model): class PsiStatModel(Model):
def __init__(self, which, X, X_variance, Z, num_inducing, kernel): def __init__(self, which, X, X_variance, Z, num_inducing, kernel):
super(PsiStatModel, self).__init__(name='psi stat test')
self.which = which self.which = which
self.X = X self.X = Param("X", X)
self.X_variance = X_variance self.X_variance = Param('X_variance', X_variance, Logexp())
self.Z = Z self.Z = Param("Z", Z)
self.N, self.input_dim = X.shape self.N, self.input_dim = X.shape
self.num_inducing, input_dim = Z.shape self.num_inducing, input_dim = Z.shape
assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape) assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
self.kern = kernel self.kern = kernel
super(PsiStatModel, self).__init__()
self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance) self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
def _get_param_names(self): self.add_parameters(self.X, self.X_variance, self.Z, self.kern)
Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.input_dim))]
Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.num_inducing), range(self.input_dim))]
return Xnames + Znames + self.kern._get_param_names()
def _get_params(self):
return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()])
def _set_params(self, x, save_old=True, save_count=0):
start, end = 0, self.X.size
self.X = x[start:end].reshape(self.N, self.input_dim)
start, end = end, end + self.X_variance.size
self.X_variance = x[start: end].reshape(self.N, self.input_dim)
start, end = end, end + self.Z.size
self.Z = x[start: end].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(x[end:])
def log_likelihood(self): def log_likelihood(self):
return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum() return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
def _log_likelihood_gradients(self):
def parameters_changed(self):
psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance) psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
self.X.gradient = psimu
self.X_variance.gradient = psiS
#psimu, psiS = numpy.ones(self.N * self.input_dim), numpy.ones(self.N * self.input_dim) #psimu, psiS = numpy.ones(self.N * self.input_dim), numpy.ones(self.N * self.input_dim)
psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance) try: psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
except AttributeError: psiZ = numpy.zeros_like(self.Z)
self.Z.gradient = psiZ
#psiZ = numpy.ones(self.num_inducing * self.input_dim) #psiZ = numpy.ones(self.num_inducing * self.input_dim)
thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten() N,M = self.X.shape[0], self.Z.shape[0]
return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad)) dL_dpsi0, dL_dpsi1, dL_dpsi2 = numpy.zeros([N]), numpy.zeros([N,M]), numpy.zeros([N,M,M])
if self.which == 'psi0': dL_dpsi0 += 1
if self.which == 'psi1': dL_dpsi1 += 1
if self.which == 'psi2': dL_dpsi2 += 1
self.kern.update_gradients_variational(numpy.zeros([1,1]),
dL_dpsi0,
dL_dpsi1,
dL_dpsi2, self.X, self.X_variance, self.Z)
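The rewritten PsiStatModel above shows the new parameterization workflow used throughout this commit: wrap arrays in Param (optionally with a transformation such as Logexp), register them with add_parameters, and write gradients onto each parameter's .gradient inside parameters_changed, instead of hand-coding _get_param_names/_get_params/_set_params/_log_likelihood_gradients. A stripped-down sketch of that pattern with a toy objective (the class and objective are illustrative only; the API calls mirror the ones in the diff above):
import numpy as np
from GPy.core import Model
from GPy.core.parameterization.param import Param
from GPy.core.parameterization.transformations import Logexp

class ToyModel(Model):
    def __init__(self, x_init):
        super(ToyModel, self).__init__(name='toy')
        # x_init must be positive, since Logexp keeps the parameter positive
        self.x = Param('x', x_init, Logexp())
        self.add_parameters(self.x)

    def log_likelihood(self):
        return -float(np.sum(np.square(self.x)))

    def parameters_changed(self):
        # gradient of log_likelihood with respect to x
        self.x.gradient = -2.0 * self.x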
class DPsiStatTest(unittest.TestCase): class DPsiStatTest(unittest.TestCase):
input_dim = 5 input_dim = 5
@ -57,61 +59,66 @@ class DPsiStatTest(unittest.TestCase):
Y = X.dot(numpy.random.randn(input_dim, input_dim)) Y = X.dot(numpy.random.randn(input_dim, input_dim))
# kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)] # kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)]
kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim), kernels = [
GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim), GPy.kern.linear(input_dim),
GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)] GPy.kern.rbf(input_dim),
#GPy.kern.bias(input_dim),
#GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim),
#GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)
]
def testPsi0(self): def testPsi0(self):
for k in self.kernels: for k in self.kernels:
m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,\ m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,\
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() #m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts))) import ipdb;ipdb.set_trace()
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi1(self): def testPsi1(self):
for k in self.kernels: for k in self.kernels:
m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_lin(self): def testPsi2_lin(self):
k = self.kernels[0] k = self.kernels[0]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_lin_bia(self): def testPsi2_lin_bia(self):
k = self.kernels[3] k = self.kernels[3]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_rbf(self): def testPsi2_rbf(self):
k = self.kernels[1] k = self.kernels[1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_rbf_bia(self): def testPsi2_rbf_bia(self):
k = self.kernels[-1] k = self.kernels[-1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_bia(self): def testPsi2_bia(self):
k = self.kernels[2] k = self.kernels[2]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints() m.ensure_default_constraints(warning=0)
m.randomize() m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
if __name__ == "__main__": if __name__ == "__main__":
View file
@ -1,10 +1,9 @@
import numpy as np from ..core.parameterization.array_core import ObservableArray, ParamList
from ..core.parameterization.array_core import ObservableArray
class Cacher(object): class Cacher(object):
def __init__(self, operation, limit=5): def __init__(self, operation, limit=5):
self.limit = int(limit) self.limit = int(limit)
self.operation=operation self.operation=operation
self.cached_inputs = [] self.cached_inputs = ParamList([])
self.cached_outputs = [] self.cached_outputs = []
self.inputs_changed = [] self.inputs_changed = []
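Only the constructor of Cacher is visible in this hunk. Its intended use (an assumption based on the attributes shown, not on code in this diff) is to wrap an expensive operation so that repeated calls with unchanged inputs are served from the cache, keeping at most `limit` stored input/output pairs:
import numpy as np

def expensive_gram(X):
    print "recomputing"          # Python 2 print, matching the surrounding code
    return np.dot(X, X.T)

cached_gram = Cacher(expensive_gram, limit=3)
X = np.random.randn(100, 5)
K1 = cached_gram(X)   # assumed __call__: computes and stores the result
K2 = cached_gram(X)   # same, unchanged input: served from cached_outputs (assumption)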