Merged parameterized fixing

Alan Saul 2014-02-10 15:42:47 +00:00
commit b72d319909
41 changed files with 248 additions and 270 deletions

View file

@@ -439,6 +439,8 @@ class Model(Parameterized):
             print "No free parameters to check"
             return
+        gradient = self.objective_function_gradients(x)
+        np.where(gradient==0, 1e-312, gradient)
         for i in param_list:
             xx = x.copy()
@@ -446,11 +448,10 @@ class Model(Parameterized):
             f1, g1 = self.objective_and_gradients(xx)
             xx[i] -= 2.*step
             f2, g2 = self.objective_and_gradients(xx)
-            gradient = self.objective_function_gradients(x)[i]
             numerical_gradient = (f1 - f2) / (2 * step)
-            ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient))
-            difference = np.abs((f1 - f2) / 2 / step - gradient)
+            ratio = (f1 - f2) / (2 * step * gradient[i])
+            difference = np.abs((f1 - f2) / 2 / step - gradient[i])
             if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
                 formatted_name = "\033[92m {0} \033[0m".format(names[i])
@@ -458,7 +459,7 @@ class Model(Parameterized):
                 formatted_name = "\033[91m {0} \033[0m".format(names[i])
             r = '%.6f' % float(ratio)
             d = '%.6f' % float(difference)
-            g = '%.6f' % gradient
+            g = '%.6f' % gradient[i]
             ng = '%.6f' % float(numerical_gradient)
             grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
             print grad_string
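
The change above hoists the analytic gradient out of the per-parameter loop and indexes it with gradient[i] in the ratio and difference. Note that the hoisted np.where(gradient==0, 1e-312, gradient) result is never assigned back, so the division-by-zero guard the old in-loop expression provided is silently dropped. For reference, a minimal standalone sketch of the same central-difference check with the guard kept (names here are illustrative, not GPy's API):

    import numpy as np

    def checkgrad(f, df, x, step=1e-6, tolerance=1e-3):
        # compare analytic gradients df(x) against central differences of f
        analytic = df(x)
        # keep a guarded copy so the ratio never divides by zero
        safe = np.where(analytic == 0., 1e-312, analytic)
        ok = []
        for i in range(x.size):
            xx = x.copy()
            xx[i] += step
            f1 = f(xx)
            xx[i] -= 2. * step
            f2 = f(xx)
            numerical = (f1 - f2) / (2. * step)
            ok.append(abs(1. - numerical / safe[i]) < tolerance
                      or abs(numerical - analytic[i]) < tolerance)
        return np.array(ok)

    # e.g. checkgrad(lambda x: np.sum(x**2), lambda x: 2*x, np.random.randn(5))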

View file

@@ -15,8 +15,18 @@ class ListArray(np.ndarray):
     def __new__(cls, input_array):
         obj = np.asanyarray(input_array).view(cls)
         return obj
-    def __eq__(self, other):
-        return other is self
+    #def __eq__(self, other):
+    #    return other is self
+
+class ParamList(list):
+    def __contains__(self, other):
+        for el in self:
+            if el is other:
+                return True
+        return False
+    pass
 class ObservableArray(ListArray, Observable):
     """
@@ -36,16 +46,19 @@ class ObservableArray(ListArray, Observable):
         if obj is None: return
         self._observers_ = getattr(obj, '_observers_', None)
     def __setitem__(self, s, val, update=True):
-        if self.ndim:
-            if not np.all(np.equal(self[s], val)):
-                super(ObservableArray, self).__setitem__(s, val)
-                if update:
-                    self._notify_observers()
-        else:
-            if not np.all(np.equal(self, val)):
-                super(ObservableArray, self).__setitem__(Ellipsis, val)
-                if update:
-                    self._notify_observers()
+        super(ObservableArray, self).__setitem__(s, val)
+        if update:
+            self._notify_observers()
+        # if self.ndim:
+        #     if not np.all(np.equal(self[s], val)):
+        #         super(ObservableArray, self).__setitem__(s, val)
+        #         if update:
+        #             self._notify_observers()
+        # else:
+        #     if not np.all(np.equal(self, val)):
+        #         super(ObservableArray, self).__setitem__(Ellipsis, val)
+        #         if update:
+        #             self._notify_observers()
     def __getslice__(self, start, stop):
         return self.__getitem__(slice(start, stop))
     def __setslice__(self, start, stop, val):
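
The new ParamList exists because numpy arrays redefine __eq__ elementwise: the commented-out ListArray.__eq__ shows the old workaround, and with it gone a plain list's `in` (which tests equality) would raise on the ambiguous truth value of an array comparison. ParamList sidesteps that by testing identity. A standalone demonstration:

    import numpy as np

    class ParamList(list):
        # containment by identity: numpy's == broadcasts elementwise,
        # so list.__contains__'s equality test would raise or misfire
        def __contains__(self, other):
            return any(el is other for el in self)

    a = np.zeros(3)
    b = np.zeros(3)
    params = ParamList([a])
    assert a in params        # same object
    assert b not in params    # equal values, but a different object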

View file

@@ -90,11 +90,6 @@ class ParameterIndexOperations(object):
         return self._properties.values()
     def properties_for(self, index):
-        # already_seen = dict()
-        # for ni in index:
-        #     if ni not in already_seen:
-        #         already_seen[ni] = [prop for prop in self.iter_properties() if ni in self._properties[prop]]
-        #     yield already_seen[ni]
         return vectorize(lambda i: [prop for prop in self.iter_properties() if i in self._properties[prop]], otypes=[list])(index)
     def add(self, prop, indices):
@@ -111,70 +106,11 @@ class ParameterIndexOperations(object):
                 self._properties[prop] = diff
             else:
                 del self._properties[prop]
-            #[self._reverse[i].remove(prop) for i in removed if prop in self._reverse[i]]
             return removed.astype(int)
-        # else:
-        #     for a in self.properties():
-        #         if numpy.all(a==prop) and a._parent_index_ == prop._parent_index_:
-        #             ind = create_raveled_indices(indices, shape, offset)
-        #             diff = remove_indices(self[a], ind)
-        #             removed = numpy.intersect1d(self[a], ind, True)
-        #             if not index_empty(diff):
-        #                 self._properties[a] = diff
-        #             else:
-        #                 del self._properties[a]
-        #             [self._reverse[i].remove(a) for i in removed if a in self._reverse[i]]
-        #             return removed.astype(int)
         return numpy.array([]).astype(int)
     def __getitem__(self, prop):
         return self._properties[prop]
-# class TieIndexOperations(object):
-#     def __init__(self, params):
-#         self.params = params
-#         self.tied_from = ParameterIndexOperations()
-#         self.tied_to = ParameterIndexOperations()
-#     def add(self, tied_from, tied_to):
-#         rav_from = self.params._raveled_index_for(tied_from)
-#         rav_to = self.params._raveled_index_for(tied_to)
-#         self.tied_from.add(tied_to, rav_from)
-#         self.tied_to.add(tied_to, rav_to)
-#         return rav_from, rav_to
-#     def remove(self, tied_from, tied_to):
-#         rav_from = self.params._raveled_index_for(tied_from)
-#         rav_to = self.params._raveled_index_for(tied_to)
-#         rem_from = self.tied_from.remove(tied_to, rav_from)
-#         rem_to = self.tied_to.remove(tied_to, rav_to)
-#         left_from = self.tied_from._properties.pop(tied_to)
-#         left_to = self.tied_to._properties.pop(tied_to)
-#         self.tied_from[numpy.delete(tied_to, rem_from)] = left_from
-#         self.tied_to[numpy.delete(tied_to, rem_to)] = left_to
-#         return rav_from, rav_to
-#     def from_to_for(self, index):
-#         return self.tied_from.properties_for(index), self.tied_to.properties_for(index)
-#     def iter_from_to_indices(self):
-#         for k, f in self.tied_from.iteritems():
-#             yield f, self.tied_to[k]
-#     def iter_to_indices(self):
-#         return self.tied_to.iterindices()
-#     def iter_from_indices(self):
-#         return self.tied_from.iterindices()
-#     def iter_from_items(self):
-#         for f, i in self.tied_from.iteritems():
-#             yield f, i
-#     def iter_properties(self):
-#         return self.tied_from.iter_properties()
-#     def properties(self):
-#         return self.tied_from.properties()
-#     def from_to_indices(self, param):
-#         return self.tied_from[param], self.tied_to[param]
-#
-# # def create_raveled_indices(index, shape, offset=0):
-# #     if isinstance(index, (tuple, list)): i = [slice(None)] + list(index)
-# #     else: i = [slice(None), index]
-# #     ind = numpy.array(numpy.ravel_multi_index(numpy.indices(shape)[i], shape)).flat + numpy.int_(offset)
-# #     return ind
 def combine_indices(arr1, arr2):
     return numpy.union1d(arr1, arr2)
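
What survives in remove() above is plain index bookkeeping: each property maps to a sorted array of raveled indices, removal is a set difference, and the indices actually removed are reported back to the caller. A small standalone sketch of that pattern (dict and names illustrative):

    import numpy as np

    props = {"positive": np.array([0, 1, 4, 5])}

    def remove(prop, indices):
        ind = np.asarray(indices)
        removed = np.intersect1d(props[prop], ind)   # what actually goes away
        diff = np.setdiff1d(props[prop], ind)        # what stays constrained
        if diff.size:
            props[prop] = diff
        else:
            del props[prop]                          # last index gone: drop the key
        return removed.astype(int)

    print(remove("positive", [1, 5]))   # -> [1 5]; props["positive"] is now [0 4]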

View file

@@ -75,7 +75,6 @@ class Param(ObservableArray, Constrainable):
         super(Param, self).__array_finalize__(obj)
         self._direct_parent_ = getattr(obj, '_direct_parent_', None)
         self._parent_index_ = getattr(obj, '_parent_index_', None)
-        self._highest_parent_ = getattr(obj, '_highest_parent_', None)
         self._current_slice_ = getattr(obj, '_current_slice_', None)
         self._tied_to_me_ = getattr(obj, '_tied_to_me_', None)
         self._tied_to_ = getattr(obj, '_tied_to_', None)
@@ -98,7 +97,6 @@ class Param(ObservableArray, Constrainable):
                 (self.name,
                  self._direct_parent_,
                  self._parent_index_,
-                 self._highest_parent_,
                  self._current_slice_,
                  self._realshape_,
                  self._realsize_,
@@ -119,7 +117,6 @@ class Param(ObservableArray, Constrainable):
         self._realsize_ = state.pop()
         self._realshape_ = state.pop()
         self._current_slice_ = state.pop()
-        self._highest_parent_ = state.pop()
         self._parent_index_ = state.pop()
         self._direct_parent_ = state.pop()
         self.name = state.pop()
@@ -153,8 +150,6 @@ class Param(ObservableArray, Constrainable):
     @property
     def _parameters_(self):
         return []
-    def _connect_highest_parent(self, highest_parent):
-        self._highest_parent_ = highest_parent
     def _collect_gradient(self, target):
         target[:] = self.gradient.flat
     #===========================================================================
@@ -178,7 +173,7 @@ class Param(ObservableArray, Constrainable):
         # this tie will be created to the parameter the param is tied
         # to.
-        assert isinstance(param, Param), "Argument {1} not of type {0}".format(Param,param.__class__)
+        assert isinstance(param, Param), "Argument {1} not of type {0}".format(Param, param.__class__)
         param = numpy.atleast_1d(param)
         if param.size != 1:
             raise NotImplementedError, "Broadcast tying is not implemented yet"
@@ -231,7 +226,7 @@ class Param(ObservableArray, Constrainable):
                 t_rav_i = t._raveled_index()
                 tr_rav_i = tied_to_me._raveled_index()
                 new_index = list(set(t_rav_i) | set(tr_rav_i))
-                tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index,t._realshape_)]
+                tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index, t._realshape_)]
                 self._tied_to_me_[tmp] = self._tied_to_me_[t] | set(self._raveled_index())
                 del self._tied_to_me_[t]
                 return
@@ -244,7 +239,7 @@ class Param(ObservableArray, Constrainable):
                     import ipdb;ipdb.set_trace()
                 new_index = list(set(t_rav_i) - set(tr_rav_i))
                 if new_index:
-                    tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index,t._realshape_)]
+                    tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index, t._realshape_)]
                     self._tied_to_me_[tmp] = self._tied_to_me_[t]
                     del self._tied_to_me_[t]
                     if len(self._tied_to_me_[tmp]) == 0:
@@ -252,7 +247,7 @@ class Param(ObservableArray, Constrainable):
         else:
             del self._tied_to_me_[t]
     def _on_tied_parameter_changed(self, val, ind):
-        if not self._updated_: #not fast_array_equal(self, val[ind]):
+        if not self._updated_: # not fast_array_equal(self, val[ind]):
             val = numpy.atleast_1d(val)
             self._updated_ = True
             if self._original_:
@@ -286,11 +281,11 @@ class Param(ObservableArray, Constrainable):
     def __getitem__(self, s, *args, **kwargs):
         if not isinstance(s, tuple):
             s = (s,)
-        if not reduce(lambda a,b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
+        if not reduce(lambda a, b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
             s += (Ellipsis,)
         new_arr = super(Param, self).__getitem__(s, *args, **kwargs)
         try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base
-        except AttributeError: pass# returning 0d array or float, double etc
+        except AttributeError: pass # returning 0d array or float, double etc
         return new_arr
     def __setitem__(self, s, val, update=True):
         super(Param, self).__setitem__(s, val, update=update)
@@ -311,8 +306,8 @@ class Param(ObservableArray, Constrainable):
             elif isinstance(si, (list,numpy.ndarray,tuple)):
                 a = si[0]
             else: a = si
-            if a<0:
-                a = self._realshape_[i]+a
+            if a < 0:
+                a = self._realshape_[i] + a
             internal_offset += a * extended_realshape[i]
         return internal_offset
     def _raveled_index(self, slice_index=None):
@@ -320,8 +315,8 @@ class Param(ObservableArray, Constrainable):
         # of this object
         extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1]
         ind = self._indices(slice_index)
-        if ind.ndim < 2: ind=ind[:,None]
-        return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape*x), 1, ind), dtype=int)
+        if ind.ndim < 2: ind = ind[:, None]
+        return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int)
     def _expand_index(self, slice_index=None):
         # this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes
         # it basically translates slices to their respective index arrays and turns negative indices around
@@ -334,11 +329,11 @@ class Param(ObservableArray, Constrainable):
             if isinstance(a, slice):
                 start, stop, step = a.indices(b)
                 return numpy.r_[start:stop:step]
-            elif isinstance(a, (list,numpy.ndarray,tuple)):
+            elif isinstance(a, (list, numpy.ndarray, tuple)):
                 a = numpy.asarray(a, dtype=int)
-                a[a<0] = b + a[a<0]
-            elif a<0:
-                a = b+a
+                a[a < 0] = b + a[a < 0]
+            elif a < 0:
+                a = b + a
             return numpy.r_[a]
             return numpy.r_[:b]
         return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size)))
@@ -362,7 +357,7 @@ class Param(ObservableArray, Constrainable):
     #===========================================================================
     @property
     def _description_str(self):
-        if self.size <= 1: return ["%f"%self]
+        if self.size <= 1: return ["%f" % self]
         else: return [str(self.shape)]
     def _parameter_names(self, add_name):
         return [self.name]
@@ -374,31 +369,31 @@ class Param(ObservableArray, Constrainable):
         return [self.shape]
     @property
     def _constraints_str(self):
-        return [' '.join(map(lambda c: str(c[0]) if c[1].size==self._realsize_ else "{"+str(c[0])+"}", self._highest_parent_._constraints_iter_items(self)))]
+        return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self._highest_parent_._constraints_iter_items(self)))]
     @property
     def _ties_str(self):
         return [t._short() for t in self._tied_to_] or ['']
     @property
     def name_hirarchical(self):
         if self.has_parent():
-            return self._direct_parent_.hirarchy_name()+adjust_name_for_printing(self.name)
+            return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
         return adjust_name_for_printing(self.name)
     def __repr__(self, *args, **kwargs):
         name = "\033[1m{x:s}\033[0;0m:\n".format(
                             x=self.name_hirarchical)
-        return name + super(Param, self).__repr__(*args,**kwargs)
+        return name + super(Param, self).__repr__(*args, **kwargs)
     def _ties_for(self, rav_index):
-        #size = sum(p.size for p in self._tied_to_)
+        # size = sum(p.size for p in self._tied_to_)
         ties = numpy.empty(shape=(len(self._tied_to_), numpy.size(rav_index)), dtype=Param)
         for i, tied_to in enumerate(self._tied_to_):
             for t, ind in tied_to._tied_to_me_.iteritems():
                 if t._parent_index_ == self._parent_index_:
-                    matches = numpy.where(rav_index[:,None] == t._raveled_index()[None, :])
+                    matches = numpy.where(rav_index[:, None] == t._raveled_index()[None, :])
                     tt_rav_index = tied_to._raveled_index()
                     ind_rav_matches = numpy.where(tt_rav_index == numpy.array(list(ind)))[0]
                     if len(ind) != 1: ties[i, matches[0][ind_rav_matches]] = numpy.take(tt_rav_index, matches[1], mode='wrap')[ind_rav_matches]
                     else: ties[i, matches[0]] = numpy.take(tt_rav_index, matches[1], mode='wrap')
-        return map(lambda a: sum(a,[]), zip(*[[[tie.flatten()] if tx!=None else [] for tx in t] for t,tie in zip(ties,self._tied_to_)]))
+        return map(lambda a: sum(a, []), zip(*[[[tie.flatten()] if tx != None else [] for tx in t] for t, tie in zip(ties, self._tied_to_)]))
     def _constraints_for(self, rav_index):
         return self._highest_parent_._constraints_for(self, rav_index)
     def _indices(self, slice_index=None):
@@ -408,12 +403,12 @@ class Param(ObservableArray, Constrainable):
         if isinstance(slice_index, (tuple, list)):
             clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)]
             if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
-                and len(set(map(len,clean_curr_slice))) <= 1):
+                and len(set(map(len, clean_curr_slice))) <= 1):
                 return numpy.fromiter(itertools.izip(*clean_curr_slice),
-                    dtype=[('',int)]*self._realndim_,count=len(clean_curr_slice[0])).view((int, self._realndim_))
+                    dtype=[('', int)] * self._realndim_, count=len(clean_curr_slice[0])).view((int, self._realndim_))
         expanded_index = list(self._expand_index(slice_index))
         return numpy.fromiter(itertools.product(*expanded_index),
-                 dtype=[('',int)]*self._realndim_,count=reduce(lambda a,b: a*b.size,expanded_index,1)).view((int, self._realndim_))
+                 dtype=[('', int)] * self._realndim_, count=reduce(lambda a, b: a * b.size, expanded_index, 1)).view((int, self._realndim_))
     def _max_len_names(self, gen, header):
         return reduce(lambda a, b:max(a, len(b)), gen, len(header))
     def _max_len_values(self):
@@ -426,9 +421,9 @@ class Param(ObservableArray, Constrainable):
         if self._realsize_ < 2:
             return name
         ind = self._indices()
-        if ind.size > 4: indstr = ','.join(map(str,ind[:2])) + "..." + ','.join(map(str,ind[-2:]))
-        else: indstr = ','.join(map(str,ind))
-        return name+'['+indstr+']'
+        if ind.size > 4: indstr = ','.join(map(str, ind[:2])) + "..." + ','.join(map(str, ind[-2:]))
+        else: indstr = ','.join(map(str, ind))
+        return name + '[' + indstr + ']'
     def __str__(self, constr_matrix=None, indices=None, ties=None, lc=None, lx=None, li=None, lt=None):
         filter_ = self._current_slice_
         vals = self.flat
@@ -441,10 +436,10 @@ class Param(ObservableArray, Constrainable):
         if lx is None: lx = self._max_len_values()
         if li is None: li = self._max_len_index(indices)
         if lt is None: lt = self._max_len_names(ties, __tie_name__)
-        header = " {i:^{2}s} | \033[1m{x:^{1}s}\033[0;0m | {c:^{0}s} | {t:^{3}s}".format(lc,lx,li,lt, x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__) # nice header for printing
+        header = " {i:^{2}s} | \033[1m{x:^{1}s}\033[0;0m | {c:^{0}s} | {t:^{3}s}".format(lc, lx, li, lt, x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__) # nice header for printing
         if not ties: ties = itertools.cycle([''])
-        return "\n".join([header]+[" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {t:^{4}s} ".format(lc,lx,__precision__,li,lt, x=x, c=" ".join(map(str,c)), t=(t or ''), i=i) for i,x,c,t in itertools.izip(indices,vals,constr_matrix,ties)]) # return all the constraints with right indices
-        #except: return super(Param, self).__str__()
+        return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, x=x, c=" ".join(map(str, c)), t=(t or ''), i=i) for i, x, c, t in itertools.izip(indices, vals, constr_matrix, ties)]) # return all the constraints with right indices
+        # except: return super(Param, self).__str__()
 class ParamConcatenation(object):
     def __init__(self, params):
@@ -455,8 +450,8 @@ class ParamConcatenation(object):
         See :py:class:`GPy.core.parameter.Param` for more details on constraining.
         """
-        #self.params = params
-        self.params = []
+        # self.params = params
+        self.params = ParamList([])
         for p in params:
             for p in p.flattened_parameters:
                 if p not in self.params:
@@ -535,12 +530,12 @@ class ParamConcatenation(object):
     def untie(self, *ties):
         [param.untie(*ties) for param in self.params]
-    __lt__ = lambda self, val: self._vals()<val
-    __le__ = lambda self, val: self._vals()<=val
-    __eq__ = lambda self, val: self._vals()==val
-    __ne__ = lambda self, val: self._vals()!=val
-    __gt__ = lambda self, val: self._vals()>val
-    __ge__ = lambda self, val: self._vals()>=val
+    __lt__ = lambda self, val: self._vals() < val
+    __le__ = lambda self, val: self._vals() <= val
+    __eq__ = lambda self, val: self._vals() == val
+    __ne__ = lambda self, val: self._vals() != val
+    __gt__ = lambda self, val: self._vals() > val
+    __ge__ = lambda self, val: self._vals() >= val
    def __str__(self, *args, **kwargs):
        def f(p):
            ind = p._raveled_index()
@@ -569,7 +564,7 @@ if __name__ == '__main__':
     print "random done"
     p = Param("q_mean", X)
     p1 = Param("q_variance", numpy.random.rand(*p.shape))
-    p2 = Param("Y", numpy.random.randn(p.shape[0],1))
+    p2 = Param("Y", numpy.random.randn(p.shape[0], 1))
     p3 = Param("variance", numpy.random.rand())
     p4 = Param("lengthscale", numpy.random.rand(2))
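
The _raveled_index machinery in this file maps n-dimensional parameter indices onto flat positions using the cumulative products of the trailing shape dimensions (extended_realshape). A toy illustration of that arithmetic, standalone:

    import numpy as np

    shape = (3, 4)
    # cumulative products of trailing dims: index (i, j) -> i*4 + j*1
    extended_realshape = np.cumprod((1,) + shape[:0:-1])[::-1]   # -> [4 1]
    ind = np.array([[0, 0], [1, 2], [2, 3]])
    flat = ind.dot(extended_realshape)
    assert np.array_equal(flat, np.ravel_multi_index(ind.T, shape))
    print(flat)   # -> [ 0  6 11]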

View file

@@ -48,19 +48,25 @@ class Pickleable(object):
 #===============================================================================
 class Parentable(object):
-    def __init__(self, direct_parent=None, highest_parent=None, parent_index=None):
+    def __init__(self, direct_parent=None, parent_index=None):
         super(Parentable,self).__init__()
         self._direct_parent_ = direct_parent
         self._parent_index_ = parent_index
-        self._highest_parent_ = highest_parent
     def has_parent(self):
-        return self._direct_parent_ is not None and self._highest_parent_ is not None
+        return self._direct_parent_ is not None
+    @property
+    def _highest_parent_(self):
+        if self._direct_parent_ is None:
+            return self
+        return self._direct_parent_._highest_parent_
 class Nameable(Parentable):
     _name = None
-    def __init__(self, name, direct_parent=None, highest_parent=None, parent_index=None):
-        super(Nameable,self).__init__(direct_parent, highest_parent, parent_index)
+    def __init__(self, name, direct_parent=None, parent_index=None):
+        super(Nameable,self).__init__(direct_parent, parent_index)
         self._name = name or self.__class__.__name__
     @property
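
This hunk is the heart of the commit: _highest_parent_ stops being a stored reference that every node had to keep in sync (hence the _connect_highest_parent plumbing deleted from param.py and parameterized.py) and becomes a property computed on demand by walking _direct_parent_ links. A standalone sketch of the pattern:

    class Parentable(object):
        def __init__(self, direct_parent=None):
            self._direct_parent_ = direct_parent

        @property
        def _highest_parent_(self):
            # walk up the parent chain; the root reports itself
            if self._direct_parent_ is None:
                return self
            return self._direct_parent_._highest_parent_

    root = Parentable()
    child = Parentable(direct_parent=root)
    grandchild = Parentable(direct_parent=child)
    assert grandchild._highest_parent_ is root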

View file

@@ -11,6 +11,7 @@ from param import ParamConcatenation, Param
 from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing
 from index_operations import ParameterIndexOperations,\
     index_empty
+from array_core import ParamList
 #===============================================================================
 # Printing:
@@ -69,7 +70,7 @@ class Parameterized(Constrainable, Pickleable, Observable):
         super(Parameterized, self).__init__(name=name)
         self._in_init_ = True
         self._constraints_ = None#ParameterIndexOperations()
-        self._parameters_ = []
+        self._parameters_ = ParamList()
         self.size = sum(p.size for p in self._parameters_)
         if not self._has_fixes():
             self._fixes_ = None
@@ -188,10 +189,10 @@ class Parameterized(Constrainable, Pickleable, Observable):
         note: if it is a string object it will not (!) be regexp-matched
         automatically.
         """
-        self._parameters_ = [p for p in self._parameters_
+        self._parameters_ = ParamList([p for p in self._parameters_
                              if not (p._parent_index_ in names_params_indices
                                      or p.name in names_params_indices
-                                     or p in names_params_indices)]
+                                     or p in names_params_indices)])
         self._connect_parameters()
     def parameters_changed(self):
@@ -216,7 +217,6 @@ class Parameterized(Constrainable, Pickleable, Observable):
         for i,p in enumerate(self._parameters_):
             p._direct_parent_ = self
             p._parent_index_ = i
-            p._connect_highest_parent(self)
             not_unique = []
             sizes.append(p.size+sizes[-1])
             self._param_slices_.append(slice(sizes[-2], sizes[-1]))
@@ -231,14 +231,6 @@ class Parameterized(Constrainable, Pickleable, Observable):
             self.__dict__[pname] = p
             self._added_names_.add(pname)
-    def _connect_highest_parent(self, highest_parent):
-        self._highest_parent_ = highest_parent
-        if not hasattr(self, "_parameters_") or len(self._parameters_) < 1:
-            # no parameters for this class
-            return
-        for p in self._parameters_:
-            p._connect_highest_parent(highest_parent)
     #===========================================================================
     # Pickling operations
     #===========================================================================
@@ -382,6 +374,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
         that is an int array, containing the indexes for the flattened
         param inside this parameterized logic.
         """
+        if isinstance(param, ParamConcatenation):
+            return numpy.hstack((self._raveled_index_for(p) for p in param.params))
         return param._raveled_index() + self._offset_for(param)
     def _raveled_index(self):
@@ -419,10 +413,10 @@ class Parameterized(Constrainable, Pickleable, Observable):
     #===========================================================================
     # Fixing parameters:
     #===========================================================================
-    def _fix(self, param, warning=True, update=True):
+    def _fix(self, param, warning=True):
         f = self._add_constrain(param, __fixed__, warning)
         self._set_fixed(f)
-    def _unfix(self, param, update=True):
+    def _unfix(self, param):
         if self._has_fixes():
             f = self._remove_constrain(param, __fixed__)
             self._set_unfixed(f)
@@ -446,8 +440,7 @@ class Parameterized(Constrainable, Pickleable, Observable):
         # if advanced indexing is activated it happens that the array is a copy
         # you can retrieve the original param through this method, by passing
         # the copy here
-        #return self._parameters_[param._parent_index_]
-        return param._direct_parent_._parameters_[param._parent_index_]
+        return self._parameters_[param._parent_index_]
     def hirarchy_name(self):
         if self.has_parent():
             return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) + "."
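
_raveled_index_for relies on each child parameter owning a contiguous slice of the parent's flattened parameter vector, so a child's global indices are its local raveled indices plus a per-child offset; the new ParamConcatenation branch simply stacks those per-member results. A toy version of the offset arithmetic (shapes and names illustrative):

    import numpy as np

    shapes = [(2, 3), (4,), (1,)]                      # hypothetical child shapes
    sizes = [int(np.prod(s)) for s in shapes]
    offsets = np.concatenate(([0], np.cumsum(sizes)[:-1]))

    def raveled_index_for(child, local_index):
        # global position = local raveled index + start of the child's slice
        return np.asarray(local_index) + offsets[child]

    print(raveled_index_for(1, [0, 2]))                # -> [6 8]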

View file

@@ -3,10 +3,8 @@ Created on 6 Nov 2013
 @author: maxz
 '''
-import numpy as np
 from parameterized import Parameterized
 from param import Param
-from ...util.misc import param_to_array
 class Normal(Parameterized):
     '''
@@ -26,6 +24,7 @@ class Normal(Parameterized):
         See GPy.plotting.matplot_dep.variational_plots
         """
+        import sys
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
-        from ..plotting.matplot_dep import variational_plots
+        from ...plotting.matplot_dep import variational_plots
         return variational_plots.plot(self,*args)

View file

@@ -15,54 +15,54 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False):
     num_inducing = 5
     if plot:
         output_dim = 1
-        input_dim = 2
+        input_dim = 3
     else:
-        input_dim = 2
+        input_dim = 1
         output_dim = 25
     # generate GPLVM-like data
     X = _np.random.rand(num_inputs, input_dim)
     lengthscales = _np.random.rand(input_dim)
     k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
-         + GPy.kern.white(input_dim, 0.01))
+         #+ GPy.kern.white(input_dim, 0.01)
+         )
     K = k.K(X)
     Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, output_dim).T
-    lik = Gaussian(Y, normalize=True)
-    k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
-    # k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
+    # k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
+    k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
     # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
     # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
     # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
     # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
-    m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing)
+    m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
     #===========================================================================
     # randomly obstruct data with percentage p
     p = .8
     Y_obstruct = Y.copy()
     Y_obstruct[_np.random.uniform(size=(Y.shape)) < p] = _np.nan
     #===========================================================================
-    m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
+    #m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
     m.lengthscales = lengthscales
     if plot:
         import matplotlib.pyplot as pb
         m.plot()
         pb.title('PCA initialisation')
-        m2.plot()
-        pb.title('PCA initialisation')
+        #m2.plot()
+        #pb.title('PCA initialisation')
     if optimize:
         m.optimize('scg', messages=verbose)
-        m2.optimize('scg', messages=verbose)
+        #m2.optimize('scg', messages=verbose)
     if plot:
         m.plot()
         pb.title('After optimisation')
-        m2.plot()
-        pb.title('After optimisation')
+        #m2.plot()
+        #pb.title('After optimisation')
-    return m, m2
+    return m
 def gplvm_oil_100(optimize=True, verbose=1, plot=True):
     import GPy
@@ -264,16 +264,16 @@ def bgplvm_simulation(optimize=True, verbose=1,
     D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
     _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
     Y = Ylist[0]
-    k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
+    k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
     m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
-    m['noise'] = Y.var() / 100.
+    m.Gaussian_noise = Y.var() / 100.
     if optimize:
         print "Optimizing model:"
         m.optimize('scg', messages=verbose, max_iters=max_iters,
                    gtol=.05)
     if plot:
-        m.plot_X_1d("BGPLVM Latent Space 1D")
+        m.q.plot("BGPLVM Latent Space 1D")
         m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
     return m

View file

@@ -118,8 +118,8 @@ class FITC(SparseGP):
                 _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
                 self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
                 self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
-                self._dKmm_dX += self.kern.dK_dX(_dKmm ,self.Z)
-                self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
+                self._dKmm_dX += self.kern.gradients_X(_dKmm ,self.Z)
+                self._dpsi1_dX += self.kern.gradients_X(_dpsi1.T,self.Z,self.X[i:i+1,:])
         # the partial derivative vector for the likelihood
         if self.likelihood.num_params == 0:
@@ -170,8 +170,8 @@ class FITC(SparseGP):
         return dL_dtheta
     def dL_dZ(self):
-        dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
-        dL_dZ += self.kern.dK_dX(self._dL_dKmm,X=self.Z)
+        dL_dZ = self.kern.gradients_X(self._dL_dpsi1.T,self.Z,self.X)
+        dL_dZ += self.kern.gradients_X(self._dL_dKmm,X=self.Z)
         dL_dZ += self._dpsi1_dX
         dL_dZ += self._dKmm_dX
         return dL_dZ

View file

@@ -80,7 +80,7 @@ class VarDTC(object):
             # no backsubstitution because of bound explosion on tr(A) if not...
             LmInv, _ = dtrtri(Lm, lower=1)
             A = LmInv.T.dot(psi2_beta.dot(LmInv))
-            print A.sum()
+            #print A.sum()
         else:
             if het_noise:
                 tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))

View file

@@ -160,6 +160,9 @@ class kern(Parameterized):
         return newkern
+    def __call__(self, X, X2=None):
+        return self.K(X, X2)
     def __mul__(self, other):
         """ Here we overload the '*' operator. See self.prod for more information"""
         return self.prod(other)
@@ -550,7 +553,7 @@ class Kern_check_dK_dX(Kern_check_model):
         Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
     def _log_likelihood_gradients(self):
-        return self.kernel.dK_dX(self.dL_dK, self.X, self.X2).flatten()
+        return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
     def _get_param_names(self):
         return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
@@ -657,7 +660,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
     except NotImplementedError:
         result=True
         if verbose:
-            print("dK_dX not implemented for " + kern.name)
+            print("gradients_X not implemented for " + kern.name)
     if result and verbose:
         print("Check passed.")
     if not result:
@@ -673,7 +676,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
     except NotImplementedError:
         result=True
         if verbose:
-            print("dK_dX not implemented for " + kern.name)
+            print("gradients_X not implemented for " + kern.name)
     if result and verbose:
         print("Check passed.")
     if not result:
@@ -689,7 +692,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
     except NotImplementedError:
         result=True
         if verbose:
-            print("dK_dX not implemented for " + kern.name)
+            print("gradients_X not implemented for " + kern.name)
     if result and verbose:
         print("Check passed.")
     if not result:
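
Two themes in this file: kernels become callable, so k(X, X2) is shorthand for k.K(X, X2), and the dK_dX entry points are renamed to gradients_X, a rename the kernel-part files below follow through on. A quick sketch of the callable shorthand (toy kernel, illustrative only):

    import numpy as np

    class Kern(object):
        def K(self, X, X2=None):
            # toy linear covariance, just for the demonstration
            return X.dot((X if X2 is None else X2).T)

        def __call__(self, X, X2=None):
            return self.K(X, X2)

    k = Kern()
    X = np.random.randn(5, 2)
    assert np.allclose(k(X), k.K(X))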

View file

@@ -51,7 +51,7 @@ class Brownian(Kernpart):
     def dKdiag_dtheta(self,dL_dKdiag,X,target):
         target += np.dot(X.flatten(), dL_dKdiag)
-    def dK_dX(self,dL_dK,X,X2,target):
+    def gradients_X(self,dL_dK,X,X2,target):
         raise NotImplementedError, "TODO"
         #target += self.variance
         #target -= self.variance*theta(X-X2.T)

View file

@@ -96,7 +96,7 @@ class Matern32(Kernpart):
         """derivative of the diagonal of the covariance matrix with respect to the parameters."""
         target[0] += np.sum(dL_dKdiag)
-    def dK_dX(self, dL_dK, X, X2, target):
+    def gradients_X(self, dL_dK, X, X2, target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None:
             dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
@@ -105,8 +105,8 @@ class Matern32(Kernpart):
         else:
             dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
             ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
-        dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
-        target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
+        gradients_X = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
+        target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
     def dKdiag_dX(self, dL_dKdiag, X, target):
         pass
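
For reference, the factor in the renamed method is the standard Matern 3/2 radial derivative: writing r for the scaled distance,

    k(r) = \sigma^2 (1 + \sqrt{3}\, r)\, e^{-\sqrt{3}\, r}, \qquad
    \frac{dk}{dr} = -3 \sigma^2 r\, e^{-\sqrt{3}\, r}

which is the -3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) term above; the np.where(dist != 0., dist, np.inf) guard sends the 0/0 limit at r = 0 to zero.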

View file

@@ -96,7 +96,7 @@ class Matern52(Kernpart):
         """derivative of the diagonal of the covariance matrix with respect to the parameters."""
         target[0] += np.sum(dL_dKdiag)
-    def dK_dX(self,dL_dK,X,X2,target):
+    def gradients_X(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None:
             dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
@@ -104,8 +104,8 @@ class Matern52(Kernpart):
         else:
             dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
             ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
-        dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
-        target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
+        gradients_X = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
+        target += np.sum(gradients_X*dL_dK.T[:,:,None],0)
     def dKdiag_dX(self,dL_dKdiag,X,target):
         pass
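
Same pattern for Matern 5/2: k(r) = \sigma^2 (1 + \sqrt{5}\, r + \tfrac{5}{3} r^2)\, e^{-\sqrt{5}\, r} differentiates to \frac{dk}{dr} = -\tfrac{5}{3} \sigma^2 r (1 + \sqrt{5}\, r)\, e^{-\sqrt{5}\, r}, matching the variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist) factor in the renamed method.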

View file

@@ -193,7 +193,7 @@ class Eq_ode1(Kernpart):
     def dKdiag_dtheta(self,dL_dKdiag,index,target):
         pass
-    def dK_dX(self,dL_dK,X,X2,target):
+    def gradients_X(self,dL_dK,X,X2,target):
         pass
     def _extract_t_indices(self, X, X2=None, dL_dK=None):

View file

@@ -95,13 +95,13 @@ class Exponential(Kernpart):
         # NB: derivative of diagonal elements wrt lengthscale is 0
         target[0] += np.sum(dL_dKdiag)
-    def dK_dX(self, dL_dK, X, X2, target):
+    def gradients_X(self, dL_dK, X, X2, target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X
         dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
         ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
-        dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
-        target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
+        gradients_X = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
+        target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
     def dKdiag_dX(self, dL_dKdiag, X, target):
         pass

View file

@@ -34,7 +34,7 @@ class Fixed(Kernpart):
     def dK_dtheta(self, partial, X, X2, target):
         target += (partial * self.fixed_K).sum()
-    def dK_dX(self, partial, X, X2, target):
+    def gradients_X(self, partial, X, X2, target):
         pass
     def dKdiag_dX(self, partial, X, target):

View file

@@ -97,7 +97,7 @@ class Gibbs(Kernpart):
         target+= np.hstack([(dL_dK*self._K_dvar).sum(), gmapping])
-    def dK_dX(self, dL_dK, X, X2, target):
+    def gradients_X(self, dL_dK, X, X2, target):
         """Derivative of the covariance matrix with respect to X."""
         # First account for gradients arising from presence of X in exponent.
         self._K_computations(X, X2)
@@ -105,8 +105,8 @@ class Gibbs(Kernpart):
             _K_dist = 2*(X[:, None, :] - X[None, :, :])
         else:
             _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co
-        dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
-        target += np.sum(dK_dX*dL_dK.T[:, :, None], 0)
+        gradients_X = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
+        target += np.sum(gradients_X*dL_dK.T[:, :, None], 0)
         # Now account for gradients arising from presence of X in lengthscale.
         self._dK_computations(dL_dK)
         if X2 is None:

View file

@@ -90,7 +90,7 @@ class Hetero(Kernpart):
         """Gradient of diagonal of covariance with respect to parameters."""
         target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)
-    def dK_dX(self, dL_dK, X, X2, target):
+    def gradients_X(self, dL_dK, X, X2, target):
         """Derivative of the covariance matrix with respect to X."""
         if X2==None or X2 is X:
             dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]

View file

@@ -55,14 +55,14 @@ class Hierarchical(Kernpart):
         [[[[k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)]
-    def dK_dX(self,dL_dK,X,X2,target):
+    def gradients_X(self,dL_dK,X,X2,target):
         raise NotImplementedError
         #X,slices = X[:,:-1],index_to_slices(X[:,-1])
         #if X2 is None:
             #X2,slices2 = X,slices
         #else:
             #X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        #[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+        #[[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
         #
     def dKdiag_dX(self,dL_dKdiag,X,target):
         raise NotImplementedError

View file

@@ -79,13 +79,13 @@ class IndependentOutputs(Kernpart):
         [[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
-    def dK_dX(self,dL_dK,X,X2,target):
+    def gradients_X(self,dL_dK,X,X2,target):
         X,slices = X[:,:-1],index_to_slices(X[:,-1])
         if X2 is None:
             X2,slices2 = X,slices
         else:
             X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
-        [[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+        [[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
     def dKdiag_dX(self,dL_dKdiag,X,target):
         X,slices = X[:,:-1],index_to_slices(X[:,-1])

View file

@@ -20,7 +20,6 @@ class Kernpart(Parameterized):
         # the number of optimisable parameters
         # the name of the covariance function.
         # link to parameterized objects
-        self._parameters_ = []
         #self._X = None
     def connect_input(self, X):
@@ -106,7 +105,7 @@ class Kernpart(Parameterized):
         raise NotImplementedError
     def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
         raise NotImplementedError
-    def dK_dX(self, dL_dK, X, X2, target):
+    def gradients_X(self, dL_dK, X, X2, target):
         raise NotImplementedError
     def dKdiag_dX(self, dL_dK, X, target):
         raise NotImplementedError

View file

@@ -57,6 +57,27 @@ class Linear(Kernpart):
     def on_input_change(self, X):
         self._K_computations(X, None)
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        self._psi_computations(Z, mu, S)
+        # psi0:
+        tmp = dL_dpsi0[:, None] * self.mu2_S
+        if self.ARD: self.variances.gradient = tmp.sum(0)
+        else: self.variances.gradient = tmp.sum()
+        #psi1
+        self.dK_dtheta(dL_dpsi1, mu, Z, self.variances.gradient)
+        #from psi2
+        tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
+        if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
+        else: self.variances.gradient += tmp.sum()
+        #from Kmm
+        self._K_computations(Z, None)
+        self.dK_dtheta(dL_dKmm, Z, None, self.variances.gradient)
     # def _get_params(self):
     #     return self.variances
     #
@@ -107,7 +128,7 @@ class Linear(Kernpart):
         else:
             target += tmp.sum()
-    def dK_dX(self, dL_dK, X, X2, target):
+    def gradients_X(self, dL_dK, X, X2, target):
         if X2 is None:
             target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
         else:
@@ -150,7 +171,7 @@ class Linear(Kernpart):
         target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
     def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
-        self.dK_dX(dL_dpsi1.T, Z, mu, target)
+        self.gradients_X(dL_dpsi1.T, Z, mu, target)
     def psi2(self, Z, mu, S, target):
         self._psi_computations(Z, mu, S)
@@ -182,7 +203,7 @@ class Linear(Kernpart):
     def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
         tmp = np.zeros((mu.shape[0], Z.shape[0]))
         self.K(mu,Z,tmp)
-        self.dK_dX(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu)
+        self.gradients_X(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu)
         Zs = Z*self.variances
         Zs_sq = Zs[:,None,:]*Zs[None,:,:]
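
The new update_gradients_variational writes the variance gradient in place, accumulating contributions from psi0, psi1, psi2 and Kmm in turn instead of returning separate arrays. The psi0 term is the easiest to read off: assuming mu2_S stores \mu^2 + S, the linear ARD kernel has

    \psi_0^{(n)} = \sum_q \sigma^2_q (\mu_{nq}^2 + S_{nq}),

so \partial \psi_0 / \partial \sigma^2_q = \sum_n (\mu_{nq}^2 + S_{nq}), which is exactly tmp.sum(0) in the ARD branch.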

View file

@@ -107,7 +107,7 @@ class MLP(Kernpart):
         target[0] += np.sum(self._K_dvar*dL_dK)
-    def dK_dX(self, dL_dK, X, X2, target):
+    def gradients_X(self, dL_dK, X, X2, target):
         """Derivative of the covariance matrix with respect to X"""
         self._K_computations(X, X2)
         arg = self._K_asin_arg

View file

@@ -99,7 +99,7 @@ class POLY(Kernpart):
         target[2] += base_cov_grad.sum()
-    def dK_dX(self, dL_dK, X, X2, target):
+    def gradients_X(self, dL_dK, X, X2, target):
         """Derivative of the covariance matrix with respect to X"""
         self._K_computations(X, X2)
         arg = self._K_poly_arg

View file

@ -81,21 +81,21 @@ class Prod(Kernpart):
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params]) self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:]) self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2) self._K_computations(X,X2)
if X2 is None: if X2 is None:
if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize): if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize):
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1]) self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2]) self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize): else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize):
#NOTE The indices column in the inputs makes the ki.dK_dX fail when passing None instead of X[:,self.slicei] #NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei]
X2 = X X2 = X
self.k1.dK_dX(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
else: else:
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0]) K1 = np.zeros(X.shape[0])
@@ -103,8 +103,8 @@ class Prod(Kernpart):
self.k1.Kdiag(X[:,self.slice1],K1) self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2) self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1]) self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2]) self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
def _K_computations(self,X,X2): def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
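For the product kernel K = K1 * K2 (elementwise), the product rule sends each part the incoming gradient weighted by the other part's covariance, which is exactly how gradients_X above dispatches to k1 and k2. A stateless sketch under an assumed ki.K / ki.gradients_X interface:

import numpy as np

def prod_gradients_X(dL_dK, X, X2, k1, k2, target):
    # Product rule for K = K1 * K2 (elementwise):
    #   the gradient through K1 uses dL_dK * K2, and vice versa.
    K1 = k1.K(X, X2)
    K2 = k2.K(X, X2)
    k1.gradients_X(dL_dK * K2, X, X2, target)
    k2.gradients_X(dL_dK * K1, X, X2, target)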

View file

@@ -67,11 +67,11 @@ class prod_orthogonal(Kernpart):
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params]) self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:]) self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:])
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2) self._K_computations(X,X2)
self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target) self.k1.gradients_X(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target)
self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target) self.k2.gradients_X(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0]) K1 = np.zeros(X.shape[0])
@@ -79,8 +79,8 @@ class prod_orthogonal(Kernpart):
self.k1.Kdiag(X[:,0:self.k1.input_dim],K1) self.k1.Kdiag(X[:,0:self.k1.input_dim],K1)
self.k2.Kdiag(X[:,self.k1.input_dim:],K2) self.k2.Kdiag(X[:,self.k1.input_dim:],K2)
self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.input_dim], target) self.k1.gradients_X(dL_dKdiag*K2, X[:,:self.k1.input_dim], target)
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.input_dim:], target) self.k2.gradients_X(dL_dKdiag*K1, X[:,self.k1.input_dim:], target)
def _K_computations(self,X,X2): def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):

View file

@@ -68,7 +68,7 @@ class RationalQuadratic(Kernpart):
target[0] += np.sum(dL_dKdiag) target[0] += np.sum(dL_dKdiag)
# here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged # here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: if X2 is None:
dist2 = np.square((X-X.T)/self.lengthscale) dist2 = np.square((X-X.T)/self.lengthscale)

View file

@@ -51,8 +51,11 @@ class RBF(Kernpart):
lengthscale = np.ones(self.input_dim) lengthscale = np.ones(self.input_dim)
self.variance = Param('variance', variance) self.variance = Param('variance', variance)
self.lengthscale = Param('lengthscale', lengthscale) self.lengthscale = Param('lengthscale', lengthscale)
self.lengthscale.add_observer(self, self.update_lengthscale) self.lengthscale.add_observer(self, self.update_lengthscale)
self.update_lengthscale(self.lengthscale)
self.add_parameters(self.variance, self.lengthscale) self.add_parameters(self.variance, self.lengthscale)
self.parameters_changed() # initializes cache self.parameters_changed() # initializes cache
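The two added lines register the kernel as an observer of its own lengthscale and then invoke the callback once by hand, so derived caches such as the squared lengthscale are valid before the first evaluation rather than only after the first change. A minimal sketch of that idiom (toy observer interface modelled on the add_observer call above, not GPy's Param):

import numpy as np

class ToyParam(np.ndarray):
    """Tiny stand-in for a parameter that notifies observers on change."""
    def __new__(cls, values):
        obj = np.asarray(values, dtype=float).view(cls)
        obj._callbacks = []
        return obj
    def add_observer(self, observer, callback):
        self._callbacks.append(callback)
    def __setitem__(self, key, value):
        super(ToyParam, self).__setitem__(key, value)
        for cb in getattr(self, '_callbacks', []):
            cb(self)  # push the new value to every observer

class ToyRBF(object):
    def __init__(self, lengthscale):
        self.lengthscale = ToyParam(lengthscale)
        self.lengthscale.add_observer(self, self.update_lengthscale)
        self.update_lengthscale(self.lengthscale)  # prime the cache, as in the diff
    def update_lengthscale(self, l):
        self.lengthscale2 = np.square(np.asarray(l))  # cached derived quantity

k = ToyRBF([1.0, 2.0])
k.lengthscale[0] = 3.0  # triggers update_lengthscale; lengthscale2 stays in sync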
@@ -114,7 +117,7 @@ class RBF(Kernpart):
self._K_computations(X, Z) self._K_computations(X, Z)
self.variance.gradient += np.sum(dL_dKnm * self._K_dvar) self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
if self.ARD: if self.ARD:
self.lengthscales.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z) self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
else: else:
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm) self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
@@ -123,7 +126,7 @@ class RBF(Kernpart):
self._K_computations(Z, None) self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD: if self.ARD:
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else: else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
@@ -157,7 +160,7 @@ class RBF(Kernpart):
self._K_computations(Z, None) self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD: if self.ARD:
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else: else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
@@ -168,8 +171,8 @@ class RBF(Kernpart):
_K_dist = 2*(X[:, None, :] - X[None, :, :]) _K_dist = 2*(X[:, None, :] - X[None, :, :])
else: else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
pass pass
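For the RBF kernel k(x, x') = v * exp(-||x - x'||**2 / (2*l**2)), the X-gradient that the renamed method accumulates is -(v/l**2) * K * (x - x'), contracted against dL_dK. A standalone, uncached sketch for the isotropic case with distinct X and X2 (illustrative only; the symmetric X2-is-None branch above doubles the difference term instead):

import numpy as np

def rbf_gradients_X(dL_dK, X, X2, variance, lengthscale, target):
    # dL/dX for an isotropic RBF kernel; a sketch of the computation the
    # renamed gradients_X performs, without GPy's caching.
    diff = X[:, None, :] - X2[None, :, :]                # (N, M, Q)
    r2 = np.sum(np.square(diff), axis=2) / lengthscale**2
    K = variance * np.exp(-0.5 * r2)                     # (N, M)
    dK_dX = -(1.0 / lengthscale**2) * K[:, :, None] * diff
    target += np.sum(dL_dK[:, :, None] * dK_dX, axis=1)  # (N, Q)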

View file

@@ -142,14 +142,14 @@ class RBFInv(RBF):
else: else:
target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) * (-self.lengthscale2) target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) * (-self.lengthscale2)
def dK_dX(self, dL_dK, X, X2, target): def gradients_X(self, dL_dK, X, X2, target):
self._K_computations(X, X2) self._K_computations(X, X2)
if X2 is None: if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :]) _K_dist = 2*(X[:, None, :] - X[None, :, :])
else: else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) gradients_X = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
pass pass

View file

@@ -88,7 +88,7 @@ class RBFCos(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
target[0] += np.sum(dL_dKdiag) target[0] += np.sum(dL_dKdiag)
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
#TODO!!! #TODO!!!
raise NotImplementedError raise NotImplementedError

View file

@@ -144,8 +144,8 @@ class SS_RBF(Kernpart):
_K_dist = 2*(X[:, None, :] - X[None, :, :]) _K_dist = 2*(X[:, None, :] - X[None, :, :])
else: else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
pass pass

View file

@@ -54,7 +54,7 @@ class Symmetric(Kernpart):
self.k.dK_dtheta(dL_dK,AX,AX2,target) self.k.dK_dtheta(dL_dK,AX,AX2,target)
def dK_dX(self,dL_dK,X,X2,target): def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
AX = np.dot(X,self.transform) AX = np.dot(X,self.transform)
if X2 is None: if X2 is None:
@@ -62,10 +62,10 @@ class Symmetric(Kernpart):
ZX2 = AX ZX2 = AX
else: else:
AX2 = np.dot(X2, self.transform) AX2 = np.dot(X2, self.transform)
self.k.dK_dX(dL_dK, X, X2, target) self.k.gradients_X(dL_dK, X, X2, target)
self.k.dK_dX(dL_dK, AX, X2, target) self.k.gradients_X(dL_dK, AX, X2, target)
self.k.dK_dX(dL_dK, X, AX2, target) self.k.gradients_X(dL_dK, X, AX2, target)
self.k.dK_dX(dL_dK, AX ,AX2, target) self.k.gradients_X(dL_dK, AX ,AX2, target)
def Kdiag(self,X,target): def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""

View file

@@ -357,7 +357,7 @@ class spkern(Kernpart):
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,partial,X,target):
self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial) self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial)
def dK_dX(self,partial,X,Z,target): def gradients_X(self,partial,X,Z,target):
if Z is None: if Z is None:
self._weave_inline(self._dK_dX_code_X, X, target, Z, partial) self._weave_inline(self._dK_dX_code_X, X, target, Z, partial)
else: else:

View file

@@ -57,4 +57,4 @@ class Kernel(Mapping):
return np.hstack((self._df_dA.flatten(), self._df_dbias)) return np.hstack((self._df_dA.flatten(), self._df_dbias))
def df_dX(self, dL_df, X): def df_dX(self, dL_df, X):
return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X) return self.kern.gradients_X((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
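Here the mapping is f(X) = K(X, self.X) A + bias, so df_dX folds the output gradient through the weights first, dL_dK = dL_df A^T, and then a single gradients_X call back-propagates through the kernel. The one-liner above is equivalent to this sketch (hypothetical helper names):

import numpy as np

def kernel_mapping_df_dX(dL_df, X, X_basis, A, kern_gradients_X):
    # f(X) = K(X, X_basis) A (+ bias): fold the (N, D) output gradient
    # through the (M, D) weights A, then through the kernel's X-gradient.
    dL_dK = np.dot(dL_df, A.T)  # == (dL_df[:, None, :] * A[None, :, :]).sum(2)
    return kern_gradients_X(dL_dK, X, X_basis)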

View file

@@ -2,7 +2,6 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
import itertools
from gplvm import GPLVM from gplvm import GPLVM
from .. import kern from .. import kern
from ..core import SparseGP from ..core import SparseGP
@@ -23,15 +22,10 @@ class BayesianGPLVM(SparseGP, GPLVM):
:type init: 'PCA'|'random' :type init: 'PCA'|'random'
""" """
def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, name='bayesian gplvm', **kwargs): Z=None, kernel=None, inference_method=None, likelihood=Gaussian(), name='bayesian gplvm', **kwargs):
if type(likelihood_or_Y) is np.ndarray:
likelihood = Gaussian(likelihood_or_Y)
else:
likelihood = likelihood_or_Y
if X == None: if X == None:
X = self.initialise_latent(init, input_dim, likelihood.Y) X = self.initialise_latent(init, input_dim, Y)
self.init = init self.init = init
if X_variance is None: if X_variance is None:
@@ -44,9 +38,9 @@ class BayesianGPLVM(SparseGP, GPLVM):
if kernel is None: if kernel is None:
kernel = kern.rbf(input_dim) # + kern.white(input_dim) kernel = kern.rbf(input_dim) # + kern.white(input_dim)
SparseGP.__init__(self, X=X, likelihood=likelihood, kernel=kernel, Z=Z, X_variance=X_variance, name=name, **kwargs) self.q = Normal(X, X_variance)
self.q = Normal(self.X, self.X_variance) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs)
self.add_parameter(self.q, gradient=self._dbound_dmuS, index=0) self.add_parameter(self.q, index=0)
self.ensure_default_constraints() self.ensure_default_constraints()
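The constructor change removes the dual-purpose likelihood_or_Y first argument: the data Y now comes first, the likelihood and inference method are explicit keyword arguments with Gaussian() as the default, and the variational posterior q = Normal(X, X_variance) is registered as an ordinary parameter. A hedged usage sketch (import path assumed from GPy's usual layout):

import numpy as np
from GPy.models import BayesianGPLVM  # assumed path

Y = np.random.randn(100, 12)          # observed data, one row per sample
m = BayesianGPLVM(Y, input_dim=3, num_inducing=10, init='PCA')
# previously: BayesianGPLVM(Gaussian(Y), input_dim, ...); a non-Gaussian
# likelihood would now be passed explicitly as likelihood=...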
def _getstate(self): def _getstate(self):
@@ -94,9 +88,9 @@ class BayesianGPLVM(SparseGP, GPLVM):
return dKL_dmu, dKL_dS return dKL_dmu, dKL_dS
def dL_dmuS(self): def dL_dmuS(self):
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance) dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance) dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance) dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2 dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2 dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
@@ -107,10 +101,25 @@ class BayesianGPLVM(SparseGP, GPLVM):
var_S = np.sum(self.X_variance - np.log(self.X_variance)) var_S = np.sum(self.X_variance - np.log(self.X_variance))
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
def log_likelihood(self): def parameters_changed(self):
ll = SparseGP.log_likelihood(self) self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
kl = self.KL_divergence() self._log_marginal_likelihood -= self.KL_divergence()
return ll - kl
#The derivative of the bound wrt the inducing inputs Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
dL_dmu, dL_dS = self.dL_dmuS()
dKL_dmu, dKL_dS = self.dKL_dmuS()
self.q.means.gradient = dL_dmu - dKL_dmu
self.q.variances.gradient = dL_dS - dKL_dS
# def log_likelihood(self):
# ll = SparseGP.log_likelihood(self)
# kl = self.KL_divergence()
# return ll - kl
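The log_likelihood override gives way to the parameters_changed contract: whenever any parameter moves, the model reruns inference, subtracts the KL term KL(q||p) = 1/2 * sum(mu**2 + S - log(S) - 1) (which is what KL_divergence above computes for a standard-normal prior and diagonal S), and writes gradients straight onto the parameter objects instead of packing them into a vector. A skeleton of that flow with hypothetical attribute names:

import numpy as np

def kl_standard_normal(mu, S):
    # KL( N(mu, diag(S)) || N(0, I) ), matching KL_divergence above
    return 0.5 * np.sum(np.square(mu) + S - np.log(S) - 1.0)

class VariationalModelSketch(object):
    """Skeleton of the parameters_changed contract (names assumed):
    recompute posterior and bound, then push gradients onto parameters."""
    def parameters_changed(self):
        posterior, lml, grad_dict = self.inference_method.inference(
            self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
        self._log_marginal_likelihood = lml - kl_standard_normal(self.X, self.X_variance)
        # gradients are written in place, e.g. for the inducing inputs:
        self.Z.gradient = self.kern.gradients_X(grad_dict['dL_dKmm'], self.Z)

    def log_likelihood(self):
        return self._log_marginal_likelihood  # cached; no recomputation per call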
def _dbound_dmuS(self): def _dbound_dmuS(self):
dKL_dmu, dKL_dS = self.dKL_dmuS() dKL_dmu, dKL_dS = self.dKL_dmuS()
@@ -181,18 +190,18 @@ class BayesianGPLVM(SparseGP, GPLVM):
""" """
dmu_dX = np.zeros_like(Xnew) dmu_dX = np.zeros_like(Xnew)
for i in range(self.Z.shape[0]): for i in range(self.Z.shape[0]):
dmu_dX += self.kern.dK_dX(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :]) dmu_dX += self.kern.gradients_X(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
return dmu_dX return dmu_dX
def dmu_dXnew(self, Xnew): def dmu_dXnew(self, Xnew):
""" """
Individual gradient of prediction at Xnew w.r.t. each sample in Xnew Individual gradient of prediction at Xnew w.r.t. each sample in Xnew
""" """
dK_dX = np.zeros((Xnew.shape[0], self.num_inducing)) gradients_X = np.zeros((Xnew.shape[0], self.num_inducing))
ones = np.ones((1, 1)) ones = np.ones((1, 1))
for i in range(self.Z.shape[0]): for i in range(self.Z.shape[0]):
dK_dX[:, i] = self.kern.dK_dX(ones, Xnew, self.Z[i:i + 1, :]).sum(-1) gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
return np.dot(dK_dX, self.Cpsi1Vf) return np.dot(gradients_X, self.Cpsi1Vf)
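dmu_dXnew assembles the Jacobian of the predictive mean mu(X*) = K(X*, Z) Cpsi1Vf one inducing point at a time: column i holds the dimension-summed derivative of k(x*, z_i), and the final dot with Cpsi1Vf applies the chain rule, so dmu_d/dx* = sum_i dk(x*, z_i)/dx* * Cpsi1Vf[i, d]. A finite-difference probe one might use to sanity-check such an analytic Jacobian (hypothetical helper; mu_of maps an (N, Q) input to an (N, D) mean):

import numpy as np

def fd_check_dmu(mu_of, Xnew, eps=1e-6):
    # numerical d mu / d X*, one perturbed input coordinate at a time
    base = mu_of(Xnew)
    G = np.zeros(Xnew.shape + base.shape[1:])  # (N, Q, D)
    for n in range(Xnew.shape[0]):
        for q in range(Xnew.shape[1]):
            Xp = Xnew.copy()
            Xp[n, q] += eps
            G[n, q] = (mu_of(Xp)[n] - base[n]) / eps
    return G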
def plot_steepest_gradient_map(self, *args, ** kwargs): def plot_steepest_gradient_map(self, *args, ** kwargs):
""" """

View file

@@ -44,7 +44,7 @@ class BCGPLVM(GPLVM):
GP._set_params(self, x[self.mapping.num_params:]) GP._set_params(self, x[self.mapping.num_params:])
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
dL_df = self.kern.dK_dX(self.dL_dK, self.X) dL_df = self.kern.gradients_X(self.dL_dK, self.X)
dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y) dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self))) return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))

View file

@@ -60,7 +60,7 @@ class GPLVM(GP):
def jacobian(self,X): def jacobian(self,X):
target = np.zeros((X.shape[0],X.shape[1],self.output_dim)) target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
for i in range(self.output_dim): for i in range(self.output_dim):
target[:,:,i]=self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X) target[:,:,i]=self.kern.gradients_X(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
return target return target
def magnification(self,X): def magnification(self,X):

View file

@@ -52,7 +52,7 @@ class SparseGPLVM(SparseGPRegression, GPLVM):
def dL_dX(self): def dL_dX(self):
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X) dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X)
dL_dX += self.kern.dK_dX(self.dL_dpsi1, self.X, self.Z) dL_dX += self.kern.gradients_X(self.dL_dpsi1, self.X, self.Z)
return dL_dX return dL_dX

View file

@@ -1,4 +1,5 @@
import pylab as pb import pylab as pb, numpy as np
from ...util.misc import param_to_array
def plot(parameterized, fignum=None, ax=None, colors=None): def plot(parameterized, fignum=None, ax=None, colors=None):
""" """

View file

@@ -1,10 +1,9 @@
import numpy as np from ..core.parameterization.array_core import ObservableArray, ParamList
from ..core.parameterization.array_core import ObservableArray
class Cacher(object): class Cacher(object):
def __init__(self, operation, limit=5): def __init__(self, operation, limit=5):
self.limit = int(limit) self.limit = int(limit)
self.operation=operation self.operation=operation
self.cached_inputs = [] self.cached_inputs = ParamList([])
self.cached_outputs = [] self.cached_outputs = []
self.inputs_changed = [] self.inputs_changed = []
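The cache's input list becomes a ParamList (imported above), whose membership test compares by identity rather than by value: with numpy arrays, value-based `in` triggers an elementwise == and an ambiguous truth value, and even where it worked it would conflate equal-valued but distinct arrays, which is wrong for a cache keyed on object identity. A small demonstration:

import numpy as np

a = np.arange(3)
b = a.copy()                        # equal values, different object

# plain-list membership uses ==, which is elementwise for arrays:
try:
    b in [a]
except ValueError as e:
    print(e)                        # "The truth value of an array ... is ambiguous"

# identity-based membership, the semantics the cache needs:
print(any(el is a for el in [a]))   # True: same object
print(any(el is b for el in [a]))   # False: merely equal values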