diff --git a/GPy/core/model.py b/GPy/core/model.py index a1b2abe4..35403ba7 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -439,18 +439,19 @@ class Model(Parameterized): print "No free parameters to check" return - + gradient = self.objective_function_gradients(x) + np.where(gradient==0, 1e-312, gradient) + for i in param_list: xx = x.copy() xx[i] += step f1, g1 = self.objective_and_gradients(xx) xx[i] -= 2.*step f2, g2 = self.objective_and_gradients(xx) - gradient = self.objective_function_gradients(x)[i] - + numerical_gradient = (f1 - f2) / (2 * step) - ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient)) - difference = np.abs((f1 - f2) / 2 / step - gradient) + ratio = (f1 - f2) / (2 * step * gradient[i]) + difference = np.abs((f1 - f2) / 2 / step - gradient[i]) if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance: formatted_name = "\033[92m {0} \033[0m".format(names[i]) @@ -458,7 +459,7 @@ class Model(Parameterized): formatted_name = "\033[91m {0} \033[0m".format(names[i]) r = '%.6f' % float(ratio) d = '%.6f' % float(difference) - g = '%.6f' % gradient + g = '%.6f' % gradient[i] ng = '%.6f' % float(numerical_gradient) grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4]) print grad_string diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index 4c31f23b..c95f3ce3 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -15,8 +15,18 @@ class ListArray(np.ndarray): def __new__(cls, input_array): obj = np.asanyarray(input_array).view(cls) return obj - def __eq__(self, other): - return other is self + #def __eq__(self, other): + # return other is self + +class ParamList(list): + + def __contains__(self, other): + for el in self: + if el is other: + return True + return False + + pass class ObservableArray(ListArray, Observable): """ @@ -36,16 +46,19 @@ class ObservableArray(ListArray, Observable): if obj is None: return self._observers_ = getattr(obj, '_observers_', None) def __setitem__(self, s, val, update=True): - if self.ndim: - if not np.all(np.equal(self[s], val)): - super(ObservableArray, self).__setitem__(s, val) - if update: - self._notify_observers() - else: - if not np.all(np.equal(self, val)): - super(ObservableArray, self).__setitem__(Ellipsis, val) - if update: - self._notify_observers() + super(ObservableArray, self).__setitem__(s, val) + if update: + self._notify_observers() +# if self.ndim: +# if not np.all(np.equal(self[s], val)): +# super(ObservableArray, self).__setitem__(s, val) +# if update: +# self._notify_observers() +# else: +# if not np.all(np.equal(self, val)): +# super(ObservableArray, self).__setitem__(Ellipsis, val) +# if update: +# self._notify_observers() def __getslice__(self, start, stop): return self.__getitem__(slice(start, stop)) def __setslice__(self, start, stop, val): diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index 99b5a4de..d52211c5 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -90,11 +90,6 @@ class ParameterIndexOperations(object): return self._properties.values() def properties_for(self, index): -# already_seen = dict() -# for ni in index: -# if ni not in already_seen: -# already_seen[ni] = [prop for prop in self.iter_properties() if ni in self._properties[prop]] -# yield 
already_seen[ni] return vectorize(lambda i: [prop for prop in self.iter_properties() if i in self._properties[prop]], otypes=[list])(index) def add(self, prop, indices): @@ -111,70 +106,11 @@ class ParameterIndexOperations(object): self._properties[prop] = diff else: del self._properties[prop] - #[self._reverse[i].remove(prop) for i in removed if prop in self._reverse[i]] return removed.astype(int) -# else: -# for a in self.properties(): -# if numpy.all(a==prop) and a._parent_index_ == prop._parent_index_: -# ind = create_raveled_indices(indices, shape, offset) -# diff = remove_indices(self[a], ind) -# removed = numpy.intersect1d(self[a], ind, True) -# if not index_empty(diff): -# self._properties[a] = diff -# else: -# del self._properties[a] -# [self._reverse[i].remove(a) for i in removed if a in self._reverse[i]] -# return removed.astype(int) return numpy.array([]).astype(int) def __getitem__(self, prop): return self._properties[prop] -# class TieIndexOperations(object): -# def __init__(self, params): -# self.params = params -# self.tied_from = ParameterIndexOperations() -# self.tied_to = ParameterIndexOperations() -# def add(self, tied_from, tied_to): -# rav_from = self.params._raveled_index_for(tied_from) -# rav_to = self.params._raveled_index_for(tied_to) -# self.tied_from.add(tied_to, rav_from) -# self.tied_to.add(tied_to, rav_to) -# return rav_from, rav_to -# def remove(self, tied_from, tied_to): -# rav_from = self.params._raveled_index_for(tied_from) -# rav_to = self.params._raveled_index_for(tied_to) -# rem_from = self.tied_from.remove(tied_to, rav_from) -# rem_to = self.tied_to.remove(tied_to, rav_to) -# left_from = self.tied_from._properties.pop(tied_to) -# left_to = self.tied_to._properties.pop(tied_to) -# self.tied_from[numpy.delete(tied_to, rem_from)] = left_from -# self.tied_to[numpy.delete(tied_to, rem_to)] = left_to -# return rav_from, rav_to -# def from_to_for(self, index): -# return self.tied_from.properties_for(index), self.tied_to.properties_for(index) -# def iter_from_to_indices(self): -# for k, f in self.tied_from.iteritems(): -# yield f, self.tied_to[k] -# def iter_to_indices(self): -# return self.tied_to.iterindices() -# def iter_from_indices(self): -# return self.tied_from.iterindices() -# def iter_from_items(self): -# for f, i in self.tied_from.iteritems(): -# yield f, i -# def iter_properties(self): -# return self.tied_from.iter_properties() -# def properties(self): -# return self.tied_from.properties() -# def from_to_indices(self, param): -# return self.tied_from[param], self.tied_to[param] -# -# # def create_raveled_indices(index, shape, offset=0): -# # if isinstance(index, (tuple, list)): i = [slice(None)] + list(index) -# # else: i = [slice(None), index] -# # ind = numpy.array(numpy.ravel_multi_index(numpy.indices(shape)[i], shape)).flat + numpy.int_(offset) -# # return ind - def combine_indices(arr1, arr2): return numpy.union1d(arr1, arr2) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 462210dc..c0dd3fea 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -4,19 +4,19 @@ import itertools import numpy from parameter_core import Constrainable, adjust_name_for_printing -from array_core import ObservableArray +from array_core import ObservableArray, ParamList ###### printing __constraints_name__ = "Constraint" __index_name__ = "Index" __tie_name__ = "Tied to" -__precision__ = numpy.get_printoptions()['precision'] # numpy printing precision used, sublassing numpy ndarray after all 
+__precision__ = numpy.get_printoptions()['precision'] # numpy printing precision used, sublassing numpy ndarray after all __print_threshold__ = 5 ###### class Float(numpy.float64, Constrainable): def __init__(self, f, base): - super(Float,self).__init__(f) + super(Float, self).__init__(f) self._base = base @@ -50,7 +50,7 @@ class Param(ObservableArray, Constrainable): WARNING: This overrides the functionality of x==y!!! Use numpy.equal(x,y) for element-wise equality testing. """ - __array_priority__ = 0 # Never give back Param + __array_priority__ = 0 # Never give back Param _fixes_ = None def __new__(cls, name, input_array, *args, **kwargs): obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array)) @@ -75,7 +75,6 @@ class Param(ObservableArray, Constrainable): super(Param, self).__array_finalize__(obj) self._direct_parent_ = getattr(obj, '_direct_parent_', None) self._parent_index_ = getattr(obj, '_parent_index_', None) - self._highest_parent_ = getattr(obj, '_highest_parent_', None) self._current_slice_ = getattr(obj, '_current_slice_', None) self._tied_to_me_ = getattr(obj, '_tied_to_me_', None) self._tied_to_ = getattr(obj, '_tied_to_', None) @@ -94,11 +93,10 @@ class Param(ObservableArray, Constrainable): #=========================================================================== def __reduce_ex__(self): func, args, state = super(Param, self).__reduce__() - return func, args, (state, + return func, args, (state, (self.name, self._direct_parent_, self._parent_index_, - self._highest_parent_, self._current_slice_, self._realshape_, self._realsize_, @@ -119,7 +117,6 @@ class Param(ObservableArray, Constrainable): self._realsize_ = state.pop() self._realshape_ = state.pop() self._current_slice_ = state.pop() - self._highest_parent_ = state.pop() self._parent_index_ = state.pop() self._direct_parent_ = state.pop() self.name = state.pop() @@ -153,8 +150,6 @@ class Param(ObservableArray, Constrainable): @property def _parameters_(self): return [] - def _connect_highest_parent(self, highest_parent): - self._highest_parent_ = highest_parent def _collect_gradient(self, target): target[:] = self.gradient.flat #=========================================================================== @@ -166,7 +161,7 @@ class Param(ObservableArray, Constrainable): :param warning: print a warning for overwriting constraints. """ - self._highest_parent_._fix(self,warning) + self._highest_parent_._fix(self, warning) fix = constrain_fixed def unconstrain_fixed(self): """ @@ -190,19 +185,19 @@ class Param(ObservableArray, Constrainable): Note: For now only one parameter can have ties, so all of a parameter will be removed, when re-tieing! """ - #Note: this method will tie to the parameter which is the last in + # Note: this method will tie to the parameter which is the last in # the chain of ties. Thus, if you tie to a tied parameter, # this tie will be created to the parameter the param is tied # to. 
- assert isinstance(param, Param), "Argument {1} not of type {0}".format(Param,param.__class__) + assert isinstance(param, Param), "Argument {1} not of type {0}".format(Param, param.__class__) param = numpy.atleast_1d(param) if param.size != 1: raise NotImplementedError, "Broadcast tying is not implemented yet" try: if self._original_: self[:] = param - else: # this happens when indexing created a copy of the array + else: # this happens when indexing created a copy of the array self._direct_parent_._get_original(self)[self._current_slice_] = param except ValueError: raise ValueError("Trying to tie {} with shape {} to {} with shape {}".format(self.name, self.shape, param.name, param.shape)) @@ -248,7 +243,7 @@ class Param(ObservableArray, Constrainable): t_rav_i = t._raveled_index() tr_rav_i = tied_to_me._raveled_index() new_index = list(set(t_rav_i) | set(tr_rav_i)) - tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index,t._realshape_)] + tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index, t._realshape_)] self._tied_to_me_[tmp] = self._tied_to_me_[t] | set(self._raveled_index()) del self._tied_to_me_[t] return @@ -261,7 +256,7 @@ class Param(ObservableArray, Constrainable): import ipdb;ipdb.set_trace() new_index = list(set(t_rav_i) - set(tr_rav_i)) if new_index: - tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index,t._realshape_)] + tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index, t._realshape_)] self._tied_to_me_[tmp] = self._tied_to_me_[t] del self._tied_to_me_[t] if len(self._tied_to_me_[tmp]) == 0: @@ -269,12 +264,12 @@ class Param(ObservableArray, Constrainable): else: del self._tied_to_me_[t] def _on_tied_parameter_changed(self, val, ind): - if not self._updated_: #not fast_array_equal(self, val[ind]): + if not self._updated_: # not fast_array_equal(self, val[ind]): val = numpy.atleast_1d(val) self._updated_ = True if self._original_: self.__setitem__(slice(None), val[ind], update=False) - else: # this happens when indexing created a copy of the array + else: # this happens when indexing created a copy of the array self._direct_parent_._get_original(self).__setitem__(self._current_slice_, val[ind], update=False) self._notify_tied_parameters() self._updated_ = False @@ -303,11 +298,11 @@ class Param(ObservableArray, Constrainable): def __getitem__(self, s, *args, **kwargs): if not isinstance(s, tuple): s = (s,) - if not reduce(lambda a,b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim: + if not reduce(lambda a, b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim: s += (Ellipsis,) new_arr = super(Param, self).__getitem__(s, *args, **kwargs) try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base - except AttributeError: pass# returning 0d array or float, double etc + except AttributeError: pass # returning 0d array or float, double etc return new_arr def __setitem__(self, s, val, update=True): super(Param, self).__setitem__(s, val, update=update) @@ -325,11 +320,11 @@ class Param(ObservableArray, Constrainable): continue if isinstance(si, slice): a = si.indices(self._realshape_[i])[0] - elif isinstance(si, (list,numpy.ndarray,tuple)): + elif isinstance(si, (list, numpy.ndarray, tuple)): a = si[0] else: a = si - if a<0: - a = self._realshape_[i]+a + if a < 0: + a = self._realshape_[i] + a internal_offset += a * extended_realshape[i] return internal_offset def _raveled_index(self, slice_index=None): @@ -337,8 +332,8 @@ class Param(ObservableArray, 
Constrainable): # of this object extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1] ind = self._indices(slice_index) - if ind.ndim < 2: ind=ind[:,None] - return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape*x), 1, ind), dtype=int) + if ind.ndim < 2: ind = ind[:, None] + return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int) def _expand_index(self, slice_index=None): # this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes # it basically translates slices to their respective index arrays and turns negative indices around @@ -351,11 +346,11 @@ class Param(ObservableArray, Constrainable): if isinstance(a, slice): start, stop, step = a.indices(b) return numpy.r_[start:stop:step] - elif isinstance(a, (list,numpy.ndarray,tuple)): + elif isinstance(a, (list, numpy.ndarray, tuple)): a = numpy.asarray(a, dtype=int) - a[a<0] = b + a[a<0] - elif a<0: - a = b+a + a[a < 0] = b + a[a < 0] + elif a < 0: + a = b + a return numpy.r_[a] return numpy.r_[:b] return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size))) @@ -379,7 +374,7 @@ class Param(ObservableArray, Constrainable): #=========================================================================== @property def _description_str(self): - if self.size <= 1: return ["%f"%self] + if self.size <= 1: return ["%f" % self] else: return [str(self.shape)] def _parameter_names(self, add_name): return [self.name] @@ -391,31 +386,31 @@ class Param(ObservableArray, Constrainable): return [self.shape] @property def _constraints_str(self): - return [' '.join(map(lambda c: str(c[0]) if c[1].size==self._realsize_ else "{"+str(c[0])+"}", self._highest_parent_._constraints_iter_items(self)))] + return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self._highest_parent_._constraints_iter_items(self)))] @property def _ties_str(self): return [t._short() for t in self._tied_to_] or [''] @property def name_hirarchical(self): if self.has_parent(): - return self._direct_parent_.hirarchy_name()+adjust_name_for_printing(self.name) + return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) return adjust_name_for_printing(self.name) def __repr__(self, *args, **kwargs): name = "\033[1m{x:s}\033[0;0m:\n".format( x=self.name_hirarchical) - return name + super(Param, self).__repr__(*args,**kwargs) + return name + super(Param, self).__repr__(*args, **kwargs) def _ties_for(self, rav_index): - #size = sum(p.size for p in self._tied_to_) + # size = sum(p.size for p in self._tied_to_) ties = numpy.empty(shape=(len(self._tied_to_), numpy.size(rav_index)), dtype=Param) for i, tied_to in enumerate(self._tied_to_): for t, ind in tied_to._tied_to_me_.iteritems(): if t._parent_index_ == self._parent_index_: - matches = numpy.where(rav_index[:,None] == t._raveled_index()[None, :]) + matches = numpy.where(rav_index[:, None] == t._raveled_index()[None, :]) tt_rav_index = tied_to._raveled_index() ind_rav_matches = numpy.where(tt_rav_index == numpy.array(list(ind)))[0] if len(ind) != 1: ties[i, matches[0][ind_rav_matches]] = numpy.take(tt_rav_index, matches[1], mode='wrap')[ind_rav_matches] else: ties[i, matches[0]] = numpy.take(tt_rav_index, matches[1], mode='wrap') - return map(lambda a: sum(a,[]), zip(*[[[tie.flatten()] if tx!=None else [] for tx in t] for t,tie in zip(ties,self._tied_to_)])) + return 
map(lambda a: sum(a, []), zip(*[[[tie.flatten()] if tx != None else [] for tx in t] for t, tie in zip(ties, self._tied_to_)])) def _constraints_for(self, rav_index): return self._highest_parent_._constraints_for(self, rav_index) def _indices(self, slice_index=None): @@ -425,12 +420,12 @@ class Param(ObservableArray, Constrainable): if isinstance(slice_index, (tuple, list)): clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)] if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice) - and len(set(map(len,clean_curr_slice))) <= 1): + and len(set(map(len, clean_curr_slice))) <= 1): return numpy.fromiter(itertools.izip(*clean_curr_slice), - dtype=[('',int)]*self._realndim_,count=len(clean_curr_slice[0])).view((int, self._realndim_)) + dtype=[('', int)] * self._realndim_, count=len(clean_curr_slice[0])).view((int, self._realndim_)) expanded_index = list(self._expand_index(slice_index)) return numpy.fromiter(itertools.product(*expanded_index), - dtype=[('',int)]*self._realndim_,count=reduce(lambda a,b: a*b.size,expanded_index,1)).view((int, self._realndim_)) + dtype=[('', int)] * self._realndim_, count=reduce(lambda a, b: a * b.size, expanded_index, 1)).view((int, self._realndim_)) def _max_len_names(self, gen, header): return reduce(lambda a, b:max(a, len(b)), gen, len(header)) def _max_len_values(self): @@ -443,9 +438,9 @@ class Param(ObservableArray, Constrainable): if self._realsize_ < 2: return name ind = self._indices() - if ind.size > 4: indstr = ','.join(map(str,ind[:2])) + "..." + ','.join(map(str,ind[-2:])) - else: indstr = ','.join(map(str,ind)) - return name+'['+indstr+']' + if ind.size > 4: indstr = ','.join(map(str, ind[:2])) + "..." + ','.join(map(str, ind[-2:])) + else: indstr = ','.join(map(str, ind)) + return name + '[' + indstr + ']' def __str__(self, constr_matrix=None, indices=None, ties=None, lc=None, lx=None, li=None, lt=None): filter_ = self._current_slice_ vals = self.flat @@ -458,10 +453,10 @@ class Param(ObservableArray, Constrainable): if lx is None: lx = self._max_len_values() if li is None: li = self._max_len_index(indices) if lt is None: lt = self._max_len_names(ties, __tie_name__) - header = " {i:^{2}s} | \033[1m{x:^{1}s}\033[0;0m | {c:^{0}s} | {t:^{3}s}".format(lc,lx,li,lt, x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__) # nice header for printing + header = " {i:^{2}s} | \033[1m{x:^{1}s}\033[0;0m | {c:^{0}s} | {t:^{3}s}".format(lc, lx, li, lt, x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__) # nice header for printing if not ties: ties = itertools.cycle(['']) - return "\n".join([header]+[" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {t:^{4}s} ".format(lc,lx,__precision__,li,lt, x=x, c=" ".join(map(str,c)), t=(t or ''), i=i) for i,x,c,t in itertools.izip(indices,vals,constr_matrix,ties)]) # return all the constraints with right indices - #except: return super(Param, self).__str__() + return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, x=x, c=" ".join(map(str, c)), t=(t or ''), i=i) for i, x, c, t in itertools.izip(indices, vals, constr_matrix, ties)]) # return all the constraints with right indices + # except: return super(Param, self).__str__() class ParamConcatenation(object): def __init__(self, params): @@ -472,22 +467,22 @@ class ParamConcatenation(object): See :py:class:`GPy.core.parameter.Param` for more details on constraining. 
""" - #self.params = params - self.params = [] + # self.params = params + self.params = ParamList([]) for p in params: for p in p.flattened_parameters: if p not in self.params: self.params.append(p) self._param_sizes = [p.size for p in self.params] startstops = numpy.cumsum([0] + self._param_sizes) - self._param_slices_ = [slice(start, stop) for start,stop in zip(startstops, startstops[1:])] + self._param_slices_ = [slice(start, stop) for start, stop in zip(startstops, startstops[1:])] #=========================================================================== # Get/set items, enable broadcasting #=========================================================================== def __getitem__(self, s): ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True; - params = [p._get_params()[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p._get_params()[ind[ps]])] - if len(params)==1: return params[0] + params = [p._get_params()[ind[ps]] for p, ps in zip(self.params, self._param_slices_) if numpy.any(p._get_params()[ind[ps]])] + if len(params) == 1: return params[0] return ParamConcatenation(params) def __setitem__(self, s, val, update=True): ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True; @@ -535,12 +530,12 @@ class ParamConcatenation(object): unconstrain_bounded.__doc__ = Param.unconstrain_bounded.__doc__ def untie(self, *ties): [param.untie(*ties) for param in self.params] - __lt__ = lambda self, val: self._vals()val - __ge__ = lambda self, val: self._vals()>=val + __lt__ = lambda self, val: self._vals() < val + __le__ = lambda self, val: self._vals() <= val + __eq__ = lambda self, val: self._vals() == val + __ne__ = lambda self, val: self._vals() != val + __gt__ = lambda self, val: self._vals() > val + __ge__ = lambda self, val: self._vals() >= val def __str__(self, *args, **kwargs): def f(p): ind = p._raveled_index() @@ -552,11 +547,11 @@ class ParamConcatenation(object): lx = max([p._max_len_values() for p in params]) li = max([p._max_len_index(i) for p, i in itertools.izip(params, indices)]) lt = max([p._max_len_names(tm, __tie_name__) for p, tm in itertools.izip(params, ties_matrices)]) - strings = [p.__str__(cm, i, tm, lc, lx, li, lt) for p, cm, i, tm in itertools.izip(params,constr_matrices,indices,ties_matrices)] + strings = [p.__str__(cm, i, tm, lc, lx, li, lt) for p, cm, i, tm in itertools.izip(params, constr_matrices, indices, ties_matrices)] return "\n".join(strings) - return "\n{}\n".format(" -"+"- | -".join(['-'*l for l in [li,lx,lc,lt]])).join(strings) + return "\n{}\n".format(" -" + "- | -".join(['-' * l for l in [li, lx, lc, lt]])).join(strings) def __repr__(self): - return "\n".join(map(repr,self.params)) + return "\n".join(map(repr, self.params)) if __name__ == '__main__': @@ -564,12 +559,12 @@ if __name__ == '__main__': from GPy.core.parameterized import Parameterized from GPy.core.parameter import Param - #X = numpy.random.randn(2,3,1,5,2,4,3) - X = numpy.random.randn(3,2) + # X = numpy.random.randn(2,3,1,5,2,4,3) + X = numpy.random.randn(3, 2) print "random done" p = Param("q_mean", X) p1 = Param("q_variance", numpy.random.rand(*p.shape)) - p2 = Param("Y", numpy.random.randn(p.shape[0],1)) + p2 = Param("Y", numpy.random.randn(p.shape[0], 1)) p3 = Param("variance", numpy.random.rand()) p4 = Param("lengthscale", numpy.random.rand(2)) @@ -577,19 +572,19 @@ if __name__ == '__main__': m = Parameterized() rbf = Parameterized(name='rbf') - rbf.add_parameter(p3,p4) - m.add_parameter(p,p1,rbf) + rbf.add_parameter(p3, p4) + 
m.add_parameter(p, p1, rbf) print "setting params" - #print m.q_v[3:5,[1,4,5]] + # print m.q_v[3:5,[1,4,5]] print "constraining variance" - #m[".*variance"].constrain_positive() - #print "constraining rbf" - #m.rbf_l.constrain_positive() - #m.q_variance[1,[0,5,11,19,2]].tie_to(m.rbf_v) - #m.rbf_v.tie_to(m.rbf_l[0]) - #m.rbf_l[0].tie_to(m.rbf_l[1]) - #m.q_v.tie_to(m.rbf_v) + # m[".*variance"].constrain_positive() + # print "constraining rbf" + # m.rbf_l.constrain_positive() + # m.q_variance[1,[0,5,11,19,2]].tie_to(m.rbf_v) + # m.rbf_v.tie_to(m.rbf_l[0]) + # m.rbf_l[0].tie_to(m.rbf_l[1]) + # m.q_v.tie_to(m.rbf_v) # m.rbf_l.tie_to(m.rbf_va) # pt = numpy.array(params._get_params_transformed()) # ptr = numpy.random.randn(*pt.shape) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index a826b10c..a06211fe 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -48,19 +48,24 @@ class Pickleable(object): #=============================================================================== class Parentable(object): - def __init__(self, direct_parent=None, highest_parent=None, parent_index=None): + def __init__(self, direct_parent=None, parent_index=None): super(Parentable,self).__init__() self._direct_parent_ = direct_parent self._parent_index_ = parent_index - self._highest_parent_ = highest_parent def has_parent(self): - return self._direct_parent_ is not None and self._highest_parent_ is not None + return self._direct_parent_ is not None + + @property + def _highest_parent_(self): + if self._direct_parent_ is None: + return self + return self._direct_parent_._highest_parent_ class Nameable(Parentable): _name = None - def __init__(self, name, direct_parent=None, highest_parent=None, parent_index=None): - super(Nameable,self).__init__(direct_parent, highest_parent, parent_index) + def __init__(self, name, direct_parent=None, parent_index=None): + super(Nameable,self).__init__(direct_parent, parent_index) self._name = name or self.__class__.__name__ @property diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 7abaf4a3..2f00fbe8 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -11,6 +11,7 @@ from param import ParamConcatenation, Param from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing from index_operations import ParameterIndexOperations,\ index_empty +from array_core import ParamList #=============================================================================== # Printing: @@ -69,7 +70,7 @@ class Parameterized(Constrainable, Pickleable, Observable): super(Parameterized, self).__init__(name=name) self._in_init_ = True self._constraints_ = None#ParameterIndexOperations() - self._parameters_ = [] + self._parameters_ = ParamList() self.size = sum(p.size for p in self._parameters_) if not self._has_fixes(): self._fixes_ = None @@ -188,10 +189,10 @@ class Parameterized(Constrainable, Pickleable, Observable): note: if it is a string object it will not (!) be regexp-matched automatically. 
""" - self._parameters_ = [p for p in self._parameters_ + self._parameters_ = ParamList([p for p in self._parameters_ if not (p._parent_index_ in names_params_indices or p.name in names_params_indices - or p in names_params_indices)] + or p in names_params_indices)]) self._connect_parameters() def parameters_changed(self): @@ -216,7 +217,6 @@ class Parameterized(Constrainable, Pickleable, Observable): for i,p in enumerate(self._parameters_): p._direct_parent_ = self p._parent_index_ = i - p._connect_highest_parent(self) not_unique = [] sizes.append(p.size+sizes[-1]) self._param_slices_.append(slice(sizes[-2], sizes[-1])) @@ -231,14 +231,6 @@ class Parameterized(Constrainable, Pickleable, Observable): self.__dict__[pname] = p self._added_names_.add(pname) - def _connect_highest_parent(self, highest_parent): - self._highest_parent_ = highest_parent - if not hasattr(self, "_parameters_") or len(self._parameters_) < 1: - # no parameters for this class - return - for p in self._parameters_: - p._connect_highest_parent(highest_parent) - #=========================================================================== # Pickling operations #=========================================================================== @@ -372,6 +364,8 @@ class Parameterized(Constrainable, Pickleable, Observable): that is an int array, containing the indexes for the flattened param inside this parameterized logic. """ + if isinstance(param, ParamConcatenation): + return numpy.hstack((self._raveled_index_for(p) for p in param.params)) return param._raveled_index() + self._offset_for(param) def _raveled_index(self): diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index e9868b82..b73e25da 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -3,10 +3,8 @@ Created on 6 Nov 2013 @author: maxz ''' -import numpy as np from parameterized import Parameterized from param import Param -from ...util.misc import param_to_array class Normal(Parameterized): ''' @@ -26,6 +24,7 @@ class Normal(Parameterized): See GPy.plotting.matplot_dep.variational_plots """ + import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
- from ..plotting.matplot_dep import variational_plots + from ...plotting.matplot_dep import variational_plots return variational_plots.plot(self,*args) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 46fc6797..e2ba4912 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -15,54 +15,54 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False): num_inducing = 5 if plot: output_dim = 1 - input_dim = 2 + input_dim = 3 else: - input_dim = 2 + input_dim = 1 output_dim = 25 # generate GPLVM-like data X = _np.random.rand(num_inputs, input_dim) lengthscales = _np.random.rand(input_dim) k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True) - + GPy.kern.white(input_dim, 0.01)) + #+ GPy.kern.white(input_dim, 0.01) + ) K = k.K(X) Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, output_dim).T - lik = Gaussian(Y, normalize=True) - k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) - # k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) + k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0) # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) - m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing) + m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) #=========================================================================== # randomly obstruct data with percentage p p = .8 Y_obstruct = Y.copy() Y_obstruct[_np.random.uniform(size=(Y.shape)) < p] = _np.nan #=========================================================================== - m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing) + #m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing) m.lengthscales = lengthscales if plot: import matplotlib.pyplot as pb m.plot() pb.title('PCA initialisation') - m2.plot() - pb.title('PCA initialisation') + #m2.plot() + #pb.title('PCA initialisation') if optimize: m.optimize('scg', messages=verbose) - m2.optimize('scg', messages=verbose) + #m2.optimize('scg', messages=verbose) if plot: m.plot() pb.title('After optimisation') - m2.plot() - pb.title('After optimisation') + #m2.plot() + #pb.title('After optimisation') - return m, m2 + return m def gplvm_oil_100(optimize=True, verbose=1, plot=True): import GPy @@ -264,16 +264,16 @@ def bgplvm_simulation(optimize=True, verbose=1, D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10 _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) Y = Ylist[0] - k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) + k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, 
kernel=k) - m['noise'] = Y.var() / 100. + m.Gaussian_noise = Y.var() / 100. if optimize: print "Optimizing model:" m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05) if plot: - m.plot_X_1d("BGPLVM Latent Space 1D") + m.q.plot("BGPLVM Latent Space 1D") m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') return m diff --git a/GPy/inference/latent_function_inference/fitc.py b/GPy/inference/latent_function_inference/fitc.py index d5aa80bc..e4c01252 100644 --- a/GPy/inference/latent_function_inference/fitc.py +++ b/GPy/inference/latent_function_inference/fitc.py @@ -118,8 +118,8 @@ class FITC(SparseGP): _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z) self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z) - self._dKmm_dX += self.kern.dK_dX(_dKmm ,self.Z) - self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:]) + self._dKmm_dX += self.kern.gradients_X(_dKmm ,self.Z) + self._dpsi1_dX += self.kern.gradients_X(_dpsi1.T,self.Z,self.X[i:i+1,:]) # the partial derivative vector for the likelihood if self.likelihood.num_params == 0: @@ -170,8 +170,8 @@ class FITC(SparseGP): return dL_dtheta def dL_dZ(self): - dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X) - dL_dZ += self.kern.dK_dX(self._dL_dKmm,X=self.Z) + dL_dZ = self.kern.gradients_X(self._dL_dpsi1.T,self.Z,self.X) + dL_dZ += self.kern.gradients_X(self._dL_dKmm,X=self.Z) dL_dZ += self._dpsi1_dX dL_dZ += self._dKmm_dX return dL_dZ diff --git a/GPy/inference/latent_function_inference/varDTC.py b/GPy/inference/latent_function_inference/varDTC.py index 6d46ad14..07ae17c5 100644 --- a/GPy/inference/latent_function_inference/varDTC.py +++ b/GPy/inference/latent_function_inference/varDTC.py @@ -80,7 +80,7 @@ class VarDTC(object): # no backsubstitution because of bound explosion on tr(A) if not... LmInv, _ = dtrtri(Lm, lower=1) A = LmInv.T.dot(psi2_beta.dot(LmInv)) - print A.sum() + #print A.sum() else: if het_noise: tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 6a8bc745..c97807fb 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -160,6 +160,9 @@ class kern(Parameterized): return newkern + def __call__(self, X, X2=None): + return self.K(X, X2) + def __mul__(self, other): """ Here we overload the '*' operator. 
See self.prod for more information""" return self.prod(other) @@ -550,7 +553,7 @@ class Kern_check_dK_dX(Kern_check_model): Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) def _log_likelihood_gradients(self): - return self.kernel.dK_dX(self.dL_dK, self.X, self.X2).flatten() + return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten() def _get_param_names(self): return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])] @@ -657,7 +660,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False): except NotImplementedError: result=True if verbose: - print("dK_dX not implemented for " + kern.name) + print("gradients_X not implemented for " + kern.name) if result and verbose: print("Check passed.") if not result: @@ -673,7 +676,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False): except NotImplementedError: result=True if verbose: - print("dK_dX not implemented for " + kern.name) + print("gradients_X not implemented for " + kern.name) if result and verbose: print("Check passed.") if not result: @@ -689,7 +692,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False): except NotImplementedError: result=True if verbose: - print("dK_dX not implemented for " + kern.name) + print("gradients_X not implemented for " + kern.name) if result and verbose: print("Check passed.") if not result: diff --git a/GPy/kern/parts/Brownian.py b/GPy/kern/parts/Brownian.py index bdfa0df5..17f65cbd 100644 --- a/GPy/kern/parts/Brownian.py +++ b/GPy/kern/parts/Brownian.py @@ -51,7 +51,7 @@ class Brownian(Kernpart): def dKdiag_dtheta(self,dL_dKdiag,X,target): target += np.dot(X.flatten(), dL_dKdiag) - def dK_dX(self,dL_dK,X,X2,target): + def gradients_X(self,dL_dK,X,X2,target): raise NotImplementedError, "TODO" #target += self.variance #target -= self.variance*theta(X-X2.T) diff --git a/GPy/kern/parts/Matern32.py b/GPy/kern/parts/Matern32.py index 40da79f0..a95f0bcf 100644 --- a/GPy/kern/parts/Matern32.py +++ b/GPy/kern/parts/Matern32.py @@ -96,7 +96,7 @@ class Matern32(Kernpart): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" target[0] += np.sum(dL_dKdiag) - def dK_dX(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2, target): """derivative of the covariance matrix with respect to X.""" if X2 is None: dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None] @@ -105,8 +105,8 @@ class Matern32(Kernpart): else: dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None] ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf) - dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2)) - target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) + gradients_X = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2)) + target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) def dKdiag_dX(self, dL_dKdiag, X, target): pass diff --git a/GPy/kern/parts/Matern52.py b/GPy/kern/parts/Matern52.py index 4bf4a1a8..1f87fefb 100644 --- a/GPy/kern/parts/Matern52.py +++ b/GPy/kern/parts/Matern52.py @@ -96,7 +96,7 @@ class Matern52(Kernpart): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" target[0] += np.sum(dL_dKdiag) - def dK_dX(self,dL_dK,X,X2,target): + def gradients_X(self,dL_dK,X,X2,target): """derivative of the 
covariance matrix with respect to X.""" if X2 is None: dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None] @@ -104,8 +104,8 @@ class Matern52(Kernpart): else: dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) - dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2)) - target += np.sum(dK_dX*dL_dK.T[:,:,None],0) + gradients_X = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2)) + target += np.sum(gradients_X*dL_dK.T[:,:,None],0) def dKdiag_dX(self,dL_dKdiag,X,target): pass diff --git a/GPy/kern/parts/eq_ode1.py b/GPy/kern/parts/eq_ode1.py index 70e3c49d..85bb6379 100644 --- a/GPy/kern/parts/eq_ode1.py +++ b/GPy/kern/parts/eq_ode1.py @@ -193,7 +193,7 @@ class Eq_ode1(Kernpart): def dKdiag_dtheta(self,dL_dKdiag,index,target): pass - def dK_dX(self,dL_dK,X,X2,target): + def gradients_X(self,dL_dK,X,X2,target): pass def _extract_t_indices(self, X, X2=None, dL_dK=None): diff --git a/GPy/kern/parts/exponential.py b/GPy/kern/parts/exponential.py index f568b66b..7cd92aff 100644 --- a/GPy/kern/parts/exponential.py +++ b/GPy/kern/parts/exponential.py @@ -95,13 +95,13 @@ class Exponential(Kernpart): # NB: derivative of diagonal elements wrt lengthscale is 0 target[0] += np.sum(dL_dKdiag) - def dK_dX(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2, target): """derivative of the covariance matrix with respect to X.""" if X2 is None: X2 = X dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None] ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf) - dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2)) - target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) + gradients_X = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2)) + target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) def dKdiag_dX(self, dL_dKdiag, X, target): pass diff --git a/GPy/kern/parts/fixed.py b/GPy/kern/parts/fixed.py index 67baea91..dd5bdb85 100644 --- a/GPy/kern/parts/fixed.py +++ b/GPy/kern/parts/fixed.py @@ -34,7 +34,7 @@ class Fixed(Kernpart): def dK_dtheta(self, partial, X, X2, target): target += (partial * self.fixed_K).sum() - def dK_dX(self, partial, X, X2, target): + def gradients_X(self, partial, X, X2, target): pass def dKdiag_dX(self, partial, X, target): diff --git a/GPy/kern/parts/gibbs.py b/GPy/kern/parts/gibbs.py index f47144e1..717703ce 100644 --- a/GPy/kern/parts/gibbs.py +++ b/GPy/kern/parts/gibbs.py @@ -97,7 +97,7 @@ class Gibbs(Kernpart): target+= np.hstack([(dL_dK*self._K_dvar).sum(), gmapping]) - def dK_dX(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2, target): """Derivative of the covariance matrix with respect to X.""" # First account for gradients arising from presence of X in exponent. 
self._K_computations(X, X2) @@ -105,8 +105,8 @@ class Gibbs(Kernpart): _K_dist = 2*(X[:, None, :] - X[None, :, :]) else: _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co - dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2)) - target += np.sum(dK_dX*dL_dK.T[:, :, None], 0) + gradients_X = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2)) + target += np.sum(gradients_X*dL_dK.T[:, :, None], 0) # Now account for gradients arising from presence of X in lengthscale. self._dK_computations(dL_dK) if X2 is None: diff --git a/GPy/kern/parts/hetero.py b/GPy/kern/parts/hetero.py index d3939563..f48dddb4 100644 --- a/GPy/kern/parts/hetero.py +++ b/GPy/kern/parts/hetero.py @@ -90,7 +90,7 @@ class Hetero(Kernpart): """Gradient of diagonal of covariance with respect to parameters.""" target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X) - def dK_dX(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2, target): """Derivative of the covariance matrix with respect to X.""" if X2==None or X2 is X: dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1] diff --git a/GPy/kern/parts/hierarchical.py b/GPy/kern/parts/hierarchical.py index c629f6b9..43dddd2d 100644 --- a/GPy/kern/parts/hierarchical.py +++ b/GPy/kern/parts/hierarchical.py @@ -55,14 +55,14 @@ class Hierarchical(Kernpart): [[[[k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)] - def dK_dX(self,dL_dK,X,X2,target): + def gradients_X(self,dL_dK,X,X2,target): raise NotImplementedError #X,slices = X[:,:-1],index_to_slices(X[:,-1]) #if X2 is None: #X2,slices2 = X,slices #else: #X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) - #[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] + #[[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] # def dKdiag_dX(self,dL_dKdiag,X,target): raise NotImplementedError diff --git a/GPy/kern/parts/independent_outputs.py b/GPy/kern/parts/independent_outputs.py index f88b0ff5..8c0959c5 100644 --- a/GPy/kern/parts/independent_outputs.py +++ b/GPy/kern/parts/independent_outputs.py @@ -79,13 +79,13 @@ class IndependentOutputs(Kernpart): [[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] - def dK_dX(self,dL_dK,X,X2,target): + def gradients_X(self,dL_dK,X,X2,target): X,slices = X[:,:-1],index_to_slices(X[:,-1]) if X2 is None: X2,slices2 = X,slices else: X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) - [[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] + [[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] def dKdiag_dX(self,dL_dKdiag,X,target): X,slices = X[:,:-1],index_to_slices(X[:,-1]) diff --git a/GPy/kern/parts/kernpart.py b/GPy/kern/parts/kernpart.py index 8314e7a7..2583d525 100644 --- a/GPy/kern/parts/kernpart.py +++ b/GPy/kern/parts/kernpart.py @@ -20,7 +20,6 @@ class Kernpart(Parameterized): # the number of optimisable parameters # the name of the 
covariance function. # link to parameterized objects - self._parameters_ = [] #self._X = None def connect_input(self, X): @@ -106,7 +105,7 @@ class Kernpart(Parameterized): raise NotImplementedError def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): raise NotImplementedError - def dK_dX(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2, target): raise NotImplementedError def dKdiag_dX(self, dL_dK, X, target): raise NotImplementedError diff --git a/GPy/kern/parts/linear.py b/GPy/kern/parts/linear.py index 62f1ac36..6ead4549 100644 --- a/GPy/kern/parts/linear.py +++ b/GPy/kern/parts/linear.py @@ -57,6 +57,27 @@ class Linear(Kernpart): def on_input_change(self, X): self._K_computations(X, None) + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + self._psi_computations(Z, mu, S) + + # psi0: + tmp = dL_dpsi0[:, None] * self.mu2_S + if self.ARD: self.variances.gradient = tmp.sum(0) + else: self.variances.gradient = tmp.sum() + + #psi1 + self.dK_dtheta(dL_dpsi1, mu, Z, self.variances.gradient) + + #from psi2 + tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :]) + if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0) + else: self.variances.gradient += tmp.sum() + + #from Kmm + self._K_computations(Z, None) + self.dK_dtheta(dL_dKmm, Z, None, self.variances.gradient) + + # def _get_params(self): # return self.variances # @@ -107,7 +128,7 @@ class Linear(Kernpart): else: target += tmp.sum() - def dK_dX(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2, target): if X2 is None: target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) else: @@ -150,7 +171,7 @@ class Linear(Kernpart): target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): - self.dK_dX(dL_dpsi1.T, Z, mu, target) + self.gradients_X(dL_dpsi1.T, Z, mu, target) def psi2(self, Z, mu, S, target): self._psi_computations(Z, mu, S) @@ -182,7 +203,7 @@ class Linear(Kernpart): def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S): tmp = np.zeros((mu.shape[0], Z.shape[0])) self.K(mu,Z,tmp) - self.dK_dX(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu) + self.gradients_X(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu) Zs = Z*self.variances Zs_sq = Zs[:,None,:]*Zs[None,:,:] diff --git a/GPy/kern/parts/mlp.py b/GPy/kern/parts/mlp.py index e68aaa72..2ba25802 100644 --- a/GPy/kern/parts/mlp.py +++ b/GPy/kern/parts/mlp.py @@ -107,7 +107,7 @@ class MLP(Kernpart): target[0] += np.sum(self._K_dvar*dL_dK) - def dK_dX(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2, target): """Derivative of the covariance matrix with respect to X""" self._K_computations(X, X2) arg = self._K_asin_arg diff --git a/GPy/kern/parts/poly.py b/GPy/kern/parts/poly.py index 98c520f0..80abab60 100644 --- a/GPy/kern/parts/poly.py +++ b/GPy/kern/parts/poly.py @@ -99,7 +99,7 @@ class POLY(Kernpart): target[2] += base_cov_grad.sum() - def dK_dX(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2, target): """Derivative of the covariance matrix with respect to X""" self._K_computations(X, X2) arg = self._K_poly_arg diff --git a/GPy/kern/parts/prod.py b/GPy/kern/parts/prod.py index 62eed2aa..07286a82 100644 --- a/GPy/kern/parts/prod.py +++ b/GPy/kern/parts/prod.py @@ -81,21 +81,21 @@ class Prod(Kernpart): self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params]) 
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:]) - def dK_dX(self,dL_dK,X,X2,target): + def gradients_X(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" self._K_computations(X,X2) if X2 is None: if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize): - self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1]) - self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2]) + self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1]) + self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2]) else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize): - #NOTE The indices column in the inputs makes the ki.dK_dX fail when passing None instead of X[:,self.slicei] + #NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei] X2 = X - self.k1.dK_dX(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) - self.k2.dK_dX(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) + self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) + self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) else: - self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) - self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) + self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) + self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) def dKdiag_dX(self, dL_dKdiag, X, target): K1 = np.zeros(X.shape[0]) @@ -103,8 +103,8 @@ class Prod(Kernpart): self.k1.Kdiag(X[:,self.slice1],K1) self.k2.Kdiag(X[:,self.slice2],K2) - self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1]) - self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2]) + self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1]) + self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2]) def _K_computations(self,X,X2): if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): diff --git a/GPy/kern/parts/prod_orthogonal.py b/GPy/kern/parts/prod_orthogonal.py index 237c9557..f8d1c3b2 100644 --- a/GPy/kern/parts/prod_orthogonal.py +++ b/GPy/kern/parts/prod_orthogonal.py @@ -67,11 +67,11 @@ class prod_orthogonal(Kernpart): self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params]) self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:]) - def dK_dX(self,dL_dK,X,X2,target): + def gradients_X(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" self._K_computations(X,X2) - self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target) - self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target) + self.k1.gradients_X(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target) + self.k2.gradients_X(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target) def dKdiag_dX(self, dL_dKdiag, X, target): K1 = np.zeros(X.shape[0]) @@ -79,8 +79,8 @@ class prod_orthogonal(Kernpart): self.k1.Kdiag(X[:,0:self.k1.input_dim],K1) 
self.k2.Kdiag(X[:,self.k1.input_dim:],K2) - self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.input_dim], target) - self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.input_dim:], target) + self.k1.gradients_X(dL_dKdiag*K2, X[:,:self.k1.input_dim], target) + self.k2.gradients_X(dL_dKdiag*K1, X[:,self.k1.input_dim:], target) def _K_computations(self,X,X2): if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): diff --git a/GPy/kern/parts/rational_quadratic.py b/GPy/kern/parts/rational_quadratic.py index a75a5b11..bd623320 100644 --- a/GPy/kern/parts/rational_quadratic.py +++ b/GPy/kern/parts/rational_quadratic.py @@ -68,7 +68,7 @@ class RationalQuadratic(Kernpart): target[0] += np.sum(dL_dKdiag) # here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged - def dK_dX(self,dL_dK,X,X2,target): + def gradients_X(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" if X2 is None: dist2 = np.square((X-X.T)/self.lengthscale) diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/parts/rbf.py index 4247eb9c..e7bc8624 100644 --- a/GPy/kern/parts/rbf.py +++ b/GPy/kern/parts/rbf.py @@ -51,8 +51,11 @@ class RBF(Kernpart): lengthscale = np.ones(self.input_dim) self.variance = Param('variance', variance) + self.lengthscale = Param('lengthscale', lengthscale) self.lengthscale.add_observer(self, self.update_lengthscale) + self.update_lengthscale(self.lengthscale) + self.add_parameters(self.variance, self.lengthscale) self.parameters_changed() # initializes cache @@ -114,7 +117,7 @@ class RBF(Kernpart): self._K_computations(X, Z) self.variance.gradient += np.sum(dL_dKnm * self._K_dvar) if self.ARD: - self.lengthscales.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z) + self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z) else: self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm) @@ -123,7 +126,7 @@ class RBF(Kernpart): self._K_computations(Z, None) self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) if self.ARD: - self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) + self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) @@ -157,7 +160,7 @@ class RBF(Kernpart): self._K_computations(Z, None) self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) if self.ARD: - self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) + self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) @@ -168,8 +171,8 @@ class RBF(Kernpart): _K_dist = 2*(X[:, None, :] - X[None, :, :]) else: _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. 
- dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) - target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) + gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) + target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) def dKdiag_dX(self, dL_dKdiag, X, target): pass diff --git a/GPy/kern/parts/rbf_inv.py b/GPy/kern/parts/rbf_inv.py index c4461267..0c0168a6 100644 --- a/GPy/kern/parts/rbf_inv.py +++ b/GPy/kern/parts/rbf_inv.py @@ -142,14 +142,14 @@ class RBFInv(RBF): else: target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) * (-self.lengthscale2) - def dK_dX(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2, target): self._K_computations(X, X2) if X2 is None: _K_dist = 2*(X[:, None, :] - X[None, :, :]) else: _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. - dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) - target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) + gradients_X = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) + target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) def dKdiag_dX(self, dL_dKdiag, X, target): pass diff --git a/GPy/kern/parts/rbfcos.py b/GPy/kern/parts/rbfcos.py index b6411e0a..fc4a376a 100644 --- a/GPy/kern/parts/rbfcos.py +++ b/GPy/kern/parts/rbfcos.py @@ -88,7 +88,7 @@ class RBFCos(Kernpart): def dKdiag_dtheta(self,dL_dKdiag,X,target): target[0] += np.sum(dL_dKdiag) - def dK_dX(self,dL_dK,X,X2,target): + def gradients_X(self,dL_dK,X,X2,target): #TODO!!! raise NotImplementedError diff --git a/GPy/kern/parts/ss_rbf.py b/GPy/kern/parts/ss_rbf.py index a234d428..cab8fd11 100644 --- a/GPy/kern/parts/ss_rbf.py +++ b/GPy/kern/parts/ss_rbf.py @@ -144,8 +144,8 @@ class SS_RBF(Kernpart): _K_dist = 2*(X[:, None, :] - X[None, :, :]) else: _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. 
diff --git a/GPy/kern/parts/ss_rbf.py b/GPy/kern/parts/ss_rbf.py
index a234d428..cab8fd11 100644
--- a/GPy/kern/parts/ss_rbf.py
+++ b/GPy/kern/parts/ss_rbf.py
@@ -144,8 +144,8 @@ class SS_RBF(Kernpart):
             _K_dist = 2*(X[:, None, :] - X[None, :, :])
         else:
             _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
-        dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
-        target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
+        gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
+        target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
 
     def dKdiag_dX(self, dL_dKdiag, X, target):
         pass
diff --git a/GPy/kern/parts/symmetric.py b/GPy/kern/parts/symmetric.py
index d47fbd9d..ef9a8dd5 100644
--- a/GPy/kern/parts/symmetric.py
+++ b/GPy/kern/parts/symmetric.py
@@ -54,7 +54,7 @@ class Symmetric(Kernpart):
 
         self.k.dK_dtheta(dL_dK,AX,AX2,target)
 
-    def dK_dX(self,dL_dK,X,X2,target):
+    def gradients_X(self,dL_dK,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         AX = np.dot(X,self.transform)
         if X2 is None:
@@ -62,10 +62,10 @@ class Symmetric(Kernpart):
             ZX2 = AX
         else:
             AX2 = np.dot(X2, self.transform)
-        self.k.dK_dX(dL_dK, X, X2, target)
-        self.k.dK_dX(dL_dK, AX, X2, target)
-        self.k.dK_dX(dL_dK, X, AX2, target)
-        self.k.dK_dX(dL_dK, AX ,AX2, target)
+        self.k.gradients_X(dL_dK, X, X2, target)
+        self.k.gradients_X(dL_dK, AX, X2, target)
+        self.k.gradients_X(dL_dK, X, AX2, target)
+        self.k.gradients_X(dL_dK, AX ,AX2, target)
 
     def Kdiag(self,X,target):
         """Compute the diagonal of the covariance matrix associated to X."""
diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py
index ea603eab..46f975d2 100644
--- a/GPy/kern/parts/sympykern.py
+++ b/GPy/kern/parts/sympykern.py
@@ -357,7 +357,7 @@ class spkern(Kernpart):
     def dKdiag_dtheta(self,partial,X,target):
         self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial)
 
-    def dK_dX(self,partial,X,Z,target):
+    def gradients_X(self,partial,X,Z,target):
         if Z is None:
             self._weave_inline(self._dK_dX_code_X, X, target, Z, partial)
         else:
diff --git a/GPy/mappings/kernel.py b/GPy/mappings/kernel.py
index ccd1462a..94ce203f 100644
--- a/GPy/mappings/kernel.py
+++ b/GPy/mappings/kernel.py
@@ -57,4 +57,4 @@ class Kernel(Mapping):
         return np.hstack((self._df_dA.flatten(), self._df_dbias))
 
     def df_dX(self, dL_df, X):
-        return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
+        return self.kern.gradients_X((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
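The df_dX change above keeps the same chain rule: the broadcast-and-sum (dL_df[:, None, :] * self.A[None, :, :]).sum(2) is simply dL_df dot A.T, mapping the gradient with respect to the mapped outputs back onto the K(X, self.X) block before it is handed to gradients_X. A quick check of that identity with made-up shapes (the names and shapes here are placeholders, not the Mapping's actual attributes):

    import numpy as np

    rng = np.random.RandomState(1)
    dL_df = rng.randn(6, 3)   # (num_points, output_dim)
    A = rng.randn(5, 3)       # (num basis points, output_dim), stand-in for self.A

    broadcast = (dL_df[:, None, :] * A[None, :, :]).sum(2)   # as written in df_dX
    matmul = dL_df.dot(A.T)                                  # the same contraction

    print(np.allclose(broadcast, matmul))   # True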
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index 7a22b5ea..78851147 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -2,7 +2,6 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import numpy as np
-import itertools
 from gplvm import GPLVM
 from .. import kern
 from ..core import SparseGP
@@ -23,15 +22,10 @@ class BayesianGPLVM(SparseGP, GPLVM):
     :type init: 'PCA'|'random'
 
     """
-    def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
-                 Z=None, kernel=None, name='bayesian gplvm', **kwargs):
-        if type(likelihood_or_Y) is np.ndarray:
-            likelihood = Gaussian(likelihood_or_Y)
-        else:
-            likelihood = likelihood_or_Y
-
+    def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
+                 Z=None, kernel=None, inference_method=None, likelihood=Gaussian(), name='bayesian gplvm', **kwargs):
         if X == None:
-            X = self.initialise_latent(init, input_dim, likelihood.Y)
+            X = self.initialise_latent(init, input_dim, Y)
         self.init = init
 
         if X_variance is None:
@@ -44,9 +38,9 @@ class BayesianGPLVM(SparseGP, GPLVM):
         if kernel is None:
             kernel = kern.rbf(input_dim) # + kern.white(input_dim)
 
-        SparseGP.__init__(self, X=X, likelihood=likelihood, kernel=kernel, Z=Z, X_variance=X_variance, name=name, **kwargs)
-        self.q = Normal(self.X, self.X_variance)
-        self.add_parameter(self.q, gradient=self._dbound_dmuS, index=0)
+        self.q = Normal(X, X_variance)
+        SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs)
+        self.add_parameter(self.q, index=0)
         self.ensure_default_constraints()
 
     def _getstate(self):
@@ -94,9 +88,9 @@ class BayesianGPLVM(SparseGP, GPLVM):
         return dKL_dmu, dKL_dS
 
     def dL_dmuS(self):
-        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance)
-        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance)
-        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance)
+        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance)
+        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
+        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
         dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
         dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
 
@@ -107,10 +101,25 @@ class BayesianGPLVM(SparseGP, GPLVM):
         var_S = np.sum(self.X_variance - np.log(self.X_variance))
         return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
 
-    def log_likelihood(self):
-        ll = SparseGP.log_likelihood(self)
-        kl = self.KL_divergence()
-        return ll - kl
+    def parameters_changed(self):
+        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
+        self._log_marginal_likelihood -= self.KL_divergence()
+
+        #The derivative of the bound wrt the inducing inputs Z
+        self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
+        self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
+        self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
+
+        dL_dmu, dL_dS = self.dL_dmuS()
+        dKL_dmu, dKL_dS = self.dKL_dmuS()
+        self.q.means.gradient = dL_dmu - dKL_dmu
+        self.q.variances.gradient = dL_dS - dKL_dS
+
+
+#     def log_likelihood(self):
+#         ll = SparseGP.log_likelihood(self)
+#         kl = self.KL_divergence()
+#         return ll - kl
 
     def _dbound_dmuS(self):
         dKL_dmu, dKL_dS = self.dKL_dmuS()
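For reference, the KL_divergence() term that parameters_changed now subtracts from the log marginal likelihood matches the standard closed form for a factorised Gaussian q(X) against a unit-Gaussian prior. Writing mu_{nq} for the variational means and S_{nq} for the variances (N points, Q latent dimensions; the symbols are ours, and var_mean is presumably the sum of squared means), the code's 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data corresponds to:

    \mathrm{KL}\big(q(X)\,\|\,p(X)\big)
        = \frac{1}{2} \sum_{n=1}^{N} \sum_{q=1}^{Q}
          \left( \mu_{nq}^{2} + S_{nq} - \log S_{nq} \right) - \frac{N Q}{2}

with var_S = np.sum(self.X_variance - np.log(self.X_variance)) supplying the S_{nq} - log S_{nq} part.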
@@ -181,18 +190,18 @@ class BayesianGPLVM(SparseGP, GPLVM):
         """
         dmu_dX = np.zeros_like(Xnew)
         for i in range(self.Z.shape[0]):
-            dmu_dX += self.kern.dK_dX(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
+            dmu_dX += self.kern.gradients_X(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
         return dmu_dX
 
     def dmu_dXnew(self, Xnew):
         """
         Individual gradient of prediction at Xnew w.r.t. each sample in Xnew
         """
-        dK_dX = np.zeros((Xnew.shape[0], self.num_inducing))
+        gradients_X = np.zeros((Xnew.shape[0], self.num_inducing))
         ones = np.ones((1, 1))
         for i in range(self.Z.shape[0]):
-            dK_dX[:, i] = self.kern.dK_dX(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
-        return np.dot(dK_dX, self.Cpsi1Vf)
+            gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
+        return np.dot(gradients_X, self.Cpsi1Vf)
 
     def plot_steepest_gradient_map(self, *args, ** kwargs):
         """
diff --git a/GPy/models/bcgplvm.py b/GPy/models/bcgplvm.py
index 9f5866c3..f21a01f4 100644
--- a/GPy/models/bcgplvm.py
+++ b/GPy/models/bcgplvm.py
@@ -44,7 +44,7 @@ class BCGPLVM(GPLVM):
         GP._set_params(self, x[self.mapping.num_params:])
 
     def _log_likelihood_gradients(self):
-        dL_df = self.kern.dK_dX(self.dL_dK, self.X)
+        dL_df = self.kern.gradients_X(self.dL_dK, self.X)
         dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
         return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))
diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py
index fc328ff2..06481b81 100644
--- a/GPy/models/gplvm.py
+++ b/GPy/models/gplvm.py
@@ -60,7 +60,7 @@ class GPLVM(GP):
     def jacobian(self,X):
         target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
         for i in range(self.output_dim):
-            target[:,:,i]=self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
+            target[:,:,i]=self.kern.gradients_X(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
         return target
 
     def magnification(self,X):
diff --git a/GPy/models/sparse_gplvm.py b/GPy/models/sparse_gplvm.py
index e3024264..5c10d0b8 100644
--- a/GPy/models/sparse_gplvm.py
+++ b/GPy/models/sparse_gplvm.py
@@ -52,7 +52,7 @@ class SparseGPLVM(SparseGPRegression, GPLVM):
 
     def dL_dX(self):
         dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X)
-        dL_dX += self.kern.dK_dX(self.dL_dpsi1, self.X, self.Z)
+        dL_dX += self.kern.gradients_X(self.dL_dpsi1, self.X, self.Z)
         return dL_dX
diff --git a/GPy/plotting/matplot_dep/variational_plots.py b/GPy/plotting/matplot_dep/variational_plots.py
index 9f791dd1..7c89a088 100644
--- a/GPy/plotting/matplot_dep/variational_plots.py
+++ b/GPy/plotting/matplot_dep/variational_plots.py
@@ -1,4 +1,5 @@
-import pylab as pb
+import pylab as pb, numpy as np
+from ...util.misc import param_to_array
 
 def plot(parameterized, fignum=None, ax=None, colors=None):
     """
diff --git a/GPy/util/caching.py b/GPy/util/caching.py
index d8893021..374f9600 100644
--- a/GPy/util/caching.py
+++ b/GPy/util/caching.py
@@ -1,10 +1,9 @@
-import numpy as np
-from ..core.parameterization.array_core import ObservableArray
+from ..core.parameterization.array_core import ObservableArray, ParamList
 
 class Cacher(object):
     def __init__(self, operation, limit=5):
         self.limit = int(limit)
         self.operation=operation
-        self.cached_inputs = []
+        self.cached_inputs = ParamList([])
         self.cached_outputs = []
         self.inputs_changed = []
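A likely motivation for storing Cacher.cached_inputs in a ParamList rather than a plain list: the in operator on a plain list compares elements with ==, which for numpy arrays yields an elementwise result whose truth value is ambiguous, whereas an identity-based __contains__ avoids the comparison altogether. A minimal illustration, where IdentityList is an illustrative stand-in rather than GPy's class:

    import numpy as np

    class IdentityList(list):
        """List whose membership test uses object identity, not ==."""
        def __contains__(self, item):
            return any(el is item for el in self)

    a = np.arange(3)

    try:
        a in [np.zeros(3), np.ones(3)]      # plain list: elementwise ==, then bool(...)
    except ValueError as e:
        print("plain list raises: %s" % e)  # truth value of an array ... is ambiguous

    cache = IdentityList([np.zeros(3), a])
    print(a in cache)                       # True  (same object)
    print(a.copy() in cache)                # False (equal values, different object)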