diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index 839529d6..a42d76ed 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -2,7 +2,9 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) from model import * -from parameterization.parameterized import * +from parameterization.parameterized import adjust_name_for_printing, Parameterizable +from parameterization.param import Param, ParamConcatenation + from gp import GP from sparse_gp import SparseGP from svigp import SVIGP diff --git a/GPy/core/model.py b/GPy/core/model.py index 55083aaf..c067d51d 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -4,12 +4,8 @@ from .. import likelihoods from ..inference import optimization -from ..util.linalg import jitchol from ..util.misc import opt_wrapper from parameterization import Parameterized -from parameterization.parameterized import UNFIXED -from parameterization.domains import _POSITIVE, _REAL -from parameterization.index_operations import ParameterIndexOperations import multiprocessing as mp import numpy as np from numpy.linalg.linalg import LinAlgError @@ -240,7 +236,7 @@ class Model(Parameterized): constrained positive. """ raise DeprecationWarning, 'parameters now have default constraints' - positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity'] + #positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity'] # param_names = self._get_param_names() # for s in positive_strings: diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index 9f2c7ae6..7892e94a 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -36,6 +36,8 @@ class ObservableArray(np.ndarray, Observable): # see InfoArray.__array_finalize__ for comments if obj is None: return self._observers_ = getattr(obj, '_observers_', None) + def __array_wrap__(self, out_arr, context=None): + return out_arr.view(np.ndarray) def _s_not_empty(self, s): # this checks whether there is something picked by this slice. @@ -68,11 +70,6 @@ class ObservableArray(np.ndarray, Observable): def copy(self, *args): return self.__copy__(*args) - def __ror__(self, *args, **kwargs): - r = np.ndarray.__ror__(self, *args, **kwargs) - self._notify_observers() - return r - def __ilshift__(self, *args, **kwargs): r = np.ndarray.__ilshift__(self, *args, **kwargs) self._notify_observers() @@ -83,71 +80,19 @@ class ObservableArray(np.ndarray, Observable): self._notify_observers() return r - def __rrshift__(self, *args, **kwargs): - r = np.ndarray.__rrshift__(self, *args, **kwargs) - self._notify_observers() - return r def __ixor__(self, *args, **kwargs): r = np.ndarray.__ixor__(self, *args, **kwargs) self._notify_observers() return r - def __rxor__(self, *args, **kwargs): - r = np.ndarray.__rxor__(self, *args, **kwargs) - self._notify_observers() - return r - - - - def __rdivmod__(self, *args, **kwargs): - r = np.ndarray.__rdivmod__(self, *args, **kwargs) - self._notify_observers() - return r - - - def __radd__(self, *args, **kwargs): - r = np.ndarray.__radd__(self, *args, **kwargs) - self._notify_observers() - return r - - - def __rdiv__(self, *args, **kwargs): - r = np.ndarray.__rdiv__(self, *args, **kwargs) - self._notify_observers() - return r - - - def __rtruediv__(self, *args, **kwargs): - r = np.ndarray.__rtruediv__(self, *args, **kwargs) - self._notify_observers() - return r - def __ipow__(self, *args, **kwargs): r = np.ndarray.__ipow__(self, *args, **kwargs) self._notify_observers() return r - - def __rmul__(self, *args, **kwargs): - r = np.ndarray.__rmul__(self, *args, **kwargs) - self._notify_observers() - return r - - - def __rpow__(self, *args, **kwargs): - r = np.ndarray.__rpow__(self, *args, **kwargs) - self._notify_observers() - return r - - - def __rsub__(self, *args, **kwargs): - r = np.ndarray.__rsub__(self, *args, **kwargs) - self._notify_observers() - return r - - + def __ifloordiv__(self, *args, **kwargs): r = np.ndarray.__ifloordiv__(self, *args, **kwargs) self._notify_observers() @@ -178,12 +123,6 @@ class ObservableArray(np.ndarray, Observable): return r - def __rfloordiv__(self, *args, **kwargs): - r = np.ndarray.__rfloordiv__(self, *args, **kwargs) - self._notify_observers() - return r - - def __iand__(self, *args, **kwargs): r = np.ndarray.__iand__(self, *args, **kwargs) self._notify_observers() @@ -208,8 +147,74 @@ class ObservableArray(np.ndarray, Observable): return r - def __rshift__(self, *args, **kwargs): - r = np.ndarray.__rshift__(self, *args, **kwargs) - self._notify_observers() - return r +# def __rrshift__(self, *args, **kwargs): +# r = np.ndarray.__rrshift__(self, *args, **kwargs) +# self._notify_observers() +# return r + + +# def __ror__(self, *args, **kwargs): +# r = np.ndarray.__ror__(self, *args, **kwargs) +# self._notify_observers() +# return r + + +# def __rxor__(self, *args, **kwargs): +# r = np.ndarray.__rxor__(self, *args, **kwargs) +# self._notify_observers() +# return r + + + +# def __rdivmod__(self, *args, **kwargs): +# r = np.ndarray.__rdivmod__(self, *args, **kwargs) +# self._notify_observers() +# return r + + +# def __radd__(self, *args, **kwargs): +# r = np.ndarray.__radd__(self, *args, **kwargs) +# self._notify_observers() +# return r + + +# def __rdiv__(self, *args, **kwargs): +# r = np.ndarray.__rdiv__(self, *args, **kwargs) +# self._notify_observers() +# return r + + +# def __rtruediv__(self, *args, **kwargs): +# r = np.ndarray.__rtruediv__(self, *args, **kwargs) +# self._notify_observers() +# return r + + +# def __rshift__(self, *args, **kwargs): +# r = np.ndarray.__rshift__(self, *args, **kwargs) +# self._notify_observers() +# return r + + +# def __rmul__(self, *args, **kwargs): +# r = np.ndarray.__rmul__(self, *args, **kwargs) +# self._notify_observers() +# return r + + +# def __rpow__(self, *args, **kwargs): +# r = np.ndarray.__rpow__(self, *args, **kwargs) +# self._notify_observers() +# return r + + +# def __rsub__(self, *args, **kwargs): +# r = np.ndarray.__rsub__(self, *args, **kwargs) +# self._notify_observers() +# return r + +# def __rfloordiv__(self, *args, **kwargs): +# r = np.ndarray.__rfloordiv__(self, *args, **kwargs) +# self._notify_observers() +# return r diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 37e710a8..75d9faf2 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -3,7 +3,7 @@ import itertools import numpy -from parameter_core import Constrainable, Gradcheckable, Indexable, Parameterizable, adjust_name_for_printing +from parameter_core import Constrainable, Gradcheckable, Indexable, Parentable, adjust_name_for_printing from array_core import ObservableArray, ParamList ###### printing @@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision __print_threshold__ = 5 ###### -class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameterizable): +class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable): """ Parameter object for GPy models. @@ -80,8 +80,6 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri self.constraints = getattr(obj, 'constraints', None) self.priors = getattr(obj, 'priors', None) - #def __array_wrap__(self, out_arr, context=None): - # return out_arr.view(numpy.ndarray) #=========================================================================== # Pickling operations #=========================================================================== @@ -116,7 +114,14 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri self._parent_index_ = state.pop() self._direct_parent_ = state.pop() self.name = state.pop() - + + def copy(self, *args): + constr = self.constraints.copy() + priors = self.priors.copy() + p = Param(self.name, self.view(numpy.ndarray).copy(), self._default_constraint_) + p.constraints = constr + p.priors = priors + return p #=========================================================================== # get/set parameters #=========================================================================== @@ -216,7 +221,9 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri def _description_str(self): if self.size <= 1: return ["%f" % self] else: return [str(self.shape)] - def parameter_names(self, add_name=False): + def parameter_names(self, add_self=False, adjust_for_printing=False): + if adjust_for_printing: + return [adjust_name_for_printing(self.name)] return [self.name] @property def flattened_parameters(self): @@ -233,14 +240,9 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri @property def _ties_str(self): return [t._short() for t in self._tied_to_] or [''] - @property - def name_hirarchical(self): - if self.has_parent(): - return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) - return adjust_name_for_printing(self.name) def __repr__(self, *args, **kwargs): name = "\033[1m{x:s}\033[0;0m:\n".format( - x=self.name_hirarchical) + x=self.hirarchy_name()) return name + super(Param, self).__repr__(*args, **kwargs) def _ties_for(self, rav_index): # size = sum(p.size for p in self._tied_to_) @@ -274,12 +276,12 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri gen = map(lambda x: " ".join(map(str, x)), gen) return reduce(lambda a, b:max(a, len(b)), gen, len(header)) def _max_len_values(self): - return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.name_hirarchical)) + return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.hirarchy_name())) def _max_len_index(self, ind): return reduce(lambda a, b:max(a, len(str(b))), ind, len(__index_name__)) def _short(self): # short string to print - name = self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) + name = self.hirarchy_name() if self._realsize_ < 2: return name ind = self._indices() @@ -302,8 +304,8 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri if lp is None: lp = self._max_len_names(prirs, __tie_name__) sep = '-' header_format = " {i:{5}^{2}s} | \033[1m{x:{5}^{1}s}\033[0;0m | {c:{5}^{0}s} | {p:{5}^{4}s} | {t:{5}^{3}s}" - if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing - else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing + if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing + else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing if not ties: ties = itertools.cycle(['']) return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices # except: return super(Param, self).__str__() @@ -335,19 +337,20 @@ class ParamConcatenation(object): if len(params)==1: return params[0] return ParamConcatenation(params) def __setitem__(self, s, val, update=True): + if isinstance(val, ParamConcatenation): + val = val._vals() ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True; vals = self._vals(); vals[s] = val; del val - [numpy.place(p, ind[ps], vals[ps])# and p._notify_tied_parameters() + [numpy.place(p, ind[ps], vals[ps]) and update and p._notify_parameters_changed() for p, ps in zip(self.params, self._param_slices_)] - if update: - self.params[0]._notify_parameters_changed() def _vals(self): return numpy.hstack([p._get_params() for p in self.params]) #=========================================================================== # parameter operations: #=========================================================================== def update_all_params(self): - self.params[0]._notify_parameters_changed() + for p in self.params: + p._notify_parameters_changed() def constrain(self, constraint, warning=True): [param.constrain(constraint, update=False) for param in self.params] diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 275198b2..9a10f317 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -68,6 +68,10 @@ class Parentable(object): return self return self._direct_parent_._highest_parent_ + def _notify_parameters_changed(self): + if self.has_parent(): + self._direct_parent_._notify_parameters_changed() + class Nameable(Parentable): _name = None def __init__(self, name, direct_parent=None, parent_index=None): @@ -80,22 +84,56 @@ class Nameable(Parentable): @name.setter def name(self, name): from_name = self.name + assert isinstance(name, str) self._name = name if self.has_parent(): - self._direct_parent_._name_changed(self, from_name) - + self._direct_parent_._name_changed(self, from_name) + def hirarchy_name(self, adjust_for_printing=True): + if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) + else: adjust = lambda x: x + if self.has_parent(): + return self._direct_parent_.hirarchy_name() + "." + adjust(self.name) + return adjust(self.name) class Parameterizable(Parentable): def __init__(self, *args, **kwargs): super(Parameterizable, self).__init__(*args, **kwargs) from GPy.core.parameterization.array_core import ParamList _parameters_ = ParamList() + self._added_names_ = set() - def parameter_names(self, add_name=False): - if add_name: - return [adjust_name_for_printing(self.name) + "." + xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)] - return [xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)] + def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): + if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) + else: adjust = lambda x: x + if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)] + else: names = [adjust(x.name) for x in self._parameters_] + if add_self: names = map(lambda x: adjust(self.name) + "." + x, names) + return names + + def _add_parameter_name(self, param): + pname = adjust_name_for_printing(param.name) + # and makes sure to not delete programmatically added parameters + if pname in self.__dict__: + if not (param is self.__dict__[pname]): + if pname in self._added_names_: + del self.__dict__[pname] + self._add_parameter_name(param) + else: + self.__dict__[pname] = param + self._added_names_.add(pname) + + def _remove_parameter_name(self, param=None, pname=None): + assert param is None or pname is None, "can only delete either param by name, or the name of a param" + pname = adjust_name_for_printing(pname) or adjust_name_for_printing(param.name) + if pname in self._added_names_: + del self.__dict__[pname] + self._added_names_.remove(pname) + self._connect_parameters() + def _name_changed(self, param, old_name): + self._remove_parameter_name(None, old_name) + self._add_parameter_name(param) + def _collect_gradient(self, target): import itertools [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] @@ -113,6 +151,38 @@ class Parameterizable(Parentable): [p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)] self.parameters_changed() + def copy(self): + """Returns a (deep) copy of the current model""" + import copy + from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView + from .array_core import ParamList + dc = dict() + for k, v in self.__dict__.iteritems(): + if k not in ['_direct_parent_', '_parameters_', '_parent_index_'] + self.parameter_names(): + if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)): + dc[k] = v.copy() + else: + dc[k] = copy.deepcopy(v) + if k == '_parameters_': + params = [p.copy() for p in v] + #dc = copy.deepcopy(self.__dict__) + dc['_direct_parent_'] = None + dc['_parent_index_'] = None + dc['_parameters_'] = ParamList() + s = self.__new__(self.__class__) + s.__dict__ = dc + #import ipdb;ipdb.set_trace() + for p in params: + s.add_parameter(p) + #dc._notify_parent_change() + return s + #return copy.deepcopy(self) + + def _notify_parameters_changed(self): + self.parameters_changed() + if self.has_parent(): + self._direct_parent_._notify_parameters_changed() + def parameters_changed(self): """ This method gets called when parameters have changed. @@ -122,11 +192,6 @@ class Parameterizable(Parentable): """ pass - def _notify_parameters_changed(self): - self.parameters_changed() - if self.has_parent(): - self._direct_parent_._notify_parameters_changed() - class Gradcheckable(Parentable): #=========================================================================== @@ -157,7 +222,7 @@ class Indexable(object): """ raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" -class Constrainable(Nameable, Indexable, Parameterizable): +class Constrainable(Nameable, Indexable, Parentable): def __init__(self, name, default_constraint=None): super(Constrainable,self).__init__(name) self._default_constraint_ = default_constraint @@ -167,6 +232,16 @@ class Constrainable(Nameable, Indexable, Parameterizable): if self._default_constraint_ is not None: self.constrain(self._default_constraint_) + def _disconnect_parent(self, constr=None): + if constr is None: + constr = self.constraints.copy() + self.constraints.clear() + self.constraints = constr + self._direct_parent_ = None + self._parent_index_ = None + self._connect_fixes() + self._notify_parent_change() + #=========================================================================== # Fixing Parameters: #=========================================================================== diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index c8a841c0..12bf936c 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -3,16 +3,15 @@ import numpy; np = numpy -import copy import cPickle import itertools from re import compile, _pattern_type -from param import ParamConcatenation, Param -from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing, Gradcheckable -from transformations import __fixed__, FIXED, UNFIXED +from param import ParamConcatenation +from parameter_core import Constrainable, Pickleable, Observable, Parameterizable, Parentable, adjust_name_for_printing, Gradcheckable +from transformations import __fixed__ from array_core import ParamList -class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): +class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable, Parameterizable, Parentable): """ Parameterized class @@ -63,7 +62,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): self._fixes_ = None self._param_slices_ = [] self._connect_parameters() - self._added_names_ = set() del self._in_init_ def add_parameter(self, param, index=None): @@ -117,17 +115,10 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short()) del self._parameters_[param._parent_index_] self.size -= param.size - constr = param.constraints.copy() - param.constraints.clear() - param.constraints = constr - param._direct_parent_ = None - param._parent_index_ = None - param._connect_fixes() - param._notify_parent_change() - pname = adjust_name_for_printing(param.name) - if pname in self._added_names_: - del self.__dict__[pname] - self._connect_parameters() + + param._disconnect_parent() + self._remove_parameter_name(param) + #self._notify_parent_change() self._connect_fixes() @@ -145,19 +136,9 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): for i, p in enumerate(self._parameters_): p._direct_parent_ = self p._parent_index_ = i - not_unique = [] sizes.append(p.size + sizes[-1]) self._param_slices_.append(slice(sizes[-2], sizes[-1])) - pname = adjust_name_for_printing(p.name) - # and makes sure to not delete programmatically added parameters - if pname in self.__dict__: - if isinstance(self.__dict__[pname], (Parameterized, Param)): - if not p is self.__dict__[pname]: - not_unique.append(pname) - del self.__dict__[pname] - elif not (pname in not_unique): - self.__dict__[pname] = p - self._added_names_.add(pname) + self._add_parameter_name(p) #=========================================================================== # Pickling operations @@ -174,19 +155,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): cPickle.dump(self, f, protocol) else: cPickle.dump(self, f, protocol) - def copy(self): - """Returns a (deep) copy of the current model """ - # dc = dict() - # for k, v in self.__dict__.iteritems(): - # if k not in ['_highest_parent_', '_direct_parent_']: - # dc[k] = copy.deepcopy(v) - # dc = copy.deepcopy(self.__dict__) - # dc['_highest_parent_'] = None - # dc['_direct_parent_'] = None - # s = self.__class__.new() - # s.__dict__ = dc - return copy.deepcopy(self) def __getstate__(self): if self._has_get_set_state(): return self._getstate() @@ -243,7 +212,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): # Optimization handles: #=========================================================================== def _get_param_names(self): - n = numpy.array([p.name_hirarchical + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()]) + n = numpy.array([p.hirarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()]) return n def _get_param_names_transformed(self): n = self._get_param_names() @@ -265,14 +234,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp [numpy.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] return p - def _name_changed(self, param, old_name): - if hasattr(self, old_name) and old_name in self._added_names_: - delattr(self, old_name) - self._added_names_.remove(old_name) - pname = adjust_name_for_printing(param.name) - if pname not in self.__dict__: - self._added_names_.add(pname) - self.__dict__[pname] = param #=========================================================================== # Indexable Handling #=========================================================================== @@ -335,10 +296,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): # you can retrieve the original param through this method, by passing # the copy here return self._parameters_[param._parent_index_] - def hirarchy_name(self): - if self.has_parent(): - return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) + "." - return '' #=========================================================================== # Get/set parameters: #=========================================================================== @@ -348,13 +305,11 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): """ if not isinstance(regexp, _pattern_type): regexp = compile(regexp) found_params = [] - for p in self._parameters_: - if regexp.match(p.name) is not None: + for n, p in itertools.izip(self.parameter_names(False, False, True), self.flattened_parameters): + if regexp.match(n) is not None: found_params.append(p) - if isinstance(p, Parameterized): - found_params.extend(p.grep_param_names(regexp)) return found_params - return [param for param in self._parameters_ if regexp.match(param.name) is not None] + def __getitem__(self, name, paramlist=None): if paramlist is None: paramlist = self.grep_param_names(name) @@ -366,36 +321,22 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): return ParamConcatenation(paramlist) return paramlist[-1] return ParamConcatenation(paramlist) + def __setitem__(self, name, value, paramlist=None): try: param = self.__getitem__(name, paramlist) except AttributeError as a: raise a param[:] = value -# def __getattr__(self, name): -# return self.__getitem__(name) -# def __getattribute__(self, name): -# #try: -# return object.__getattribute__(self, name) - # except AttributeError: - # _, a, tb = sys.exc_info() - # try: - # return self.__getitem__(name) - # except AttributeError: - # raise AttributeError, a.message, tb def __setattr__(self, name, val): - # override the default behaviour, if setting a param, so broadcasting can by used - if hasattr(self, "_parameters_"): - paramlist = self.grep_param_names(name) - if len(paramlist) == 1: self.__setitem__(name, val, paramlist); return + # override the default behaviour, if setting a param, so broadcasting can by used + if hasattr(self, '_parameters_'): + pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False) + if name in pnames: self._parameters_[pnames.index(name)][:] = val; return object.__setattr__(self, name, val); #=========================================================================== # Printing: #=========================================================================== def _short(self): - # short string to print - if self.has_parent(): - return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) - else: - return adjust_name_for_printing(self.name) + return self.hirarchy_name() @property def flattened_parameters(self): return [xi for x in self._parameters_ for xi in x.flattened_parameters] diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index dd0f6c2c..1d436c53 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -2,12 +2,11 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from ..util.linalg import mdot, tdot, symmetrify, backsub_both_sides, dtrtrs, dpotrs, dpotri +from ..util.linalg import mdot from gp import GP from parameterization.param import Param -from ..inference.latent_function_inference import varDTC +from GPy.inference.latent_function_inference import var_dtc from .. import likelihoods -from GPy.util.misc import param_to_array class SparseGP(GP): """ @@ -34,12 +33,12 @@ class SparseGP(GP): def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, X_variance=None, name='sparse gp'): - #pick a sensible inference method + # pick a sensible inference method if inference_method is None: if isinstance(likelihood, likelihoods.Gaussian): - inference_method = varDTC.VarDTC() + inference_method = var_dtc.VarDTC() else: - #inference_method = ?? + # inference_method = ?? raise NotImplementedError, "what to do what to do?" print "defaulting to ", inference_method, "for latent function inference" @@ -55,7 +54,7 @@ class SparseGP(GP): self.parameters_changed() def _update_gradients_Z(self, add=False): - #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed) + # The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed) if not self.Z.is_fixed: if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) @@ -78,13 +77,14 @@ class SparseGP(GP): mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: Kxx = self.kern.K(Xnew, which_parts=which_parts) - var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting + var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx), Kx, [1,0]).T else: - Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) - var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0) + Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)[:, None] + #import ipdb;ipdb.set_trace() + var = Kxx - (np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T * Kx.T[:,:,None]).sum(1) else: # assert which_parts=='all', "swithching out parts of variational kernels is not implemented" - Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts + Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts mu = np.dot(Kx, self.Cpsi1V) if full_cov: raise NotImplementedError, "TODO" @@ -92,7 +92,7 @@ class SparseGP(GP): Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new) psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new) var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) - return mu, var[:,None] + return mu, var def _getstate(self): diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index f612ecd7..2924386f 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -1,14 +1,13 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as _np -default_seed = _np.random.seed(123344) +#default_seed = _np.random.seed(123344) -def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, output_dim=1e4): +def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan=False): """ model for testing purposes. Samples from a GP with rbf kernel and learns the samples with a new kernel. Normally not for optimization, just model cheking """ - from GPy.likelihoods.gaussian import Gaussian import GPy num_inputs = 13 @@ -36,12 +35,17 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0) # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) + p = .3 + m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) + + if nan: + m.inference_method = GPy.inference.latent_function_inference.var_dtc.VarDTCMissingData() + m.Y[_np.random.binomial(1,p,size=(Y.shape)).astype(bool)] = _np.nan + m.parameters_changed() + #=========================================================================== # randomly obstruct data with percentage p - p = .8 - Y_obstruct = Y.copy() - Y_obstruct[_np.random.uniform(size=(Y.shape)) < p] = _np.nan #=========================================================================== #m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing) m.lengthscales = lengthscales @@ -182,6 +186,8 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, return m def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): + _np.random.seed(1234) + x = _np.linspace(0, 4 * _np.pi, N)[:, None] s1 = _np.vectorize(lambda x: _np.sin(x)) s2 = _np.vectorize(lambda x: _np.cos(x)) @@ -276,6 +282,36 @@ def bgplvm_simulation(optimize=True, verbose=1, m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') return m +def bgplvm_simulation_missing_data(optimize=True, verbose=1, + plot=True, plot_sim=False, + max_iters=2e4, + ): + from GPy import kern + from GPy.models import BayesianGPLVM + from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData + + D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10 + _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) + Y = Ylist[0] + k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) + + inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) + m = BayesianGPLVM(Y, Q, init="random", num_inducing=num_inducing, kernel=k) + m.inference_method = VarDTCMissingData() + m.Y[inan] = _np.nan + m.q.variance *= .1 + m.parameters_changed() + + if optimize: + print "Optimizing model:" + m.optimize('bfgs', messages=verbose, max_iters=max_iters, + gtol=.05) + if plot: + m.q.plot("BGPLVM Latent Space 1D") + m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') + return m + + def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): from GPy import kern from GPy.models import MRD diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index c073369f..55567051 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -361,7 +361,7 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1) else: kernel = GPy.kern.rbf(X.shape[1], ARD=1) - kernel += GPy.kern.bias(X.shape[1]) + #kernel += GPy.kern.bias(X.shape[1]) X_variance = np.ones(X.shape) * 0.5 m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance) # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25 @@ -434,10 +434,14 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti return m -def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, optimize=True, plot=True): +def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, optimize=True, plot=True, nan=False): """Run a 2D example of a sparse GP regression.""" + np.random.seed(1234) X = np.random.uniform(-3., 3., (num_samples, 2)) Y = np.sin(X[:, 0:1]) * np.sin(X[:, 1:2]) + np.random.randn(num_samples, 1) * 0.05 + if nan: + inan = np.random.binomial(1,.2,size=Y.shape) + Y[inan] = np.nan # construct kernel rbf = GPy.kern.rbf(2) diff --git a/GPy/inference/__init__.py b/GPy/inference/__init__.py index e69de29b..f1ffd595 100644 --- a/GPy/inference/__init__.py +++ b/GPy/inference/__init__.py @@ -0,0 +1,2 @@ +import latent_function_inference +import optimization \ No newline at end of file diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index 4d635c46..a633c381 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -16,7 +16,9 @@ If the likelihood object is something other than Gaussian, then exact inference is not tractable. We then resort to a Laplace approximation (laplace.py) or expectation propagation (ep.py). -The inference methods return a "Posterior" instance, which is a simple +The inference methods return a +:class:`~GPy.inference.latent_function_inference.posterior.Posterior` +instance, which is a simple structure which contains a summary of the posterior. The model classes can then use this posterior object for making predictions, optimizing hyper-parameters, etc. @@ -26,6 +28,18 @@ etc. from exact_gaussian_inference import ExactGaussianInference from laplace import Laplace expectation_propagation = 'foo' # TODO -from varDTC import VarDTC +from GPy.inference.latent_function_inference.var_dtc import VarDTC from dtc import DTC from fitc import FITC + +# class FullLatentFunctionData(object): +# +# +# class LatentFunctionInference(object): +# def inference(self, kern, X, likelihood, Y, Y_metadata=None): +# """ +# Do inference on the latent functions given a covariance function `kern`, +# inputs and outputs `X` and `Y`, and a likelihood `likelihood`. +# Additional metadata for the outputs `Y` can be given in `Y_metadata`. +# """ +# raise NotImplementedError, "Abstract base class for full inference" \ No newline at end of file diff --git a/GPy/inference/latent_function_inference/dtc.py b/GPy/inference/latent_function_inference/dtc.py index dbbff6d0..1a811de6 100644 --- a/GPy/inference/latent_function_inference/dtc.py +++ b/GPy/inference/latent_function_inference/dtc.py @@ -32,7 +32,7 @@ class DTC(object): #make sure the noise is not hetero beta = 1./np.squeeze(likelihood.variance) if beta.size <1: - raise NotImplementedError, "no hetero noise with this implementatino of DTC" + raise NotImplementedError, "no hetero noise with this implementation of DTC" Kmm = kern.K(Z) Knn = kern.Kdiag(X) @@ -89,4 +89,85 @@ class DTC(object): return post, log_marginal, grad_dict +class vDTC(object): + def __init__(self): + self.const_jitter = 1e-6 + + def inference(self, kern, X, X_variance, Z, likelihood, Y): + assert X_variance is None, "cannot use X_variance with DTC. Try varDTC." + + #TODO: MAX! fix this! + from ...util.misc import param_to_array + Y = param_to_array(Y) + + num_inducing, _ = Z.shape + num_data, output_dim = Y.shape + + #make sure the noise is not hetero + beta = 1./np.squeeze(likelihood.variance) + if beta.size <1: + raise NotImplementedError, "no hetero noise with this implementation of DTC" + + Kmm = kern.K(Z) + Knn = kern.Kdiag(X) + Knm = kern.K(X, Z) + U = Knm + Uy = np.dot(U.T,Y) + + #factor Kmm + Kmmi, L, Li, _ = pdinv(Kmm) + + # Compute A + LiUTbeta = np.dot(Li, U.T)*np.sqrt(beta) + A_ = tdot(LiUTbeta) + trace_term = -0.5*(np.sum(Knn)*beta - np.trace(A_)) + A = A_ + np.eye(num_inducing) + + # factor A + LA = jitchol(A) + + # back substutue to get b, P, v + tmp, _ = dtrtrs(L, Uy, lower=1) + b, _ = dtrtrs(LA, tmp*beta, lower=1) + tmp, _ = dtrtrs(LA, b, lower=1, trans=1) + v, _ = dtrtrs(L, tmp, lower=1, trans=1) + tmp, _ = dtrtrs(LA, Li, lower=1, trans=0) + P = tdot(tmp.T) + + #compute log marginal + log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \ + -np.sum(np.log(np.diag(LA)))*output_dim + \ + 0.5*num_data*output_dim*np.log(beta) + \ + -0.5*beta*np.sum(np.square(Y)) + \ + 0.5*np.sum(np.square(b)) + \ + trace_term + + # Compute dL_dKmm + vvT_P = tdot(v.reshape(-1,1)) + P + LAL = Li.T.dot(A).dot(Li) + dL_dK = Kmmi - 0.5*(vvT_P + LAL) + + # Compute dL_dU + vY = np.dot(v.reshape(-1,1),Y.T) + #dL_dU = vY - np.dot(vvT_P, U.T) + dL_dU = vY - np.dot(vvT_P - Kmmi, U.T) + dL_dU *= beta + + #compute dL_dR + Uv = np.dot(U, v) + dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2 + dL_dR -=beta*trace_term/num_data + + grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T} + + #update gradients + kern.update_gradients_sparse(X=X, Z=Z, **grad_dict) + likelihood.update_gradients(dL_dR) + + #construct a posterior object + post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L) + + + return post, log_marginal, grad_dict + diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py index 906a7867..50a40449 100644 --- a/GPy/inference/latent_function_inference/laplace.py +++ b/GPy/inference/latent_function_inference/laplace.py @@ -216,9 +216,9 @@ class Laplace(object): """ if not log_concave: #print "Under 1e-10: {}".format(np.sum(W < 1e-6)) - # W[W<1e-6] = 1e-6 + W[W<1e-6] = 1e-6 # NOTE: when setting a parameter inside parameters_changed it will allways come to closed update circles!!! - W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur + #W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur # If the likelihood is non-log-concave. We wan't to say that there is a negative variance # To cause the posterior to become less certain than the prior and likelihood, # This is a property only held by non-log-concave likelihoods diff --git a/GPy/inference/latent_function_inference/varDTC.py b/GPy/inference/latent_function_inference/varDTC.py deleted file mode 100644 index 90ca7a6a..00000000 --- a/GPy/inference/latent_function_inference/varDTC.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -from posterior import Posterior -from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify -import numpy as np -from ...util.caching import Cacher -from ...util.misc import param_to_array -log_2_pi = np.log(2*np.pi) - -class VarDTC(object): - """ - An object for inference when the likelihood is Gaussian, but we want to do sparse inference. - - The function self.inference returns a Posterior object, which summarizes - the posterior. - - For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it. - - """ - def __init__(self): - #self._YYTfactor_cache = caching.cache() - self.const_jitter = 1e-6 - self.get_trYYT = Cacher(self._get_trYYT, 1) - self.get_YYTfactor = Cacher(self._get_YYTfactor, 1) - - def _get_trYYT(self, Y): - return param_to_array(np.sum(np.square(Y))) - - def _get_YYTfactor(self, Y): - """ - find a matrix L which satisfies LLT = YYT. - - Note that L may have fewer columns than Y. - """ - N, D = Y.shape - if (N>=D): - return param_to_array(Y) - else: - return jitchol(tdot(Y)) - - def get_VVTfactor(self, Y, prec): - return Y * prec # TODO chache this, and make it effective - - def inference(self, kern, X, X_variance, Z, likelihood, Y): - - num_inducing, _ = Z.shape - num_data, output_dim = Y.shape - - #see whether we're using variational uncertain inputs - uncertain_inputs = not (X_variance is None) - - #see whether we've got a different noise variance for each datum - beta = 1./np.squeeze(likelihood.variance) - het_noise = False - if beta.size <1: - het_noise = True - - # kernel computations, using BGPLVM notation - Kmm = kern.K(Z) - if uncertain_inputs: - psi0 = kern.psi0(Z, X, X_variance) - psi1 = kern.psi1(Z, X, X_variance) - psi2 = kern.psi2(Z, X, X_variance) - else: - psi0 = kern.Kdiag(X) - psi1 = kern.K(X, Z) - - #factor Kmm # TODO: cache? - Lm = jitchol(Kmm) - - # The rather complex computations of A - if uncertain_inputs: - if het_noise: - psi2_beta = (psi2 * (beta.flatten().reshape(num_data, 1, 1))).sum(0) - else: - psi2_beta = psi2.sum(0) * beta - if 0: - evals, evecs = linalg.eigh(psi2_beta) - clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable - if not np.array_equal(evals, clipped_evals): - pass # print evals - tmp = evecs * np.sqrt(clipped_evals) - tmp = tmp.T - # no backsubstitution because of bound explosion on tr(A) if not... - LmInv, _ = dtrtri(Lm, lower=1) - A = LmInv.dot(psi2_beta.dot(LmInv.T)) - #print A.sum() - else: - if het_noise: - tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) - else: - tmp = psi1 * (np.sqrt(beta)) - tmp, _ = dtrtrs(Lm, tmp.T, lower=1) - A = tdot(tmp) - - # factor B - B = np.eye(num_inducing) + A - self.A = A - LB = jitchol(B) - - # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! - #self.YYTfactor = self.get_YYTfactor(Y) - #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta) - VVT_factor = beta*param_to_array(Y) - trYYT = self.get_trYYT(Y) - - psi1Vf = np.dot(psi1.T, VVT_factor) - - # back substutue C into psi1Vf - tmp, info1 = dtrtrs(Lm, psi1Vf, lower=1, trans=0) - _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0) - tmp, info2 = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1) - Cpsi1Vf, info3 = dtrtrs(Lm, tmp, lower=1, trans=1) - - - # Compute dL_dKmm - tmp = tdot(_LBi_Lmi_psi1Vf) - data_fit = np.trace(tmp) - DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + tmp) - tmp = -0.5 * DBi_plus_BiPBi - tmp += -0.5 * B * output_dim - tmp += output_dim * np.eye(num_inducing) - dL_dKmm = backsub_both_sides(Lm, tmp) - - # Compute dL_dpsi - dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten() - dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T) - dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi) - - if het_noise: - if uncertain_inputs: - dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :] - else: - dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * beta.reshape(num_data, 1)).T).T - dL_dpsi2 = None - else: - dL_dpsi2 = beta * dL_dpsi2_beta - if uncertain_inputs: - # repeat for each of the N psi_2 matrices - dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0) - else: - # subsume back into psi1 (==Kmn) - dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2) - dL_dpsi2 = None - - - # the partial derivative vector for the likelihood - if likelihood.size == 0: - # save computation here. - partial_for_likelihood = None - elif het_noise: - if uncertain_inputs: - raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented" - else: - LBi = chol_inv(LB) - Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0) - _LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0) - - partial_for_likelihood = -0.5 * beta + 0.5 * likelihood.V**2 - partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2 - - partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2 - - partial_for_likelihood += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * likelihood.Y * beta**2 - partial_for_likelihood += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2 - - else: - # likelihood is not heteroscedatic - partial_for_likelihood = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2 - partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta) - partial_for_likelihood += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit) - - #compute log marginal likelihood - if het_noise: - lik_1 = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(likelihood.V * likelihood.Y) - lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A)) - else: - lik_1 = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(beta)) - 0.5 * beta * trYYT - lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A)) - lik_3 = -output_dim * (np.sum(np.log(np.diag(LB)))) - lik_4 = 0.5 * data_fit - log_marginal = lik_1 + lik_2 + lik_3 + lik_4 - - #put the gradients in the right places - likelihood.update_gradients(partial_for_likelihood) - - if uncertain_inputs: - grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2} - kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict) - else: - grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1} - kern.update_gradients_sparse(X=X, Z=Z, **grad_dict) - - #get sufficient things for posterior prediction - #TODO: do we really want to do this in the loop? - if VVT_factor.shape[1] == Y.shape[1]: - woodbury_vector = Cpsi1Vf # == Cpsi1V - else: - print 'foobar' - psi1V = np.dot(Y.T*beta, psi1).T - tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) - tmp, _ = dpotrs(LB, tmp, lower=1) - woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) - Bi, _ = dpotri(LB, lower=1) - symmetrify(Bi) - Bi = dpotri(LB, lower=1)[0] - woodbury_inv = backsub_both_sides(Lm, np.eye(num_inducing) - Bi) - - - #construct a posterior object - post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) - - return post, log_marginal, grad_dict - - diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py new file mode 100644 index 00000000..2f11cb08 --- /dev/null +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -0,0 +1,429 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +from posterior import Posterior +from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify +import numpy as np +from ...util.misc import param_to_array +log_2_pi = np.log(2*np.pi) + +class VarDTC(object): + """ + An object for inference when the likelihood is Gaussian, but we want to do sparse inference. + + The function self.inference returns a Posterior object, which summarizes + the posterior. + + For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it. + + """ + const_jitter = 1e-6 + def __init__(self): + #self._YYTfactor_cache = caching.cache() + from ...util.caching import Cacher + self.get_trYYT = Cacher(self._get_trYYT, 1) + self.get_YYTfactor = Cacher(self._get_YYTfactor, 1) + + def _get_trYYT(self, Y): + return param_to_array(np.sum(np.square(Y))) + + def _get_YYTfactor(self, Y): + """ + find a matrix L which satisfies LLT = YYT. + + Note that L may have fewer columns than Y. + """ + N, D = Y.shape + if (N>=D): + return param_to_array(Y) + else: + return jitchol(tdot(Y)) + + def get_VVTfactor(self, Y, prec): + return Y * prec # TODO chache this, and make it effective + + def inference(self, kern, X, X_variance, Z, likelihood, Y): + + #see whether we're using variational uncertain inputs + uncertain_inputs = not (X_variance is None) + + _, output_dim = Y.shape + + #see whether we've got a different noise variance for each datum + beta = 1./np.squeeze(likelihood.variance) + + # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! + #self.YYTfactor = self.get_YYTfactor(Y) + #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta) + VVT_factor = beta*Y + #VVT_factor = beta*Y + trYYT = self.get_trYYT(Y) + + # do the inference: + dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, \ + psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood = _do_inference_on( + kern, X, X_variance, Z, likelihood, + uncertain_inputs, output_dim, + beta, VVT_factor, trYYT) + + likelihood.update_gradients(partial_for_likelihood) + + if uncertain_inputs: + grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2} + kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict) + else: + grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1} + kern.update_gradients_sparse(X=X, Z=Z, **grad_dict) + + #get sufficient things for posterior prediction + #TODO: do we really want to do this in the loop? + if VVT_factor.shape[1] == Y.shape[1]: + woodbury_vector = Cpsi1Vf # == Cpsi1V + else: + print 'foobar' + psi1V = np.dot(Y.T*beta, psi1).T + tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) + tmp, _ = dpotrs(LB, tmp, lower=1) + woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) + Bi, _ = dpotri(LB, lower=1) + symmetrify(Bi) + Bi = -dpotri(LB, lower=1)[0] + from ...util import diag + diag.add(Bi, 1) + + woodbury_inv = backsub_both_sides(Lm, Bi) + + #construct a posterior object + post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) + + return post, log_marginal, grad_dict + +class VarDTCMissingData(object): + def __init__(self): + from ...util.caching import Cacher + self._Y = Cacher(self._subarray_computations, 1) + pass + + def _subarray_computations(self, Y): + inan = np.isnan(Y) + has_none = inan.any() + if has_none: + from ...util.subarray_and_sorting import common_subarrays + self._subarray_indices = [] + for v,ind in common_subarrays(inan, 1).iteritems(): + if not np.all(v): + v = ~np.array(v, dtype=bool) + ind = np.array(ind, dtype=int) + if ind.size == Y.shape[1]: + ind = slice(None) + self._subarray_indices.append([v,ind]) + Ys = [Y[v, :][:, ind] for v, ind in self._subarray_indices] + traces = [(y**2).sum() for y in Ys] + return Ys, traces + else: + self._subarray_indices = [[slice(None),slice(None)]] + return [Y], [(Y**2).sum()] + + def inference(self, kern, X, X_variance, Z, likelihood, Y): + Ys, traces = self._Y(Y) + beta_all = 1./likelihood.variance + uncertain_inputs = not (X_variance is None) + het_noise = beta_all.size != 1 + + import itertools + num_inducing = Z.shape[0] + + dL_dpsi0_all = np.zeros(X.shape[0]) + dL_dpsi1_all = np.zeros((X.shape[0], num_inducing)) + if uncertain_inputs: + dL_dpsi2_all = np.zeros((X.shape[0], num_inducing, num_inducing)) + + partial_for_likelihood = 0 + woodbury_vector = np.zeros((num_inducing, Y.shape[1])) + woodbury_inv_all = np.zeros((num_inducing, num_inducing, Y.shape[1])) + dL_dKmm = 0 + log_marginal = 0 + + Kmm = kern.K(Z) + #factor Kmm + Lm = jitchol(Kmm) + if uncertain_inputs: LmInv = dtrtri(Lm) + + # kernel computations, using BGPLVM notation + psi0_all, psi1_all, psi2_all = _compute_psi(kern, X, X_variance, Z, uncertain_inputs) + + VVT_factor_all = np.empty(Y.shape) + full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1] + if not full_VVT_factor: + psi1V = np.dot(Y.T*beta_all, psi1_all).T + + for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices): + if het_noise: beta = beta_all[ind] + else: beta = beta_all[0] + + VVT_factor = (beta*y) + VVT_factor_all[v, ind].flat = VVT_factor.flat + output_dim = y.shape[1] + + psi0 = psi0_all[v] + psi1 = psi1_all[v, :] + if uncertain_inputs: psi2 = psi2_all[v, :] + else: psi2 = None + num_data = psi1.shape[0] + + if uncertain_inputs: + if het_noise: psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0) + else: psi2_beta = psi2.sum(0) * beta + A = LmInv.dot(psi2_beta.dot(LmInv.T)) + else: + if het_noise: tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) + else: tmp = psi1 * (np.sqrt(beta)) + tmp, _ = dtrtrs(Lm, tmp.T, lower=1) + A = tdot(tmp) #print A.sum() + + # factor B + B = np.eye(num_inducing) + A + LB = jitchol(B) + + psi1Vf = psi1.T.dot(VVT_factor) + _LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf) + + #LB_all[ind, :,:] = LB + + # data fit and derivative of L w.r.t. Kmm + delit = tdot(_LBi_Lmi_psi1Vf) + data_fit = np.trace(delit) + DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit) + delit = -0.5 * DBi_plus_BiPBi + delit += -0.5 * B * output_dim + delit += output_dim * np.eye(num_inducing) + dL_dKmm += backsub_both_sides(Lm, delit) + + # derivatives of L w.r.t. psi + dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, + VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, + psi1, het_noise, uncertain_inputs) + + #import ipdb;ipdb.set_trace() + dL_dpsi0_all[v] += dL_dpsi0 + dL_dpsi1_all[v, :] += dL_dpsi1 + if uncertain_inputs: + dL_dpsi2_all[v, :] += dL_dpsi2 + + # log marginal likelihood + log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, + psi0, A, LB, trYYT, data_fit) + + #put the gradients in the right places + partial_for_likelihood += _compute_partial_for_likelihood(likelihood, + het_noise, uncertain_inputs, LB, + _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, + psi0, psi1, beta, + data_fit, num_data, output_dim, trYYT) + + if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf + else: + print 'foobar' + tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) + tmp, _ = dpotrs(LB, tmp, lower=1) + woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0] + + #import ipdb;ipdb.set_trace() + Bi, _ = dpotri(LB, lower=1) + symmetrify(Bi) + Bi = -dpotri(LB, lower=1)[0] + from ...util import diag + diag.add(Bi, 1) + woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None] + + # gradients: + likelihood.update_gradients(partial_for_likelihood) + + if uncertain_inputs: + grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0_all, 'dL_dpsi1':dL_dpsi1_all, 'dL_dpsi2':dL_dpsi2_all} + kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict) + else: + grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0_all, 'dL_dKnm':dL_dpsi1_all} + kern.update_gradients_sparse(X=X, Z=Z, **grad_dict) + + #get sufficient things for posterior prediction + #TODO: do we really want to do this in the loop? + #if not full_VVT_factor: + # print 'foobar' + # psi1V = np.dot(Y.T*beta_all, psi1_all).T + # tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) + # tmp, _ = dpotrs(LB_all, tmp, lower=1) + # woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) + #import ipdb;ipdb.set_trace() + #Bi, _ = dpotri(LB_all, lower=1) + #symmetrify(Bi) + #Bi = -dpotri(LB_all, lower=1)[0] + #from ...util import diag + #diag.add(Bi, 1) + + #woodbury_inv = backsub_both_sides(Lm, Bi) + + post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) + + return post, log_marginal, grad_dict + + +def _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm): +# The rather complex computations of A + if uncertain_inputs: + if het_noise: + psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0) + else: + psi2_beta = psi2.sum(0) * beta + #if 0: + # evals, evecs = linalg.eigh(psi2_beta) + # clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable + # if not np.array_equal(evals, clipped_evals): + # pass # print evals + # tmp = evecs * np.sqrt(clipped_evals) + # tmp = tmp.T + # no backsubstitution because of bound explosion on tr(A) if not... + LmInv = dtrtri(Lm) + A = LmInv.dot(psi2_beta.dot(LmInv.T)) + else: + if het_noise: + tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) + else: + tmp = psi1 * (np.sqrt(beta)) + tmp, _ = dtrtrs(Lm, tmp.T, lower=1) + A = tdot(tmp) #print A.sum() + return A + + +def _compute_psi(kern, X, X_variance, Z, uncertain_inputs): + if uncertain_inputs: + psi0 = kern.psi0(Z, X, X_variance) + psi1 = kern.psi1(Z, X, X_variance) + psi2 = kern.psi2(Z, X, X_variance) + else: + psi0 = kern.Kdiag(X) + psi1 = kern.K(X, Z) + psi2 = None + return psi0, psi1, psi2 + +def _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs): + Kmm = kern.K(Z) + psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs) + return Kmm, psi0, psi1, psi2 + +def _compute_dL_dKmm(num_inducing, output_dim, Lm, B, LB, _LBi_Lmi_psi1Vf): + # Compute dL_dKmm + delit = tdot(_LBi_Lmi_psi1Vf) + data_fit = np.trace(delit) + DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit) + delit = -0.5 * DBi_plus_BiPBi + delit += -0.5 * B * output_dim + delit += output_dim * np.eye(num_inducing) + dL_dKmm = backsub_both_sides(Lm, delit) + return DBi_plus_BiPBi, data_fit, dL_dKmm + +def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs): + dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten() + dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T) + dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi) + if het_noise: + if uncertain_inputs: + dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :] + else: + dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * beta.reshape(num_data, 1)).T).T + dL_dpsi2 = None + else: + dL_dpsi2 = beta * dL_dpsi2_beta + if uncertain_inputs: + # repeat for each of the N psi_2 matrices + dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0) + else: + # subsume back into psi1 (==Kmn) + dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2) + dL_dpsi2 = None + + return dL_dpsi0, dL_dpsi1, dL_dpsi2 + + +def _compute_psi1Vf(Lm, LB, psi1Vf): + # back substutue C into psi1Vf + tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0) + _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0) + tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1) + Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1) + return _LBi_Lmi_psi1Vf, Cpsi1Vf + + +def _compute_partial_for_likelihood(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT): + # the partial derivative vector for the likelihood + if likelihood.size == 0: + # save computation here. + partial_for_likelihood = None + elif het_noise: + if uncertain_inputs: + raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented" + else: + from ...util.linalg import chol_inv + LBi = chol_inv(LB) + Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0) + _LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0) + + partial_for_likelihood = -0.5 * beta + 0.5 * likelihood.V**2 + partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2 + + partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2 + + partial_for_likelihood += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * likelihood.Y * beta**2 + partial_for_likelihood += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2 + + else: + # likelihood is not heteroscedatic + partial_for_likelihood = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2 + partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta) + partial_for_likelihood += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit) + return partial_for_likelihood + +def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit): +#compute log marginal likelihood + if het_noise: + lik_1 = -0.5 * num_data * output_dim * np.log(2. * np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(likelihood.V * likelihood.Y) + lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A)) + else: + lik_1 = -0.5 * num_data * output_dim * (np.log(2. * np.pi) - np.log(beta)) - 0.5 * beta * trYYT + lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A)) + lik_3 = -output_dim * (np.sum(np.log(np.diag(LB)))) + lik_4 = 0.5 * data_fit + log_marginal = lik_1 + lik_2 + lik_3 + lik_4 + return log_marginal + +def _do_inference_on(kern, X, X_variance, Z, likelihood, uncertain_inputs, output_dim, beta, VVT_factor, trYYT): + het_noise = beta.size < 1 + num_inducing = Z.shape[0] + num_data = X.shape[0] + # kernel computations, using BGPLVM notation + Kmm, psi0, psi1, psi2 = _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs) + #factor Kmm # TODO: cache? + Lm = jitchol(Kmm) + A = _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm) + # factor B + B = np.eye(num_inducing) + A + LB = jitchol(B) + psi1Vf = np.dot(psi1.T, VVT_factor) + _LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf) + # data fit and derivative of L w.r.t. Kmm + DBi_plus_BiPBi, data_fit, dL_dKmm = _compute_dL_dKmm(num_inducing, output_dim, + Lm, B, LB, _LBi_Lmi_psi1Vf) + # derivatives of L w.r.t. psi + dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, + VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, + psi1, het_noise, uncertain_inputs) + # log marginal likelihood + log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, + psi0, A, LB, trYYT, data_fit) + #put the gradients in the right places + partial_for_likelihood = _compute_partial_for_likelihood(likelihood, + het_noise, uncertain_inputs, LB, + _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, + psi0, psi1, beta, + data_fit, num_data, output_dim, trYYT) + return dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index a362aff8..96afc78a 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -136,7 +136,7 @@ def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, part = parts.poly.POLY(input_dim,variance,weight_variance,bias_variance,degree,ARD) return kern(input_dim, [part]) -def white(input_dim,variance=1.): +def white(input_dim,variance=1.,name='white'): """ Construct a white kernel. @@ -146,7 +146,7 @@ def white(input_dim,variance=1.): :type variance: float """ - part = parts.white.White(input_dim,variance) + part = parts.white.White(input_dim,variance,name=name) return kern(input_dim, [part]) def eq_ode1(output_dim, W=None, rank=1, kappa=None, length_scale=1., decay=None, delay=None): diff --git a/GPy/kern/parts/linear.py b/GPy/kern/parts/linear.py index db7c9834..828ece11 100644 --- a/GPy/kern/parts/linear.py +++ b/GPy/kern/parts/linear.py @@ -60,7 +60,7 @@ class Linear(Kernpart): self._K_computations(X, None) def update_gradients_full(self, dL_dK, X): - #self.variances.gradient[:] = 0 + self.variances.gradient[:] = 0 self._param_grad_helper(dL_dK, X, None, self.variances.gradient) def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): diff --git a/GPy/kern/parts/white.py b/GPy/kern/parts/white.py index b0e961c7..c7e4c6dd 100644 --- a/GPy/kern/parts/white.py +++ b/GPy/kern/parts/white.py @@ -15,8 +15,8 @@ class White(Kernpart): :param variance: :type variance: float """ - def __init__(self,input_dim,variance=1.): - super(White, self).__init__(input_dim, 'white') + def __init__(self,input_dim,variance=1., name='white'): + super(White, self).__init__(input_dim, name) self.input_dim = input_dim self.variance = Param('variance', variance, Logexp()) self.add_parameters(self.variance) diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py index cd24ac18..a72acc1a 100644 --- a/GPy/models/gp_regression.py +++ b/GPy/models/gp_regression.py @@ -28,6 +28,7 @@ class GPRegression(GP): likelihood = likelihoods.Gaussian() super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression') + self.parameters_changed() def _getstate(self): return GP._getstate(self) diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index 06481b81..630b1e8c 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -44,7 +44,7 @@ class GPLVM(GP): PC = PCA(Y, input_dim)[0] Xr[:PC.shape[0], :PC.shape[1]] = PC else: - raise NotImplementedError + pass return Xr def parameters_changed(self): diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 3e105785..511ce5aa 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -10,6 +10,22 @@ import pylab from GPy.kern.kern import kern from GPy.models.bayesian_gplvm import BayesianGPLVM +class MRD2(Model): + """ + Apply MRD to all given datasets Y in Ylist. + + Y_i in [n x p_i] + + The samples n in the datasets need + to match up, whereas the dimensionality p_d can differ. + + :param [array-like] Ylist: List of datasets to apply MRD on + :param array-like q_mean: mean of starting latent space q in [n x q] + :param array-like q_variance: variance of starting latent space q in [n x q] + :param :class:`~GPy.inference.latent_function_inference + """ + + class MRD(Model): """ Do MRD on given Datasets in Ylist. diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index 4e6589b4..c9896116 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -132,10 +132,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #predict on the frame and plot if plot_raw: m, _ = model._raw_predict(Xgrid, which_parts=which_parts) - Y = model.likelihood.Y + Y = model.Y else: m, _, _, _ = model.predict(Xgrid, which_parts=which_parts) - Y = model.likelihood.data + Y = model.data for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) diff --git a/GPy/util/caching.py b/GPy/util/caching.py index 8806e6d3..51ba56f3 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -30,7 +30,14 @@ class Cacher(object): def on_cache_changed(self, X): #print id(X) - i = self.cached_inputs.index(X) + Xbase = X + while Xbase is not None: + try: + i = self.cached_inputs.index(X) + break + except ValueError: + Xbase = X.base + continue self.inputs_changed[i] = True diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 6bcddc3e..22b4f86c 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -101,17 +101,17 @@ def jitchol(A, maxtries=5): -def dtrtri(L, lower=1): - """ - Wrapper for lapack dtrtri function - Inverse of L - - :param L: Triangular Matrix L - :param lower: is matrix lower (true) or upper (false) - :returns: Li, info - """ - L = force_F_ordered(L) - return lapack.dtrtri(L, lower=lower) +# def dtrtri(L, lower=1): +# """ +# Wrapper for lapack dtrtri function +# Inverse of L +# +# :param L: Triangular Matrix L +# :param lower: is matrix lower (true) or upper (false) +# :returns: Li, info +# """ +# L = force_F_ordered(L) +# return lapack.dtrtri(L, lower=lower) def dtrtrs(A, B, lower=1, trans=0, unitdiag=0): """ diff --git a/GPy/util/subarray_and_sorting.py b/GPy/util/subarray_and_sorting.py index 49385771..ab7b7672 100644 --- a/GPy/util/subarray_and_sorting.py +++ b/GPy/util/subarray_and_sorting.py @@ -17,7 +17,7 @@ def common_subarrays(X, axis=0): :param :class:`np.ndarray` X: 2d array to check for common subarrays in :param int axis: axis to apply subarray detection over. - When the index is 0, compare rows, columns, otherwise. + When the index is 0, compare rows -- columns, otherwise. Examples: =========