diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index a42d76ed..25651827 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -4,6 +4,7 @@ from model import * from parameterization.parameterized import adjust_name_for_printing, Parameterizable from parameterization.param import Param, ParamConcatenation +from parameterization.observable_array import ObsAr from gp import GP from sparse_gp import SparseGP diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 5be3e944..490bcc72 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -208,27 +208,9 @@ class GP(Model): from ..plotting.matplot_dep import models_plots return models_plots.plot_fit(self,*args,**kwargs) - def _getstate(self): + def input_sensitivity(self): """ - - Get the current state of the class, here we return everything that is - needed to recompute the model. - + Returns the sensitivity for each dimension of this model """ + return self.kern.input_sensitivity() - return Model._getstate(self) + [self.X, - self.num_data, - self.input_dim, - self.kern, - self.likelihood, - self.output_dim, - ] - - def _setstate(self, state): - self.output_dim = state.pop() - self.likelihood = state.pop() - self.kern = state.pop() - self.input_dim = state.pop() - self.num_data = state.pop() - self.X = state.pop() - Model._setstate(self, state) diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py index efd9476f..6eaaf96c 100644 --- a/GPy/core/mapping.py +++ b/GPy/core/mapping.py @@ -34,7 +34,7 @@ class Mapping(Parameterized): raise NotImplementedError def df_dtheta(self, dL_df, X): - """The gradient of the outputs of the multi-layer perceptron with respect to each of the parameters. + """The gradient of the outputs of the mapping with respect to each of the parameters. :param dL_df: gradient of the objective with respect to the function. :type dL_df: ndarray (num_data x output_dim) @@ -50,7 +50,7 @@ class Mapping(Parameterized): """ Plots the mapping associated with the model. - In one dimension, the function is plotted. - - In two dimsensions, a contour-plot shows the function + - In two dimensions, a contour-plot shows the function - In higher dimensions, we've not implemented this yet !TODO! Can plot only part of the data and part of the posterior functions diff --git a/GPy/core/model.py b/GPy/core/model.py index 1f53885c..38e8d4cf 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -24,37 +24,9 @@ class Model(Parameterized): def log_likelihood(self): raise NotImplementedError, "this needs to be implemented to use the model class" - def _log_likelihood_gradients(self): return self.gradient - def _getstate(self): - """ - Get the current state of the class. - Inherited from Parameterized, so add those parameters to the state - - :return: list of states from the model. - - """ - return Parameterized._getstate(self) + \ - [self.priors, self.optimization_runs, - self.sampling_runs, self.preferred_optimizer] - - def _setstate(self, state): - """ - set state from previous call to _getstate - call Parameterized with the rest of the state - - :param state: the state of the model. - :type state: list as returned from _getstate. 
- - """ - self.preferred_optimizer = state.pop() - self.sampling_runs = state.pop() - self.optimization_runs = state.pop() - self.priors = state.pop() - Parameterized._setstate(self, state) - def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs): """ Perform random restarts of the model, and set the model to the best @@ -142,13 +114,60 @@ class Model(Parameterized): """ raise DeprecationWarning, 'parameters now have default constraints' - def input_sensitivity(self): + def objective_function(self): """ - Returns the sensitivity for each dimension of this kernel. - """ - return self.kern.input_sensitivity() + The objective function for the given algorithm. - def objective_function(self, x): + This function is the true objective, which wants to be minimized. + Note that all parameters are already set and in place, so you just need + to return the objective function here. + + For probabilistic models this is the negative log_likelihood + (including the MAP prior), so we return it here. If your model is not + probabilistic, just return your objective here! + """ + return -float(self.log_likelihood()) - self.log_prior() + + def objective_function_gradients(self): + """ + The gradients for the objective function for the given algorithm. + + You can find the gradient for the parameters in self.gradient at all times. + This is the place, where gradients get stored for parameters. + + This function is the true objective, which wants to be minimized. + Note that all parameters are already set and in place, so you just need + to return the gradient here. + + For probabilistic models this is the gradient of the negative log_likelihood + (including the MAP prior), so we return it here. If your model is not + probabilistic, just return your gradient here! + """ + return -(self._log_likelihood_gradients() + self._log_prior_gradients()) + + def _grads(self, x): + """ + Gets the gradients from the likelihood and the priors. + + Failures are handled robustly. The algorithm will try several times to + return the gradients, and will raise the original exception if + the objective cannot be computed. + + :param x: the parameters of the model. + :type x: np.array + """ + try: + self._set_params_transformed(x) + obj_grads = self._transform_gradients(self.objective_function_gradients()) + self._fail_count = 0 + except (LinAlgError, ZeroDivisionError, ValueError): + if self._fail_count >= self._allowed_failures: + raise + self._fail_count += 1 + obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) + return obj_grads + + def _objective(self, x): """ The objective function passed to the optimizer. It combines the likelihood and the priors. @@ -162,55 +181,26 @@ class Model(Parameterized): """ try: self._set_params_transformed(x) + obj = self.objective_function() self._fail_count = 0 - except (LinAlgError, ZeroDivisionError, ValueError) as e: + except (LinAlgError, ZeroDivisionError, ValueError): if self._fail_count >= self._allowed_failures: - raise e + raise self._fail_count += 1 return np.inf - return -float(self.log_likelihood()) - self.log_prior() + return obj - def objective_function_gradients(self, x): - """ - Gets the gradients from the likelihood and the priors. - - Failures are handled robustly. The algorithm will try several times to - return the gradients, and will raise the original exception if - the objective cannot be computed. - - :param x: the parameters of the model. 
- :type x: np.array - """ + def _objective_grads(self, x): try: self._set_params_transformed(x) - obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) + obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients()) self._fail_count = 0 - except (LinAlgError, ZeroDivisionError, ValueError) as e: + except (LinAlgError, ZeroDivisionError, ValueError): if self._fail_count >= self._allowed_failures: - raise e - self._fail_count += 1 - obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100) - return obj_grads - - def objective_and_gradients(self, x): - """ - Compute the objective function of the model and the gradient of the model at the point given by x. - - :param x: the point at which gradients are to be computed. - :type x: np.array - """ - - try: - self._set_params_transformed(x) - obj_f = -float(self.log_likelihood()) - self.log_prior() - obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) - self._fail_count = 0 - except (LinAlgError, ZeroDivisionError, ValueError) as e: - if self._fail_count >= self._allowed_failures: - raise e + raise self._fail_count += 1 obj_f = np.inf - obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100) + obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100) return obj_f, obj_grads def optimize(self, optimizer=None, start=None, **kwargs): @@ -241,7 +231,7 @@ class Model(Parameterized): optimizer = optimization.get_optimizer(optimizer) opt = optimizer(start, model=self, **kwargs) - opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients) + opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads) self.optimization_runs.append(opt) @@ -289,19 +279,22 @@ class Model(Parameterized): # just check the global ratio dx = np.zeros(x.shape) - dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size)) + dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.) # evaulate around the point x - f1 = self.objective_function(x + dx) - f2 = self.objective_function(x - dx) - gradient = self.objective_function_gradients(x) + f1 = self._objective(x + dx) + f2 = self._objective(x - dx) + gradient = self._grads(x) dx = dx[transformed_index] gradient = gradient[transformed_index] denominator = (2 * np.dot(dx, gradient)) global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator) - return np.abs(1. - global_ratio) < tolerance or np.abs(f1-f2).sum() + np.abs((2 * np.dot(dx, gradient))).sum() < tolerance + global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance) + if global_ratio is np.nan: + global_ratio = 0 + return np.abs(1. 
- global_ratio) < tolerance or global_diff else: # check the gradient of each parameter individually, and do some pretty printing try: @@ -337,15 +330,15 @@ class Model(Parameterized): print "No free parameters to check" return - gradient = self.objective_function_gradients(x).copy() + gradient = self._grads(x).copy() np.where(gradient == 0, 1e-312, gradient) ret = True for nind, xind in itertools.izip(param_index, transformed_index): xx = x.copy() xx[xind] += step - f1 = self.objective_function(xx) + f1 = self._objective(xx) xx[xind] -= 2.*step - f2 = self.objective_function(xx) + f2 = self._objective(xx) numerical_gradient = (f1 - f2) / (2 * step) if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind] else: ratio = (f1 - f2) / (2 * step * gradient[xind]) diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index c22d8b6b..12b3a298 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -24,12 +24,6 @@ class ParameterIndexOperations(object): for t, i in constraints.iteritems(): self.add(t, i) - def __getstate__(self): - return self._properties - - def __setstate__(self, state): - self._properties = state - def iteritems(self): return self._properties.iteritems() @@ -92,13 +86,18 @@ class ParameterIndexOperations(object): for i, v in parameter_index_view.iteritems(): self.add(i, v+offset) - def copy(self): + return self.__deepcopy__(None) + + def __deepcopy__(self, memo): return ParameterIndexOperations(dict(self.iteritems())) def __getitem__(self, prop): return self._properties[prop] + def __delitem__(self, prop): + del self._properties[prop] + def __str__(self, *args, **kwargs): import pprint return pprint.pformat(dict(self._properties)) @@ -183,7 +182,7 @@ class ParameterIndexOperationsView(object): def remove(self, prop, indices): - removed = self._param_index_ops.remove(prop, indices+self._offset) + removed = self._param_index_ops.remove(prop, numpy.array(indices)+self._offset) if removed.size > 0: return removed - self._size + 1 return removed @@ -193,6 +192,9 @@ class ParameterIndexOperationsView(object): ind = self._filter_index(self._param_index_ops[prop]) return ind + def __delitem__(self, prop): + self.remove(prop, self[prop]) + def __str__(self, *args, **kwargs): import pprint return pprint.pformat(dict(self.iteritems())) @@ -203,6 +205,9 @@ class ParameterIndexOperationsView(object): def copy(self): + return self.__deepcopy__(None) + + def __deepcopy__(self, memo): return ParameterIndexOperations(dict(self.iteritems())) pass diff --git a/GPy/core/parameterization/lists_and_dicts.py b/GPy/core/parameterization/lists_and_dicts.py index ca0589c9..604d0a01 100644 --- a/GPy/core/parameterization/lists_and_dicts.py +++ b/GPy/core/parameterization/lists_and_dicts.py @@ -5,6 +5,7 @@ Created on 27 Feb 2014 ''' from collections import defaultdict +import weakref def intarray_default_factory(): import numpy as np @@ -28,4 +29,90 @@ class ArrayList(list): return True return False + def index(self, item): + index = 0 + for el in self: + if el is item: + return index + index += 1 + raise ValueError, "{} is not in list".format(item) + pass + +class ObservablesList(object): + def __init__(self): + self._poc = [] + + def __getitem__(self, ind): + p,o,c = self._poc[ind] + return p, o(), c + + def remove(self, priority, observable, callble): + """ + """ + self.flush() + for i in range(len(self) - 1, -1, -1): + p,o,c = self[i] + if priority==p and observable==o and callble==c: 
+ del self._poc[i] + + def __repr__(self): + return self._poc.__repr__() + + def add(self, priority, observable, callble): + ins = 0 + for pr, _, _ in self: + if priority > pr: + break + ins += 1 + self._poc.insert(ins, (priority, weakref.ref(observable), callble)) + + def __str__(self): + ret = [] + curr_p = None + for p, o, c in self: + curr = '' + if curr_p != p: + pre = "{!s}: ".format(p) + curr_pre = pre + else: curr_pre = " "*len(pre) + curr_p = p + curr += curr_pre + ret.append(curr + ", ".join(map(repr, [o,c]))) + return '\n'.join(ret) + + def flush(self): + self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None] + + def __iter__(self): + self.flush() + for p, o, c in self._poc: + if o() is not None: + yield p, o(), c + + def __len__(self): + self.flush() + return self._poc.__len__() + + def __deepcopy__(self, memo): + self.flush() + s = ObservablesList() + import copy + s._poc = copy.deepcopy(self._poc, memo) + return s + + def __getstate__(self): + self.flush() + from ...util.caching import Cacher + obs = [] + for p, o, c in self: + if (getattr(o, c.__name__, None) is not None + and not isinstance(o, Cacher)): + obs.append((p,o,c.__name__)) + return obs + + def __setstate__(self, state): + self._poc = [] + for p, o, c in state: + self.add(p,o,getattr(o, c)) + pass diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/observable_array.py similarity index 52% rename from GPy/core/parameterization/array_core.py rename to GPy/core/parameterization/observable_array.py index 780367c8..56d33bfc 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/observable_array.py @@ -1,12 +1,12 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -__updated__ = '2014-03-21' +__updated__ = '2014-03-31' import numpy as np -from parameter_core import Observable +from parameter_core import Observable, Pickleable -class ObsAr(np.ndarray, Observable): +class ObsAr(np.ndarray, Pickleable, Observable): """ An ndarray which reports changes to its observers. The observers can add themselves with a callable, which @@ -25,37 +25,34 @@ class ObsAr(np.ndarray, Observable): def __array_finalize__(self, obj): # see InfoArray.__array_finalize__ for comments if obj is None: return - self._observer_callables_ = getattr(obj, '_observer_callables_', None) + self.observers = getattr(obj, 'observers', None) def __array_wrap__(self, out_arr, context=None): return out_arr.view(np.ndarray) + def copy(self): + memo = {} + memo[id(self)] = self + return self.__deepcopy__(memo) + + def __deepcopy__(self, memo): + s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy()) + memo[id(self)] = s + import copy + s.__dict__.update(copy.deepcopy(self.__dict__, memo)) + return s + def __reduce__(self): - func, args, state = np.ndarray.__reduce__(self) - return func, args, (state, Observable._getstate(self)) + func, args, state = super(ObsAr, self).__reduce__() + return func, args, (state, Pickleable.__getstate__(self)) def __setstate__(self, state): np.ndarray.__setstate__(self, state[0]) - Observable._setstate(self, state[1]) - - def _s_not_empty(self, s): - # this checks whether there is something picked by this slice. 
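# A minimal sketch of the weakref-based observer storage that ObservablesList
# implements above: observers are held through weak references and dead
# entries are flushed, so being registered does not keep an observer alive.
# Subject/Observer/on_change are illustrative names, not GPy API.
import weakref

class Subject(object):
    def __init__(self):
        self._poc = []  # (priority, weakref(observer), callable) triples

    def add(self, priority, observer, callble):
        ins = 0
        for pr, _, _ in self._poc:  # keep sorted by descending priority
            if priority > pr:
                break
            ins += 1
        self._poc.insert(ins, (priority, weakref.ref(observer), callble))

    def flush(self):
        # drop entries whose observer has been garbage collected
        self._poc = [(p, o, c) for p, o, c in self._poc if o() is not None]

    def notify(self):
        self.flush()
        for _, o, c in self._poc:
            c(o(), which=self)

class Observer(object):
    pass

def on_change(observer, which):
    print("%r changed" % (which,))

o, s = Observer(), Subject()
s.add(0, o, on_change)
s.notify()          # fires while the observer is alive
del o               # CPython frees it immediately (refcounting)
s.flush()
print(len(s._poc))  # 0 -- the dead entry was flushed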
- return True - # TODO: disarmed, for performance increase, - if not isinstance(s, (list,tuple,np.ndarray)): - return True - if isinstance(s, (list,tuple)): - return len(s)!=0 - if isinstance(s, np.ndarray): - if s.dtype is bool: - return np.all(s) - else: - return s.size != 0 + Pickleable.__setstate__(self, state[1]) def __setitem__(self, s, val): - if self._s_not_empty(s): - super(ObsAr, self).__setitem__(s, val) - self.notify_observers() + super(ObsAr, self).__setitem__(s, val) + self.notify_observers() def __getslice__(self, start, stop): return self.__getitem__(slice(start, stop)) @@ -63,12 +60,6 @@ class ObsAr(np.ndarray, Observable): def __setslice__(self, start, stop, val): return self.__setitem__(slice(start, stop), val) - def __copy__(self, *args): - return ObsAr(self.view(np.ndarray).copy()) - - def copy(self, *args): - return self.__copy__(*args) - def __ilshift__(self, *args, **kwargs): r = np.ndarray.__ilshift__(self, *args, **kwargs) self.notify_observers() @@ -143,77 +134,4 @@ class ObsAr(np.ndarray, Observable): def __imul__(self, *args, **kwargs): r = np.ndarray.__imul__(self, *args, **kwargs) self.notify_observers() - return r - - -# def __rrshift__(self, *args, **kwargs): -# r = np.ndarray.__rrshift__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __ror__(self, *args, **kwargs): -# r = np.ndarray.__ror__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rxor__(self, *args, **kwargs): -# r = np.ndarray.__rxor__(self, *args, **kwargs) -# self.notify_observers() -# return r - - - -# def __rdivmod__(self, *args, **kwargs): -# r = np.ndarray.__rdivmod__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __radd__(self, *args, **kwargs): -# r = np.ndarray.__radd__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rdiv__(self, *args, **kwargs): -# r = np.ndarray.__rdiv__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rtruediv__(self, *args, **kwargs): -# r = np.ndarray.__rtruediv__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rshift__(self, *args, **kwargs): -# r = np.ndarray.__rshift__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rmul__(self, *args, **kwargs): -# r = np.ndarray.__rmul__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rpow__(self, *args, **kwargs): -# r = np.ndarray.__rpow__(self, *args, **kwargs) -# self.notify_observers() -# return r - - -# def __rsub__(self, *args, **kwargs): -# r = np.ndarray.__rsub__(self, *args, **kwargs) -# self.notify_observers() -# return r - -# def __rfloordiv__(self, *args, **kwargs): -# r = np.ndarray.__rfloordiv__(self, *args, **kwargs) -# self.notify_observers() -# return r - + return r \ No newline at end of file diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 984fc950..9c3d7bd3 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -4,7 +4,7 @@ import itertools import numpy from parameter_core import OptimizationHandlable, adjust_name_for_printing -from array_core import ObsAr +from observable_array import ObsAr ###### printing __constraints_name__ = "Constraint" @@ -43,14 +43,13 @@ class Param(OptimizationHandlable, ObsAr): _fixes_ = None _parameters_ = [] def __new__(cls, name, input_array, default_constraint=None): - obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array, name=name, 
default_constraint=default_constraint)) + obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array)) cls.__name__ = "Param" obj._current_slice_ = (slice(obj.shape[0]),) obj._realshape_ = obj.shape obj._realsize_ = obj.size obj._realndim_ = obj.ndim obj._original_ = True - obj._gradient_array_ = numpy.zeros(obj.shape, dtype=numpy.float64) return obj def __init__(self, name, input_array, default_constraint=None, *a, **kw): @@ -60,7 +59,7 @@ class Param(OptimizationHandlable, ObsAr): import pydot node = pydot.Node(id(self), shape='record', label=self.name) G.add_node(node) - for o in self._observer_callables_.keys(): + for o in self.observers.keys(): label = o.name if hasattr(o, 'name') else str(o) observed_node = pydot.Node(id(o), label=label) G.add_node(observed_node) @@ -87,74 +86,24 @@ class Param(OptimizationHandlable, ObsAr): self.priors = getattr(obj, 'priors', None) @property - def _param_array_(self): + def param_array(self): return self @property def gradient(self): + """ + Return a view on the gradient, which is in the same shape as this parameter is. + Note: this is not the real gradient array, it is just a view on it. + + To work on the real gradient array use: self.full_gradient + """ + if getattr(self, '_gradient_array_', None) is None: + self._gradient_array_ = numpy.empty(self._realshape_, dtype=numpy.float64) return self._gradient_array_[self._current_slice_] @gradient.setter def gradient(self, val): - self.gradient[:] = val - - #=========================================================================== - # Pickling operations - #=========================================================================== - def __reduce__(self): - func, args, state = super(Param, self).__reduce__() - return func, args, (state, - (self._name, - self._parent_, - self._parent_index_, - self._default_constraint_, - self._current_slice_, - self._realshape_, - self._realsize_, - self._realndim_, - self.constraints, - self.priors - ) - ) - - def __setstate__(self, state): - super(Param, self).__setstate__(state[0]) - state = list(state[1]) - self.priors = state.pop() - self.constraints = state.pop() - self._realndim_ = state.pop() - self._realsize_ = state.pop() - self._realshape_ = state.pop() - self._current_slice_ = state.pop() - self._default_constraint_ = state.pop() - self._parent_index_ = state.pop() - self._parent_ = state.pop() - self._name = state.pop() - - def copy(self, *args): - constr = self.constraints.copy() - priors = self.priors.copy() - p = Param(self.name, self.view(numpy.ndarray).copy(), self._default_constraint_) - p.constraints = constr - p.priors = priors - return p - #=========================================================================== - # get/set parameters - #=========================================================================== -# def _set_params(self, param, trigger_parent=True): -# self.flat = param -# if trigger_parent: min_priority = None -# else: min_priority = -numpy.inf -# self.notify_observers(None, min_priority) -# -# def _get_params(self): -# return self.flat -# -# def _collect_gradient(self, target): -# target += self.gradient.flat -# -# def _set_gradient(self, g): -# self.gradient = g.reshape(self._realshape_) + self._gradient_array_[self._current_slice_] = val #=========================================================================== # Array operations -> done @@ -172,24 +121,6 @@ class Param(OptimizationHandlable, ObsAr): def __setitem__(self, s, val): super(Param, self).__setitem__(s, val) - 
#=========================================================================== - # Index Operations: - #=========================================================================== - #def _internal_offset(self): - # internal_offset = 0 - # extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1] - # for i, si in enumerate(self._current_slice_[:self._realndim_]): - # if numpy.all(si == Ellipsis): - # continue - # if isinstance(si, slice): - # a = si.indices(self._realshape_[i])[0] - # elif isinstance(si, (list,numpy.ndarray,tuple)): - # a = si[0] - # else: a = si - # if a < 0: - # a = self._realshape_[i] + a - # internal_offset += a * extended_realshape[i] - # return internal_offset def _raveled_index(self, slice_index=None): # return an index array on the raveled array, which is formed by the current_slice @@ -235,13 +166,21 @@ def is_fixed(self): from transformations import __fixed__ return self.constraints[__fixed__].size == self.size - #def round(self, decimals=0, out=None): - # view = super(Param, self).round(decimals, out).view(Param) - # view.__array_finalize__(self) - # return view - #round.__doc__ = numpy.round.__doc__ + def _get_original(self, param): return self + + #=========================================================================== + # Pickling and copying + #=========================================================================== + def __deepcopy__(self, memo): + s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy()) + memo[id(self)] = s + import copy + s.__dict__.update(copy.deepcopy(self.__dict__, memo)) + return s + + #=========================================================================== # Printing -> done #=========================================================================== @@ -250,7 +189,8 @@ if self.size <= 1: return [str(self.view(numpy.ndarray)[0])] else: return [str(self.shape)] - def parameter_names(self, add_self=False, adjust_for_printing=False): + def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): + # this just overwrites the parameterized call to parameter names, in order to maintain OOP if adjust_for_printing: return [adjust_name_for_printing(self.name)] return [self.name] @@ -261,6 +201,9 @@ def parameter_shapes(self): return [self.shape] @property + def num_params(self): + return 0 + @property def _constraints_str(self): return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))] @property @@ -282,8 +225,8 @@ if isinstance(slice_index, (tuple, list)): clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)] for i in range(self._realndim_-len(clean_curr_slice)): - i+=len(clean_curr_slice) - clean_curr_slice += range(self._realshape_[i]) + i+=1 + clean_curr_slice += [range(self._realshape_[i])] if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice) and len(set(map(len, clean_curr_slice))) <= 1): return numpy.fromiter(itertools.izip(*clean_curr_slice), @@ -368,7 +311,7 @@ class ParamConcatenation(object): #=========================================================================== def __getitem__(self, s): ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True; - params = [p._param_array_[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if
numpy.any(p._param_array_[ind[ps]])] + params = [p.param_array[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p.param_array[ind[ps]])] if len(params)==1: return params[0] return ParamConcatenation(params) def __setitem__(self, s, val, update=True): @@ -381,7 +324,7 @@ class ParamConcatenation(object): if update: self.update_all_params() def values(self): - return numpy.hstack([p._param_array_ for p in self.params]) + return numpy.hstack([p.param_array.flat for p in self.params]) #=========================================================================== # parameter operations: #=========================================================================== diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 1cdeee0b..43bc7177 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -13,10 +13,10 @@ Observable Pattern for patameterization """ -from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED +from transformations import Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED import numpy as np -__updated__ = '2014-03-24' +__updated__ = '2014-03-31' class HierarchyError(Exception): """ @@ -31,71 +31,8 @@ def adjust_name_for_printing(name): return name.replace(" ", "_").replace(".", "_").replace("-", "_m_").replace("+", "_p_").replace("!", "_I_").replace("**", "_xx_").replace("*", "_x_").replace("/", "_l_").replace("@", '_at_') return '' -class InterfacePickleFunctions(object): - def __init__(self, *a, **kw): - super(InterfacePickleFunctions, self).__init__() - def _getstate(self): - """ - Returns the state of this class in a memento pattern. - The state must be a list-like structure of all the fields - this class needs to run. - - See python doc "pickling" (`__getstate__` and `__setstate__`) for details. - """ - raise NotImplementedError, "To be able to use pickling you need to implement this method" - def _setstate(self, state): - """ - Set the state (memento pattern) of this class to the given state. - Usually this is just the counterpart to _getstate, such that - an object is a copy of another when calling - - copy = .__new__(*args,**kw)._setstate(._getstate()) - - See python doc "pickling" (`__getstate__` and `__setstate__`) for details. - """ - raise NotImplementedError, "To be able to use pickling you need to implement this method" - -class Pickleable(InterfacePickleFunctions): - """ - Make an object pickleable (See python doc 'pickling'). - - This class allows for pickling support by Memento pattern. - _getstate returns a memento of the class, which gets pickled. - _setstate() (re-)sets the state of the class to the memento - """ - def __init__(self, *a, **kw): - super(Pickleable, self).__init__() - #=========================================================================== - # Pickling operations - #=========================================================================== - def pickle(self, f, protocol=-1): - """ - :param f: either filename or open file object to write to. - if it is an open buffer, you have to make sure to close - it properly. - :param protocol: pickling protocol to use, python-pickle for details. 
- """ - import cPickle - if isinstance(f, str): - with open(f, 'w') as f: - cPickle.dump(self, f, protocol) - else: - cPickle.dump(self, f, protocol) - def __getstate__(self): - if self._has_get_set_state(): - return self._getstate() - return self.__dict__ - def __setstate__(self, state): - if self._has_get_set_state(): - self._setstate(state) - # TODO: maybe parameters_changed() here? - return - self.__dict__ = state - def _has_get_set_state(self): - return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__) - -class Observable(Pickleable): +class Observable(object): """ Observable pattern for parameterization. @@ -105,23 +42,25 @@ class Observable(Pickleable): """ _updated = True def __init__(self, *args, **kwargs): - super(Observable, self).__init__(*args, **kwargs) - self._observer_callables_ = [] + super(Observable, self).__init__() + from lists_and_dicts import ObservablesList + self.observers = ObservablesList() def add_observer(self, observer, callble, priority=0): - self._insert_sorted(priority, observer, callble) + self.observers.add(priority, observer, callble) def remove_observer(self, observer, callble=None): to_remove = [] - for p, obs, clble in self._observer_callables_: + for poc in self.observers: + _, obs, clble = poc if callble is not None: if (obs == observer) and (callble == clble): - to_remove.append((p, obs, clble)) + to_remove.append(poc) else: if obs is observer: - to_remove.append((p, obs, clble)) + to_remove.append(poc) for r in to_remove: - self._observer_callables_.remove(r) + self.observers.remove(*r) def notify_observers(self, which=None, min_priority=None): """ @@ -136,32 +75,18 @@ class Observable(Pickleable): if which is None: which = self if min_priority is None: - [callble(self, which=which) for _, _, callble in self._observer_callables_] + [callble(self, which=which) for _, _, callble in self.observers] else: - for p, _, callble in self._observer_callables_: + for p, _, callble in self.observers: if p <= min_priority: break callble(self, which=which) - def _insert_sorted(self, p, o, c): - ins = 0 - for pr, _, _ in self._observer_callables_: - if p > pr: - break - ins += 1 - self._observer_callables_.insert(ins, (p, o, c)) - - def _getstate(self): - return [self._observer_callables_] - - def _setstate(self, state): - self._observer_callables_ = state.pop() - #=============================================================================== # Foundation framework for parameterized and param objects: #=============================================================================== -class Parentable(Observable): +class Parentable(object): """ Enable an Object to have a parent. @@ -171,7 +96,7 @@ class Parentable(Observable): _parent_ = None _parent_index_ = None def __init__(self, *args, **kwargs): - super(Parentable, self).__init__(*args, **kwargs) + super(Parentable, self).__init__() def has_parent(self): """ @@ -207,7 +132,84 @@ class Parentable(Observable): """ pass -class Gradcheckable(Parentable): +class Pickleable(object): + """ + Make an object pickleable (See python doc 'pickling'). + + This class allows for pickling support by Memento pattern. + _getstate returns a memento of the class, which gets pickled. 
+ _setstate() (re-)sets the state of the class to the memento + """ + def __init__(self, *a, **kw): + super(Pickleable, self).__init__() + #=========================================================================== + # Pickling operations + #=========================================================================== + def pickle(self, f, protocol=-1): + """ + :param f: either filename or open file object to write to. + if it is an open buffer, you have to make sure to close + it properly. + :param protocol: pickling protocol to use, python-pickle for details. + """ + import cPickle as pickle + import pickle #TODO: cPickle + if isinstance(f, str): + with open(f, 'w') as f: + pickle.dump(self, f, protocol) + else: + pickle.dump(self, f, protocol) + + #=========================================================================== + # copy and pickling + #=========================================================================== + def copy(self): + """Returns a (deep) copy of the current model""" + #raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy" + import copy + memo = {} + memo[id(self._parent_)] = None + memo[id(self.gradient)] = None + memo[id(self.param_array)] = None + memo[id(self._fixes_)] = None + c = copy.deepcopy(self, memo) + c._parent_index_ = None + return c + + def __deepcopy__(self, memo): + s = self.__new__(self.__class__) + memo[id(self)] = s + import copy + s.__dict__.update(copy.deepcopy(self.__dict__, memo)) + return s + + def __getstate__(self): + ignore_list = ([#'_parent_', '_parent_index_', + #'observers', + '_param_array_', '_gradient_array_', '_fixes_', + '_Cacher_wrap__cachers'] + #+ self.parameter_names(recursive=False) + ) + dc = dict() + for k,v in self.__dict__.iteritems(): + if k not in ignore_list: + #if hasattr(v, "__getstate__"): + #dc[k] = v.__getstate__() + #else: + dc[k] = v + return dc + + def __setstate__(self, state): + self.__dict__.update(state) + return self + + #def __getstate__(self, memo): + # raise NotImplementedError, "get state must be implemented to be able to pickle objects" + + #def __setstate__(self, memo): + # raise NotImplementedError, "set state must be implemented to be able to pickle objects" + +class Gradcheckable(Pickleable, Parentable): """ Adds the functionality for an object to be gradcheckable. It is just a thin wrapper of a call to the highest parent for now. @@ -312,7 +314,7 @@ class Indexable(object): raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" -class Constrainable(Nameable, Indexable): +class Constrainable(Nameable, Indexable, Observable): """ Make an object constrainable with Priors and Transformations. TODO: Mappings!! @@ -394,6 +396,7 @@ class Constrainable(Nameable, Indexable): self._fixes_[fixed_indices] = FIXED else: self._fixes_ = None + del self.constraints[__fixed__] def _has_fixes(self): return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size @@ -429,14 +432,14 @@ class Constrainable(Nameable, Indexable): def log_prior(self): """evaluate the prior""" if self.priors.size > 0: - x = self._param_array_ + x = self.param_array return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0) return 0. 
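# A minimal sketch of the memo trick used by Pickleable.copy() above:
# pre-seeding copy.deepcopy's memo with id(obj) -> replacement prevents those
# objects from being traversed (here the parent link is cut to None), which is
# how the hierarchy avoids copying parents, gradients and fixes.
# Node is an illustrative class, not GPy API.
import copy
import numpy as np

class Node(object):
    def __init__(self, values, parent=None):
        self.values = np.array(values, dtype=float)
        self.parent = parent

    def copy(self):
        memo = {id(self.parent): None}  # do not copy (or keep) the parent link
        return copy.deepcopy(self, memo)

root = Node([0.])
child = Node([1., 2.], parent=root)
c = child.copy()
c.values[0] = 99.
print(child.values[0], c.values[0], c.parent)  # 1.0 99.0 None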
def _log_prior_gradients(self): """evaluate the gradients of the priors""" if self.priors.size > 0: - x = self._param_array_ + x = self.param_array ret = np.zeros(x.size) [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()] return ret @@ -455,7 +458,7 @@ class Constrainable(Nameable, Indexable): Constrain the parameter to the given :py:class:`GPy.core.transformations.Transformation`. """ - self._param_array_[:] = transform.initialize(self._param_array_) + self.param_array[:] = transform.initialize(self.param_array) reconstrained = self.unconstrain() self._add_to_index_operations(self.constraints, reconstrained, transform, warning) self.notify_observers(self, None if trigger_parent else -np.inf) @@ -565,14 +568,14 @@ class OptimizationHandlable(Constrainable): super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw) def transform(self): - [np.put(self._param_array_, ind, c.finv(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] + [np.put(self.param_array, ind, c.finv(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] def untransform(self): - [np.put(self._param_array_, ind, c.f(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] + [np.put(self.param_array, ind, c.f(self.param_array.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] def _get_params_transformed(self): # transformed parameters (apply transformation rules) - p = self._param_array_.copy() + p = self.param_array.copy() [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] if self.has_parent() and self.constraints[__fixed__].size != 0: fixes = np.ones(self.size).astype(bool) @@ -583,14 +586,14 @@ class OptimizationHandlable(Constrainable): return p def _set_params_transformed(self, p): - if p is self._param_array_: + if p is self.param_array: p = p.copy() if self.has_parent() and self.constraints[__fixed__].size != 0: fixes = np.ones(self.size).astype(bool) fixes[self.constraints[__fixed__]] = FIXED - self._param_array_.flat[fixes] = p - elif self._has_fixes(): self._param_array_.flat[self._fixes_] = p - else: self._param_array_.flat = p + self.param_array.flat[fixes] = p + elif self._has_fixes(): self.param_array.flat[self._fixes_] = p + else: self.param_array.flat = p self.untransform() self._trigger_params_changed() @@ -600,36 +603,29 @@ class OptimizationHandlable(Constrainable): def _size_transformed(self): return self.size - self.constraints[__fixed__].size -# -# def _untransform_params(self, p): -# # inverse apply transformations for parameters -# #p = p.copy() -# if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp -# [np.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] -# return p -# -# def _get_params(self): -# """ -# get all parameters -# """ -# return self._param_array_ -# p = np.empty(self.size, dtype=np.float64) -# if self.size == 0: -# return p -# [np.put(p, ind, par._get_params()) for ind, par in itertools.izip(self._param)] -# return p -# def _set_params(self, params, trigger_parent=True): -# self._param_array_.flat = params -# if trigger_parent: min_priority = None -# else: min_priority = -np.inf -# self.notify_observers(None, min_priority) - # don't overwrite this anymore! 
- # raise NotImplementedError, "Abstract superclass: This needs to be implemented in Param and Parameterizable" + @property + def num_params(self): + """ + Return the number of parameters of this parameter_handle. + Param objects will allways return 0. + """ + raise NotImplemented, "Abstract, please implement in respective classes" - #=========================================================================== - # Optimization handles: - #=========================================================================== + def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): + """ + Get the names of all parameters of this model. + + :param bool add_self: whether to add the own name in front of names + :param bool adjust_for_printing: whether to call `adjust_name_for_printing` on names + :param bool recursive: whether to traverse through hierarchy and append leaf node names + """ + if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) + else: adjust = lambda x: x + if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)] + else: names = [adjust(x.name) for x in self._parameters_] + if add_self: names = map(lambda x: adjust(self.name) + "." + x, names) + return names def _get_param_names(self): n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()]) return n @@ -663,16 +659,30 @@ class OptimizationHandlable(Constrainable): # For shared memory arrays. This does nothing in Param, but sets the memory # for all parameterized objects #=========================================================================== + @property + def full_gradient(self): + """ + Note to users: + This does not return the gradient in the right shape! Use self.gradient + for the right gradient array. + + To work on the gradient array, use this as the gradient handle. + This method exists for in memory use of parameters. + When trying to access the true gradient array, use this. + """ + self.gradient # <<< ensure _gradient_array_ + return self._gradient_array_ + def _propagate_param_grad(self, parray, garray): pi_old_size = 0 for pi in self._parameters_: pislice = slice(pi_old_size, pi_old_size + pi.size) - self._param_array_[pislice] = pi._param_array_.flat # , requirements=['C', 'W']).flat - self._gradient_array_[pislice] = pi._gradient_array_.flat # , requirements=['C', 'W']).flat + self.param_array[pislice] = pi.param_array.flat # , requirements=['C', 'W']).flat + self.full_gradient[pislice] = pi.full_gradient.flat # , requirements=['C', 'W']).flat - pi._param_array_.data = parray[pislice].data - pi._gradient_array_.data = garray[pislice].data + pi.param_array.data = parray[pislice].data + pi.full_gradient.data = garray[pislice].data pi._propagate_param_grad(parray[pislice], garray[pislice]) pi_old_size += pi.size @@ -681,26 +691,32 @@ class Parameterizable(OptimizationHandlable): def __init__(self, *args, **kwargs): super(Parameterizable, self).__init__(*args, **kwargs) from GPy.core.parameterization.lists_and_dicts import ArrayList - _parameters_ = ArrayList() + self._parameters_ = ArrayList() self.size = 0 - self._param_array_ = np.empty(self.size, dtype=np.float64) - self._gradient_array_ = np.empty(self.size, dtype=np.float64) self._added_names_ = set() - def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): - """ - Get the names of all parameters of this model. 
+ @property + def param_array(self): + if not hasattr(self, '_param_array_'): + self._param_array_ = np.empty(self.size, dtype=np.float64) + return self._param_array_ - :param bool add_self: whether to add the own name in front of names - :param bool adjust_for_printing: whether to call `adjust_name_for_printing` on names - :param bool recursive: whether to traverse through hierarchy and append leaf node names - """ - if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) - else: adjust = lambda x: x - if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)] - else: names = [adjust(x.name) for x in self._parameters_] - if add_self: names = map(lambda x: adjust(self.name) + "." + x, names) - return names + @param_array.setter + def param_array(self, arr): + self._param_array_ = arr + + #========================================================================= + # Gradient handling + #========================================================================= + @property + def gradient(self): + if not hasattr(self, '_gradient_array_'): + self._gradient_array_ = np.empty(self.size, dtype=np.float64) + return self._gradient_array_ + + @gradient.setter + def gradient(self, val): + self._gradient_array_[:] = val @property def num_params(self): @@ -737,34 +753,6 @@ class Parameterizable(OptimizationHandlable): self._remove_parameter_name(None, old_name) self._add_parameter_name(param) - #========================================================================= - # Gradient handling - #========================================================================= - @property - def gradient(self): - return self._gradient_array_ - - @gradient.setter - def gradient(self, val): - self._gradient_array_[:] = val - #=========================================================================== - # def _collect_gradient(self, target): - # [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] - #=========================================================================== - - #=========================================================================== - # def _set_params(self, params, trigger_parent=True): - # [p._set_params(params[s], trigger_parent=False) for p, s in itertools.izip(self._parameters_, self._param_slices_)] - # if trigger_parent: min_priority = None - # else: min_priority = -np.inf - # self.notify_observers(None, min_priority) - #=========================================================================== - - #=========================================================================== - # def _set_gradient(self, g): - # [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] - #=========================================================================== - def add_parameter(self, param, index=None, _ignore_added_names=False): """ :param parameters: the parameters to add @@ -864,7 +852,7 @@ class Parameterizable(OptimizationHandlable): # no parameters for this class return old_size = 0 - self._param_array_ = np.empty(self.size, dtype=np.float64) + self.param_array = np.empty(self.size, dtype=np.float64) self._gradient_array_ = np.empty(self.size, dtype=np.float64) self._param_slices_ = [] @@ -874,15 +862,15 @@ class Parameterizable(OptimizationHandlable): pslice = slice(old_size, old_size + p.size) # first connect all children - p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice]) + 
p._propagate_param_grad(self.param_array[pslice], self.full_gradient[pslice]) # then connect children to self - self._param_array_[pslice] = p._param_array_.flat # , requirements=['C', 'W']).ravel(order='C') - self._gradient_array_[pslice] = p._gradient_array_.flat # , requirements=['C', 'W']).ravel(order='C') + self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C') + self.full_gradient[pslice] = p.full_gradient.flat # , requirements=['C', 'W']).ravel(order='C') - if not p._param_array_.flags['C_CONTIGUOUS']: - import ipdb;ipdb.set_trace() - p._param_array_.data = self._param_array_[pslice].data - p._gradient_array_.data = self._gradient_array_[pslice].data + if not p.param_array.flags['C_CONTIGUOUS']: + raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS" + p.param_array.data = self.param_array[pslice].data + p.full_gradient.data = self.full_gradient[pslice].data self._param_slices_.append(pslice) @@ -898,41 +886,22 @@ class Parameterizable(OptimizationHandlable): self.notify_observers(which=which) #=========================================================================== - # TODO: not working yet + # Pickling #=========================================================================== + def __setstate__(self, state): + super(Parameterizable, self).__setstate__(state) + self._connect_parameters() + self._connect_fixes() + self._notify_parent_change() + + self.parameters_changed() + def copy(self): - """Returns a (deep) copy of the current model""" - raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy" - import copy - from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView - from .lists_and_dicts import ArrayList - - dc = dict() - for k, v in self.__dict__.iteritems(): - if k not in ['_parent_', '_parameters_', '_parent_index_', '_observer_callables_'] + self.parameter_names(recursive=False): - if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)): - dc[k] = v.copy() - else: - dc[k] = copy.deepcopy(v) - if k == '_parameters_': - params = [p.copy() for p in v] - - dc['_parent_'] = None - dc['_parent_index_'] = None - dc['_observer_callables_'] = [] - dc['_parameters_'] = ArrayList() - dc['constraints'].clear() - dc['priors'].clear() - dc['size'] = 0 - - s = self.__new__(self.__class__) - s.__dict__ = dc - - for p in params: - s.add_parameter(p, _ignore_added_names=True) - - return s - + c = super(Parameterizable, self).copy() + c._connect_parameters() + c._connect_fixes() + c._notify_parent_change() + return c #=========================================================================== # From being parentable, we have to define the parent_change notification #=========================================================================== diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 6460c988..a794ab40 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -17,7 +17,7 @@ class ParametersChangedMeta(type): instance.parameters_changed() return instance -class Parameterized(Parameterizable, Pickleable): +class Parameterized(Parameterizable): """ Parameterized class @@ -63,7 +63,7 @@ class Parameterized(Parameterizable, Pickleable): # Metaclass for parameters changed after init. 
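# A sketch of the memory sharing that _propagate_param_grad and
# _connect_parameters set up above. The real code re-points each child's
# buffer via `param_array.data = parent_slice.data`, which is why the parent
# slice must be C_CONTIGUOUS; the observable effect is that of a numpy view:
import numpy as np

parent = np.zeros(5)
parent[1:4] = np.arange(3, dtype=float)
child = parent[1:4]            # view into the parent's contiguous buffer
child[0] = 42.
print(parent)                  # [  0.  42.   1.   2.   0.]
parent[3] = -1.
print(child)                   # [ 42.   1.  -1.]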
# This makes sure, that parameters changed will always be called after __init__ # **Never** call parameters_changed() yourself - __metaclass__ = ParametersChangedMeta + __metaclass__ = ParametersChangedMeta #=========================================================================== def __init__(self, name=None, parameters=[], *a, **kw): super(Parameterized, self).__init__(name=name, *a, **kw) @@ -90,7 +90,7 @@ class Parameterized(Parameterizable, Pickleable): child_node = child.build_pydot(G) G.add_edge(pydot.Edge(node, child_node)) - for o in self._observer_callables_.keys(): + for o in self.observers.keys(): label = o.name if hasattr(o, 'name') else str(o) observed_node = pydot.Node(id(o), label=label) G.add_node(observed_node) @@ -101,48 +101,13 @@ class Parameterized(Parameterizable, Pickleable): return G return node - def _getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - For inheriting from Parameterized: - - Allways append the state of the inherited object - and call down to the inherited object in _setstate!! - """ - return [ - self._fixes_, - self.priors, - self.constraints, - self._parameters_, - self._name, - self._added_names_, - ] - - def _setstate(self, state): - self._added_names_ = state.pop() - self._name = state.pop() - self._parameters_ = state.pop() - self.constraints = state.pop() - self.priors = state.pop() - self._fixes_ = state.pop() - self._connect_parameters() - self.parameters_changed() - #=========================================================================== - # Override copy to handle programmatically added observers - #=========================================================================== - def copy(self): - c = super(Pickleable, self).copy() - c.add_observer(c, c._parameters_changed_notification, -100) - return c - #=========================================================================== # Gradient control #=========================================================================== def _transform_gradients(self, g): if self.has_parent(): return g - [numpy.put(g, i, g[i] * c.gradfactor(self._param_array_[i])) for c, i in self.constraints.iteritems() if c != __fixed__] + [numpy.put(g, i, g[i] * c.gradfactor(self.param_array[i])) for c, i in self.constraints.iteritems() if c != __fixed__] if self._has_fixes(): return g[self._fixes_] return g @@ -174,7 +139,7 @@ class Parameterized(Parameterizable, Pickleable): this is not in the global view of things! 
""" return numpy.r_[:self.size] - + #=========================================================================== # Convenience for fixed, tied checking of param: #=========================================================================== @@ -189,7 +154,7 @@ class Parameterized(Parameterizable, Pickleable): # you can retrieve the original param through this method, by passing # the copy here return self._parameters_[param._parent_index_] - + #=========================================================================== # Get/set parameters: #=========================================================================== @@ -206,7 +171,7 @@ class Parameterized(Parameterizable, Pickleable): def __getitem__(self, name, paramlist=None): if isinstance(name, (int, slice, tuple, np.ndarray)): - return self._param_array_[name] + return self.param_array[name] else: if paramlist is None: paramlist = self.grep_param_names(name) @@ -222,7 +187,7 @@ class Parameterized(Parameterizable, Pickleable): def __setitem__(self, name, value, paramlist=None): if isinstance(name, (slice, tuple, np.ndarray)): try: - self._param_array_[name] = value + self.param_array[name] = value except: raise ValueError, "Setting by slice or index only allowed with array-like" self._trigger_params_changed() diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index ac1dfc63..3730baed 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -66,7 +66,7 @@ class SpikeAndSlabPrior(VariationalPrior): self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0) class VariationalPosterior(Parameterized): - def __init__(self, means=None, variances=None, name=None, *a, **kw): + def __init__(self, means=None, variances=None, name='latent space', *a, **kw): super(VariationalPosterior, self).__init__(name=name, *a, **kw) self.mean = Param("mean", means) self.variance = Param("variance", variances, Logexp()) @@ -124,6 +124,7 @@ class NormalPosterior(VariationalPosterior): import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
from ...plotting.matplot_dep import variational_plots + import matplotlib return variational_plots.plot(self,*args) class SpikeAndSlabPosterior(VariationalPosterior): diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 7bf0ca2a..7552b8ac 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -106,15 +106,3 @@ class SparseGP(GP): return mu, var - def _getstate(self): - """ - Get the current state of the class, - """ - return GP._getstate(self) + [ - self.Z, - self.num_inducing] - - def _setstate(self, state): - self.num_inducing = state.pop() - self.Z = state.pop() - GP._setstate(self, state) diff --git a/GPy/core/svigp.py b/GPy/core/svigp.py index a2c7acee..60e8371c 100644 --- a/GPy/core/svigp.py +++ b/GPy/core/svigp.py @@ -89,57 +89,57 @@ class SVIGP(GP): self._param_steplength_trace = [] self._vb_steplength_trace = [] - def _getstate(self): - steplength_params = [self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength] - return GP._getstate(self) + \ - [self.get_vb_param(), - self.Z, - self.num_inducing, - self.has_uncertain_inputs, - self.X_variance, - self.X_batch, - self.X_variance_batch, - steplength_params, - self.batchcounter, - self.batchsize, - self.epochs, - self.momentum, - self.data_prop, - self._param_trace, - self._param_steplength_trace, - self._vb_steplength_trace, - self._ll_trace, - self._grad_trace, - self.Y, - self._permutation, - self.iterations - ] - - def _setstate(self, state): - self.iterations = state.pop() - self._permutation = state.pop() - self.Y = state.pop() - self._grad_trace = state.pop() - self._ll_trace = state.pop() - self._vb_steplength_trace = state.pop() - self._param_steplength_trace = state.pop() - self._param_trace = state.pop() - self.data_prop = state.pop() - self.momentum = state.pop() - self.epochs = state.pop() - self.batchsize = state.pop() - self.batchcounter = state.pop() - steplength_params = state.pop() - (self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength) = steplength_params - self.X_variance_batch = state.pop() - self.X_batch = state.pop() - self.X_variance = state.pop() - self.has_uncertain_inputs = state.pop() - self.num_inducing = state.pop() - self.Z = state.pop() - vb_param = state.pop() - GP._setstate(self, state) - self.set_vb_param(vb_param) +# def _getstate(self): +# steplength_params = [self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength] +# return GP._getstate(self) + \ +# [self.get_vb_param(), +# self.Z, +# self.num_inducing, +# self.has_uncertain_inputs, +# self.X_variance, +# self.X_batch, +# self.X_variance_batch, +# steplength_params, +# self.batchcounter, +# self.batchsize, +# self.epochs, +# self.momentum, +# self.data_prop, +# self._param_trace, +# self._param_steplength_trace, +# self._vb_steplength_trace, +# self._ll_trace, +# self._grad_trace, +# self.Y, +# self._permutation, +# self.iterations +# ] +# +# def _setstate(self, state): +# self.iterations = state.pop() +# self._permutation = state.pop() +# self.Y = state.pop() +# self._grad_trace = state.pop() +# self._ll_trace = state.pop() +# self._vb_steplength_trace = state.pop() +# 
self._param_steplength_trace = state.pop() +# self._param_trace = state.pop() +# self.data_prop = state.pop() +# self.momentum = state.pop() +# self.epochs = state.pop() +# self.batchsize = state.pop() +# self.batchcounter = state.pop() +# steplength_params = state.pop() +# (self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength) = steplength_params +# self.X_variance_batch = state.pop() +# self.X_batch = state.pop() +# self.X_variance = state.pop() +# self.has_uncertain_inputs = state.pop() +# self.num_inducing = state.pop() +# self.Z = state.pop() +# vb_param = state.pop() +# GP._setstate(self, state) +# self.set_vb_param(vb_param) def _compute_kernel_matrices(self): # kernel computations, using BGPLVM notation diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index ea997d63..07623d6b 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -277,7 +277,9 @@ def bgplvm_simulation(optimize=True, verbose=1, k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) #k = kern.RBF(Q, ARD=True, lengthscale=10.) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) - + m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) + m.likelihood.variance = .1 + if optimize: print "Optimizing model:" m.optimize('bfgs', messages=verbose, max_iters=max_iters, @@ -299,15 +301,16 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) Y = Ylist[0] k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) - + inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k) m.inference_method = VarDTCMissingData() m.Y[inan] = _np.nan - m.X.variance *= .1 + m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape) + m.likelihood.variance = .01 m.parameters_changed() m.Yreal = Y - + if optimize: print "Optimizing model:" m.optimize('bfgs', messages=verbose, max_iters=max_iters, @@ -321,18 +324,15 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): from GPy import kern from GPy.models import MRD - from GPy.likelihoods import Gaussian D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) - + #Ylist = [Ylist[0]] - k = [kern.Linear(Q, ARD=True) + kern.White(Q, 1e-4) for _ in range(len(Ylist))] - m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="", initz='permute', **kw) - - m['.*noise'] = [Y.var()/500. for Y in Ylist] - #for i, Y in enumerate(Ylist): - # m['.*Y_{}.*Gaussian.*noise'.format(i)] = Y.var(1) / 500. + k = kern.Linear(Q, ARD=True) + m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="PCA_concat", initz='permute', **kw) + + m['.*noise'] = [Y.var()/40. 
for Y in Ylist]

     if optimize:
         print "Optimizing Model:"
diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py
index bd3fcefb..074b67a6 100644
--- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py
+++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py
@@ -32,7 +32,8 @@ class ExactGaussianInference(object):
             return Y
         else:
             #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
-            raise NotImplementedError, 'TODO' #TODO
+            print "WARNING: N > D for Y; caching of a factor L with L L^T = Y Y^T is not yet implemented, returning Y instead!"
+            return Y

     def inference(self, kern, X, likelihood, Y, Y_metadata=None):
         """
diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py
index 12315a29..9ba3f83f 100644
--- a/GPy/inference/latent_function_inference/laplace.py
+++ b/GPy/inference/latent_function_inference/laplace.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 #
 #Parts of this file were influenced by the Matlab GPML framework written by
@@ -91,7 +91,11 @@ class Laplace(object):
         iteration = 0
         while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
             W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
+            if np.any(np.isnan(W)):
+                raise ValueError('One or more element(s) of W is NaN')
             grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
+            if np.any(np.isnan(grad)):
+                raise ValueError('One or more element(s) of grad is NaN')
             W_f = W*f
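For reference, the loop above is the Newton-style mode finding of the Laplace approximation, which is where the new NaN checks fire. A minimal self-contained sketch of one numerically stable Newton step in the spirit of Rasmussen & Williams, Algorithm 3.1, is given below; the Bernoulli-logit likelihood here is an assumed stand-in, since the real code obtains W and grad from the likelihood object:

import numpy as np

def newton_step(K, f, y):
    # Bernoulli likelihood with logistic inverse link, y in {0, 1}
    pi = 1. / (1. + np.exp(-f))
    grad = y - pi                 # dlogpdf_df
    W = pi * (1. - pi)            # -d2logpdf_df2 (diagonal)
    if np.any(np.isnan(W)) or np.any(np.isnan(grad)):
        raise ValueError('One or more element(s) of W or grad is NaN')
    W12 = np.sqrt(W)
    # B = I + W^0.5 K W^0.5 stays well conditioned; solve via its Cholesky factor
    B = np.eye(K.shape[0]) + W12[:, None] * K * W12[None, :]
    L = np.linalg.cholesky(B)
    b = W * f + grad
    v = np.linalg.solve(L, W12 * K.dot(b))
    a = b - W12 * np.linalg.solve(L.T, v)
    return K.dot(a)               # new f; iterate until the change drops below tolerance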
@@ -141,25 +145,30 @@ class Laplace(object):
         """
         #At this point get the hessian matrix (or vector as W is diagonal)
         W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
+        if np.any(np.isnan(W)):
+            raise ValueError('One or more element(s) of W is NaN')
         K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)

         #compute vital matrices
         C = np.dot(LiW12, K)
-        Ki_W_i = K - C.T.dot(C) #Could this be wrong?
+        Ki_W_i = K - C.T.dot(C)

         #compute the log marginal
         log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L)))

-        #Compute vival matrices for derivatives
+        # Compute matrices for derivatives
         dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
-        dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9.
+        if np.any(np.isnan(dW_df)):
+            raise ValueError('One or more element(s) of dW_df is NaN')
+
+        dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) # s2 in R&W p126 line 9.
         #BiK, _ = dpotrs(L, K, lower=1)
         #dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
         I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i)

         ####################
-        #compute dL_dK#
+        # compute dL_dK #
         ####################
         if kern.size > 0 and not kern.is_fixed:
             #Explicit
@@ -202,12 +211,12 @@ class Laplace(object):
     def _compute_B_statistics(self, K, W, log_concave):
         """
         Rasmussen suggests the use of a numerically stable positive definite matrix B
-        Which has a positive diagonal element and can be easyily inverted
+        which has positive diagonal elements and can be easily inverted
        :param K: Prior Covariance matrix evaluated at locations X
        :type K: NxN matrix
        :param W: Negative hessian at a point (diagonal matrix)
-       :type W: Vector of diagonal values of hessian (1xN)
+       :type W: Vector of diagonal values of Hessian (1xN)
        :returns: (W12BiW12, L_B, Li_W12)
        """
        if not log_concave:
@@ -218,7 +227,8 @@ class Laplace(object):
            # If the likelihood is non-log-concave, we want to say that there is a negative variance
            # To cause the posterior to become less certain than the prior and likelihood,
            # This is a property only held by non-log-concave likelihoods
-
+        if np.any(np.isnan(W)):
+            raise ValueError('One or more element(s) of W is NaN')
        #W is diagonal so its sqrt is just the sqrt of the diagonal elements
        W_12 = np.sqrt(W)
        B = np.eye(K.shape[0]) + W_12*K*W_12.T
diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py
index ee2d6250..7344b204 100644
--- a/GPy/inference/latent_function_inference/var_dtc.py
+++ b/GPy/inference/latent_function_inference/var_dtc.py
@@ -23,6 +23,7 @@ class VarDTC(object):
     def __init__(self, limit=1):
         #self._YYTfactor_cache = caching.cache()
         from ...util.caching import Cacher
+        self.limit = limit
         self.get_trYYT = Cacher(self._get_trYYT, limit)
         self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
@@ -33,6 +34,17 @@ class VarDTC(object):
     def _get_trYYT(self, Y):
         return param_to_array(np.sum(np.square(Y)))

+    def __getstate__(self):
+        # has to be overridden, as Cacher objects cannot be pickled.
+        return self.limit
+
+    def __setstate__(self, state):
+        # has to be overridden, as Cacher objects cannot be pickled.
+        self.limit = state
+        from ...util.caching import Cacher
+        self.get_trYYT = Cacher(self._get_trYYT, self.limit)
+        self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
+
     def _get_YYTfactor(self, Y):
         """
         find a matrix L which satisfies LLT = YYT.
@@ -126,7 +138,7 @@ class VarDTC(object):
         delit += output_dim * np.eye(num_inducing)
         # Compute dL_dKmm
         dL_dKmm = backsub_both_sides(Lm, delit)
-
+        # derivatives of L w.r.t.
psi dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, @@ -179,6 +191,7 @@ class VarDTC(object): return post, log_marginal, grad_dict class VarDTCMissingData(object): + const_jitter = 1e-6 def __init__(self, limit=1): from ...util.caching import Cacher self._Y = Cacher(self._subarray_computations, limit) @@ -250,7 +263,7 @@ class VarDTCMissingData(object): for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices): if het_noise: beta = beta_all[ind] - else: beta = beta_all[0] + else: beta = beta_all VVT_factor = (beta*y) VVT_factor_all[v, ind].flat = VVT_factor.flat @@ -311,7 +324,7 @@ class VarDTCMissingData(object): het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, - data_fit, num_data, output_dim, trYYT) + data_fit, num_data, output_dim, trYYT, Y) if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf else: diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 0e265a64..37cd71ec 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,9 +1,9 @@ from _src.kern import Kern from _src.rbf import RBF -from _src.linear import Linear +from _src.linear import Linear, LinearFull from _src.static import Bias, White from _src.brownian import Brownian -from _src.sympykern import Sympykern +from _src.symbolic import Symbolic from _src.stationary import Exponential, Matern32, Matern52, ExpQuad, RatQuad, Cosine from _src.mlp import MLP from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 57e611ed..fb0e114b 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -23,7 +23,6 @@ class Add(CombinationKernel): If a list of parts (of this kernel!) `which_parts` is given, only the parts of the list are taken to compute the covariance. 
""" - assert X.shape[1] > max(np.r_[self.active_dims]) if which_parts is None: which_parts = self.parts elif not isinstance(which_parts, (list, tuple)): @@ -33,7 +32,6 @@ class Add(CombinationKernel): @Cache_this(limit=2, force_kwargs=['which_parts']) def Kdiag(self, X, which_parts=None): - assert X.shape[1] > max(np.r_[self.active_dims]) if which_parts is None: which_parts = self.parts elif not isinstance(which_parts, (list, tuple)): @@ -160,21 +158,13 @@ class Add(CombinationKernel): target_S += b return target_mu, target_S - def _getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - """ - return super(Add, self)._getstate() - - def _setstate(self, state): - super(Add, self)._setstate(state) - def add(self, other, name='sum'): if isinstance(other, Add): other_params = other._parameters_[:] for p in other_params: other.remove_parameter(p) self.add_parameters(*other_params) - else: self.add_parameter(other) + else: + self.add_parameter(other) + self.input_dim, self.active_dims = self.get_input_dim_active_dims(self.parts) return self \ No newline at end of file diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py index cf015d02..4a9671aa 100644 --- a/GPy/kern/_src/independent_outputs.py +++ b/GPy/kern/_src/independent_outputs.py @@ -54,85 +54,93 @@ class IndependentOutputs(CombinationKernel): self.kern = kernels super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name) self.index_dim = index_dim - self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0]) def K(self,X ,X2=None): slices = index_to_slices(X[:,self.index_dim]) + kerns = itertools.repeat(self.kern) if self.single_kern else self.kern if X2 is None: target = np.zeros((X.shape[0], X.shape[0])) - [[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)] + [[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(kerns, slices)] else: slices2 = index_to_slices(X2[:,self.index_dim]) target = np.zeros((X.shape[0], X2.shape[0])) - [[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)] + [[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)] return target def Kdiag(self,X): slices = index_to_slices(X[:,self.index_dim]) + kerns = itertools.repeat(self.kern) if self.single_kern else self.kern target = np.zeros(X.shape[0]) - [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] + [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)] return target def update_gradients_full(self,dL_dK,X,X2=None): slices = index_to_slices(X[:,self.index_dim]) - if self.single_kern: target = np.zeros(self.kern.size) - else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)] + if self.single_kern: + target = np.zeros(self.kern.size) + kerns = itertools.repeat(self.kern) + else: + kerns = self.kern + target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)] def collate_grads(kern, i, dL, X, X2): kern.update_gradients_full(dL,X,X2) if self.single_kern: target[:] += kern.gradient else: target[i][:] += 
kern.gradient if X2 is None: - [[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))] + [[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(kerns,slices))] else: slices2 = index_to_slices(X2[:,self.index_dim]) - [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(self.kerns,slices,slices2))] + [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))] if self.single_kern: kern.gradient = target - else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))] + else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))] def gradients_X(self,dL_dK, X, X2=None): target = np.zeros(X.shape) + kerns = itertools.repeat(self.kern) if self.single_kern else self.kern if X2 is None: # TODO: make use of index_to_slices values = np.unique(X[:,self.index_dim]) slices = [X[:,self.index_dim]==i for i in values] [target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None)) - for kern, s in zip(self.kerns, slices)] + for kern, s in zip(kerns, slices)] #slices = index_to_slices(X[:,self.index_dim]) #[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s]) - # for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] + # for s in slices_i] for kern, slices_i in zip(kerns, slices)] #import ipdb;ipdb.set_trace() #[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]), # np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss])) - # for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(self.kerns, slices)] + # for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(kerns, slices)] else: values = np.unique(X[:,self.index_dim]) slices = [X[:,self.index_dim]==i for i in values] slices2 = [X2[:,self.index_dim]==i for i in values] [target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2])) - for kern, s, s2 in zip(self.kerns, slices, slices2)] + for kern, s, s2 in zip(kerns, slices, slices2)] # TODO: make work with index_to_slices #slices = index_to_slices(X[:,self.index_dim]) #slices2 = index_to_slices(X2[:,self.index_dim]) - #[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)] + #[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)] return target def gradients_X_diag(self, dL_dKdiag, X): slices = index_to_slices(X[:,self.index_dim]) + kerns = itertools.repeat(self.kern) if self.single_kern else self.kern target = np.zeros(X.shape) - [[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)] + [[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)] return target def update_gradients_diag(self, dL_dKdiag, X): slices = index_to_slices(X[:,self.index_dim]) + kerns = itertools.repeat(self.kern) if 
self.single_kern else self.kern if self.single_kern: target = np.zeros(self.kern.size) - else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)] + else: target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)] def collate_grads(kern, i, dL, X): kern.update_gradients_diag(dL,X) if self.single_kern: target[:] += kern.gradient else: target[i][:] += kern.gradient - [[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))] + [[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(kerns, slices))] if self.single_kern: kern.gradient = target - else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))] + else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))] class Hierarchical(CombinationKernel): """ @@ -148,30 +156,30 @@ class Hierarchical(CombinationKernel): def __init__(self, kern, name='hierarchy'): assert all([k.input_dim==kerns[0].input_dim for k in kerns]) super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name) - self.kerns = kerns - self.add_parameters(self.kerns) + kerns = kerns + self.add_parameters(kerns) def K(self,X ,X2=None): - X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)] - K = self.kerns[0].K(X, X2) + X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)] + K = kerns[0].K(X, X2) if X2 is None: - [[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)] + [[[np.copyto(K[s,s], k.K(X[s], None)) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)] else: X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) - [[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(self.kerns[1:], slices, slices2)] + [[[[np.copyto(K[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_k,slices_k2)] for k, slices_k, slices_k2 in zip(kerns[1:], slices, slices2)] return target def Kdiag(self,X): - X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(self.kerns[0].input_dim, self.input_dim)] - K = self.kerns[0].K(X, X2) - [[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.kerns[1:], slices)] + X, slices = X[:,:-self.levels], [index_to_slices(X[:,i]) for i in range(kerns[0].input_dim, self.input_dim)] + K = kerns[0].K(X, X2) + [[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(kerns[1:], slices)] return target def update_gradients_full(self,dL_dK,X,X2=None): X,slices = X[:,:-1],index_to_slices(X[:,-1]) if X2 is None: - self.kerns[0].update_gradients_full(dL_dK, X, None) - for k, slices_k in zip(self.kerns[1:], slices): + kerns[0].update_gradients_full(dL_dK, X, None) + for k, slices_k in zip(kerns[1:], slices): target = np.zeros(k.size) def collate_grads(dL, X, X2): k.update_gradients_full(dL,X,X2) @@ -180,8 +188,8 @@ class Hierarchical(CombinationKernel): k._set_gradient(target) else: X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1]) - self.kerns[0].update_gradients_full(dL_dK, X, None) - for k, 
slices_k in zip(self.kerns[1:], slices): + kerns[0].update_gradients_full(dL_dK, X, None) + for k, slices_k in zip(kerns[1:], slices): target = np.zeros(k.size) def collate_grads(dL, X, X2): k.update_gradients_full(dL,X,X2) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index be8a15b2..dbe4c1f8 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -11,13 +11,12 @@ from ...util.caching import Cache_this class Kern(Parameterized): #=========================================================================== - # This adds input slice support. The rather ugly code for slicing can be + # This adds input slice support. The rather ugly code for slicing can be # found in kernel_slice_operations __metaclass__ = KernCallsViaSlicerMeta #=========================================================================== - _debug=False _support_GPU=False - def __init__(self, input_dim, active_dims, name, useGPU=False,*a, **kw): + def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw): """ The base class for a kernel: a positive definite function which forms of a covariance function (kernel). @@ -178,22 +177,6 @@ class Kern(Parameterized): #else: kernels.append(other) return Prod([self, other], name) - def _getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - """ - return super(Kern, self)._getstate() + [ - self.active_dims, - self.input_dim, - self._sliced_X] - - def _setstate(self, state): - self._sliced_X = state.pop() - self.input_dim = state.pop() - self.active_dims = state.pop() - super(Kern, self)._setstate(state) - class CombinationKernel(Kern): """ Abstract super class for combination kernels. @@ -211,9 +194,7 @@ class CombinationKernel(Kern): :param array-like|slice extra_dims: if needed extra dimensions for the combination kernel to work on """ assert all([isinstance(k, Kern) for k in kernels]) - active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int)) - input_dim = active_dims.max()+1 + len(extra_dims) - active_dims = slice(active_dims.max()+1+len(extra_dims)) + input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims) # initialize the kernel with the full input_dim super(CombinationKernel, self).__init__(input_dim, active_dims, name) self.extra_dims = extra_dims @@ -223,6 +204,12 @@ class CombinationKernel(Kern): def parts(self): return self._parameters_ + def get_input_dim_active_dims(self, kernels, extra_dims = None): + active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int)) + input_dim = active_dims.max()+1 + (len(np.r_[extra_dims]) if extra_dims is not None else 0) + active_dims = slice(0, input_dim, 1) + return input_dim, active_dims + def input_sensitivity(self): in_sen = np.zeros((self.num_params, self.input_dim)) for i, p in enumerate(self.parts): diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index 9beb40ab..a4bb8f62 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -5,130 +5,135 @@ Created on 11 Mar 2014 ''' from ...core.parameterization.parameterized import ParametersChangedMeta import numpy as np +from functools import wraps + +def put_clean(dct, name, func): + if name in dct: + dct['_clean_{}'.format(name)] = dct[name] + dct[name] = func(dct[name]) class KernCallsViaSlicerMeta(ParametersChangedMeta): - def __call__(self, *args, **kw): - instance = super(ParametersChangedMeta, 
self).__call__(*args, **kw) - instance.K = _slice_wrapper(instance, instance.K) - instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True) - instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True) - instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True) - instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True) - instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True) - instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False) - instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False) - instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False) - instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True) - instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True) - instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True) - instance.parameters_changed() - return instance + def __new__(cls, name, bases, dct): + put_clean(dct, 'K', _slice_K) + put_clean(dct, 'Kdiag', _slice_Kdiag) + put_clean(dct, 'update_gradients_full', _slice_update_gradients_full) + put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag) + put_clean(dct, 'gradients_X', _slice_gradients_X) + put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag) -def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False): - """ - This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension. 
- The different switches are: - diag: if X2 exists - derivative: if first arg is dL_dK - psi_stat: if first 3 args are dL_dpsi0..2 - psi_stat_Z: if first 2 args are dL_dpsi1..2 - """ - if derivative: - if diag: - def x_slice_wrapper(dL_dKdiag, X): - ret_X_not_sliced = ret_X and kern._sliced_X == 0 - if ret_X_not_sliced: - ret = np.zeros(X.shape) - X = kern._slice_X(X) if not kern._sliced_X else X - # if the return value is of shape X.shape, we need to make sure to return the right shape - kern._sliced_X += 1 - try: - if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X) - else: ret = operation(dL_dKdiag, X) - except: - raise - finally: - kern._sliced_X -= 1 - return ret - elif psi_stat: - def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): - ret_X_not_sliced = ret_X and kern._sliced_X == 0 - if ret_X_not_sliced: - ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape) - Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior - kern._sliced_X += 1 - # if the return value is of shape X.shape, we need to make sure to return the right shape - try: - if ret_X_not_sliced: - ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)) - r2 = ret[:2] - ret[0] = ret1 - ret[1] = ret2 - ret[0][:, kern.active_dims] = r2[0] - ret[1][:, kern.active_dims] = r2[1] - del r2 - else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) - except: - raise - finally: - kern._sliced_X -= 1 - return ret - elif psi_stat_Z: - def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior): - ret_X_not_sliced = ret_X and kern._sliced_X == 0 - if ret_X_not_sliced: ret = np.zeros(Z.shape) - Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior - kern._sliced_X += 1 - try: - if ret_X_not_sliced: - ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) - else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) - except: - raise - finally: - kern._sliced_X -= 1 - return ret + put_clean(dct, 'psi0', _slice_psi) + put_clean(dct, 'psi1', _slice_psi) + put_clean(dct, 'psi2', _slice_psi) + put_clean(dct, 'update_gradients_expectations', _slice_update_gradients_expectations) + put_clean(dct, 'gradients_Z_expectations', _slice_gradients_Z_expectations) + put_clean(dct, 'gradients_qX_expectations', _slice_gradients_qX_expectations) + return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct) + +class _Slice_wrap(object): + def __init__(self, k, X, X2=None): + self.k = k + self.shape = X.shape + if self.k._sliced_X == 0: + assert X.shape[1] > max(np.r_[self.k.active_dims]), "At least {} dimensional X needed".format(max(np.r_[self.k.active_dims])) + self.X = self.k._slice_X(X) + self.X2 = self.k._slice_X(X2) if X2 is not None else X2 + self.ret = True else: - def x_slice_wrapper(dL_dK, X, X2=None): - ret_X_not_sliced = ret_X and kern._sliced_X == 0 - if ret_X_not_sliced: - ret = np.zeros(X.shape) - X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2 - kern._sliced_X += 1 - try: - if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2) - else: ret = operation(dL_dK, X, X2) - except: - raise - finally: - kern._sliced_X -= 1 - return ret - else: - if diag: - def 
x_slice_wrapper(X, *args, **kw): - X = kern._slice_X(X) if not kern._sliced_X else X - kern._sliced_X += 1 - try: - ret = operation(X, *args, **kw) - except: - raise - finally: - kern._sliced_X -= 1 - return ret - else: - def x_slice_wrapper(X, X2=None, *args, **kw): - X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2 - kern._sliced_X += 1 - try: - ret = operation(X, X2, *args, **kw) - except: raise - finally: - kern._sliced_X -= 1 - return ret - x_slice_wrapper._operation = operation - x_slice_wrapper.__name__ = ("slicer("+str(operation) - +(","+str(bool(diag)) if diag else'') - +(','+str(bool(derivative)) if derivative else '') - +')') - x_slice_wrapper.__doc__ = "**sliced**\n" + (operation.__doc__ or "") - return x_slice_wrapper \ No newline at end of file + self.X = X + self.X2 = X2 + self.ret = False + def __enter__(self): + self.k._sliced_X += 1 + return self + def __exit__(self, *a): + self.k._sliced_X -= 1 + def handle_return_array(self, return_val): + if self.ret: + ret = np.zeros(self.shape) + ret[:, self.k.active_dims] = return_val + return ret + return return_val + +def _slice_K(f): + @wraps(f) + def wrap(self, X, X2 = None, *a, **kw): + with _Slice_wrap(self, X, X2) as s: + ret = f(self, s.X, s.X2, *a, **kw) + return ret + return wrap + +def _slice_Kdiag(f): + @wraps(f) + def wrap(self, X, *a, **kw): + with _Slice_wrap(self, X, None) as s: + ret = f(self, s.X, *a, **kw) + return ret + return wrap + +def _slice_update_gradients_full(f): + @wraps(f) + def wrap(self, dL_dK, X, X2=None): + with _Slice_wrap(self, X, X2) as s: + ret = f(self, dL_dK, s.X, s.X2) + return ret + return wrap + +def _slice_update_gradients_diag(f): + @wraps(f) + def wrap(self, dL_dKdiag, X): + with _Slice_wrap(self, X, None) as s: + ret = f(self, dL_dKdiag, s.X) + return ret + return wrap + +def _slice_gradients_X(f): + @wraps(f) + def wrap(self, dL_dK, X, X2=None): + with _Slice_wrap(self, X, X2) as s: + ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2)) + return ret + return wrap + +def _slice_gradients_X_diag(f): + @wraps(f) + def wrap(self, dL_dKdiag, X): + with _Slice_wrap(self, X, None) as s: + ret = s.handle_return_array(f(self, dL_dKdiag, s.X)) + return ret + return wrap + +def _slice_psi(f): + @wraps(f) + def wrap(self, Z, variational_posterior): + with _Slice_wrap(self, Z, variational_posterior) as s: + ret = f(self, s.X, s.X2) + return ret + return wrap + +def _slice_update_gradients_expectations(f): + @wraps(f) + def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + with _Slice_wrap(self, Z, variational_posterior) as s: + ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2) + return ret + return wrap + +def _slice_gradients_Z_expectations(f): + @wraps(f) + def wrap(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + with _Slice_wrap(self, Z, variational_posterior) as s: + ret = s.handle_return_array(f(self, dL_dpsi1, dL_dpsi2, s.X, s.X2)) + return ret + return wrap + +def _slice_gradients_qX_expectations(f): + @wraps(f) + def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + with _Slice_wrap(self, variational_posterior, Z) as s: + ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X)) + r2 = ret[:2] + ret[0] = s.handle_return_array(r2[0]) + ret[1] = s.handle_return_array(r2[1]) + del r2 + return ret + return wrap diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 7d9eeac2..f9dacf02 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ 
-121,7 +121,7 @@ class Linear(Kern): gamma = variational_posterior.binary_prob mu = variational_posterior.mean return np.einsum('nq,q,mq,nq->nm',gamma,self.variances,Z,mu) -# return (self.variances*gamma*mu).sum(axis=1) +# return (self.variances*gamma*mu).sum(axis=1) else: return self.K(variational_posterior.mean, Z) #the variance, it does nothing @@ -177,7 +177,7 @@ class Linear(Kern): grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, self.variances,mu) +\ np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ) - + return grad else: #psi1 @@ -191,15 +191,15 @@ class Linear(Kern): gamma = variational_posterior.binary_prob mu = variational_posterior.mean S = variational_posterior.variance - mu2S = np.square(mu)+S + mu2S = np.square(mu)+S _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma) - + grad_gamma = np.einsum('n,q,nq->nq',dL_dpsi0,self.variances,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,self.variances,Z,mu) +\ np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dgamma) grad_mu = np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*self.variances,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,self.variances,Z) +\ np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dmu) grad_S = np.einsum('n,nq,q->nq',dL_dpsi0,gamma,self.variances) + np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dS) - + return grad_mu, grad_S, grad_gamma else: grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape) @@ -210,7 +210,7 @@ class Linear(Kern): grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) # psi2 self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S) - + return grad_mu, grad_S #--------------------------------------------------# @@ -313,3 +313,47 @@ class Linear(Kern): def input_sensitivity(self): return np.ones(self.input_dim) * self.variances + +class LinearFull(Kern): + def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'): + super(LinearFull, self).__init__(input_dim, active_dims, name) + if W is None: + W = np.ones((input_dim, rank)) + if kappa is None: + kappa = np.ones(input_dim) + assert W.shape == (input_dim, rank) + assert kappa.shape == (input_dim,) + + self.W = Param('W', W) + self.kappa = Param('kappa', kappa, Logexp()) + self.add_parameters(self.W, self.kappa) + + def K(self, X, X2=None): + P = np.dot(self.W, self.W.T) + np.diag(self.kappa) + return np.einsum('ij,jk,lk->il', X, P, X if X2 is None else X2) + + def update_gradients_full(self, dL_dK, X, X2=None): + self.kappa.gradient = np.einsum('ij,ik,kj->j', X, dL_dK, X if X2 is None else X2) + self.W.gradient = np.einsum('ij,kl,ik,lm->jm', X, X if X2 is None else X2, dL_dK, self.W) + self.W.gradient += np.einsum('ij,kl,ik,jm->lm', X, X if X2 is None else X2, dL_dK, self.W) + + def Kdiag(self, X): + P = np.dot(self.W, self.W.T) + np.diag(self.kappa) + return np.einsum('ij,jk,ik->i', X, P, X) + + def update_gradients_diag(self, dL_dKdiag, X): + self.kappa.gradient = np.einsum('ij,i->j', np.square(X), dL_dKdiag) + self.W.gradient = 2.*np.einsum('ij,ik,jl,i->kl', X, X, self.W, dL_dKdiag) + + def gradients_X(self, dL_dK, X, X2=None): + P = np.dot(self.W, self.W.T) + np.diag(self.kappa) + if X2 is None: + return 2.*np.einsum('ij,jk,kl->il', dL_dK, X, P) + else: + return np.einsum('ij,jk,kl->il', dL_dK, X2, P) + + def gradients_X_diag(self, dL_dKdiag, X): + P = np.dot(self.W, self.W.T) + np.diag(self.kappa) + return 2.*np.einsum('jk,i,ij->ik', P, dL_dKdiag, X) + + diff --git a/GPy/kern/_src/prod.py 
b/GPy/kern/_src/prod.py index e00f38c3..b8f92f27 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -23,7 +23,6 @@ class Prod(CombinationKernel): @Cache_this(limit=2, force_kwargs=['which_parts']) def K(self, X, X2=None, which_parts=None): - assert X.shape[1] == self.input_dim if which_parts is None: which_parts = self.parts elif not isinstance(which_parts, (list, tuple)): @@ -33,7 +32,6 @@ class Prod(CombinationKernel): @Cache_this(limit=2, force_kwargs=['which_parts']) def Kdiag(self, X, which_parts=None): - assert X.shape[1] == self.input_dim if which_parts is None: which_parts = self.parts return reduce(np.multiply, (p.Kdiag(X) for p in which_parts)) @@ -58,8 +56,6 @@ class Prod(CombinationKernel): def gradients_X_diag(self, dL_dKdiag, X): target = np.zeros(X.shape) for k1,k2 in itertools.combinations(self.parts, 2): - target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X) - target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X) + target += k1.gradients_X_diag(dL_dKdiag*k2.Kdiag(X), X) + target += k2.gradients_X_diag(dL_dKdiag*k1.Kdiag(X), X) return target - - diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 2534ad9b..e08d94f9 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -74,9 +74,6 @@ class RBF(Stationary): # Spike-and-Slab GPLVM if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if self.useGPU: -# dL_dpsi0_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi0)) -# dL_dpsi1_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi1)) -# dL_dpsi2_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi2)) self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) else: @@ -99,7 +96,7 @@ class RBF(Stationary): self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) else: self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum() - + elif isinstance(variational_posterior, variational.NormalPosterior): l2 = self.lengthscale**2 if l2.size != self.input_dim: @@ -107,8 +104,6 @@ class RBF(Stationary): #contributions from psi0: self.variance.gradient = np.sum(dL_dpsi0) - if self._debug: - num_grad = self.lengthscale.gradient.copy() self.lengthscale.gradient = 0. 
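For reference, the psi1 and psi2 contributions below are gradients of the RBF kernel expectations under the variational posterior. A minimal NumPy sketch of the standard ARD-RBF psi0/psi1 statistics for q(X) = N(mu, S), as in e.g. Titsias & Lawrence (2010), is shown here; the function name and signature are illustrative, not GPy's internal psicomp API:

import numpy as np

def rbf_psi01(variance, lengthscale, Z, mu, S):
    # mu, S: (N, Q) variational means/variances; Z: (M, Q) inducing inputs
    l2 = lengthscale ** 2
    psi0 = variance * np.ones(mu.shape[0])                   # E_q[k(x_n, x_n)]
    dist = (mu[:, None, :] - Z[None, :, :]) ** 2             # (N, M, Q)
    denom = S[:, None, :] + l2[None, None, :]                # lengthscale^2 + S_nq
    scale = np.sqrt(np.prod(1. + S / l2, axis=-1))[:, None]  # (N, 1)
    psi1 = variance * np.exp(-0.5 * np.sum(dist / denom, axis=-1)) / scale
    return psi0, psi1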
        #from psi1
@@ -128,8 +123,6 @@ class RBF(Stationary):
         else:
             self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)

-        if self._debug:
-            import ipdb;ipdb.set_trace()
         self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance

         else:
@@ -139,8 +132,6 @@ class RBF(Stationary):
         # Spike-and-Slab GPLVM
         if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
             if self.useGPU:
-#                dL_dpsi1_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi1))
-#                dL_dpsi2_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi2))
                 return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
             else:
                 _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
@@ -177,8 +168,6 @@ class RBF(Stationary):
         # Spike-and-Slab GPLVM
         if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
             if self.useGPU:
-#                dL_dpsi1_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi1))
-#                dL_dpsi2_gpu = gpuarray.to_gpu(np.asfortranarray(dL_dpsi2))
                 return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
             else:
                 ndata = variational_posterior.mean.shape[0]
diff --git a/GPy/kern/_src/sympykern.py b/GPy/kern/_src/symbolic.py
similarity index 56%
rename from GPy/kern/_src/sympykern.py
rename to GPy/kern/_src/symbolic.py
index 6f066e98..c7bbae73 100644
--- a/GPy/kern/_src/sympykern.py
+++ b/GPy/kern/_src/symbolic.py
@@ -11,9 +11,9 @@ from kern import Kern
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp

-class Sympykern(Kern):
+class Symbolic(Kern):
     """
-    A kernel object, where all the hard work in done by sympy.
+    A kernel object, where all the hard work is done by sympy.

     :param k: the covariance function
     :type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
@@ -26,10 +26,8 @@ class Sympykern(Kern):
     - to handle multiple inputs, call them x_1, z_1, etc
     - to handle multiple correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
     """
-    def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None, active_dims=None):
+    def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None):

-        if name is None:
-            name='sympykern'
         if k is None:
             raise ValueError, "You must provide an argument for the covariance function."
-        super(Sympykern, self).__init__(input_dim, active_dims, name)
+        super(Symbolic, self).__init__(input_dim, active_dims, name)
@@ -60,7 +58,6 @@ class Sympykern(Kern):
         # extract parameter names from the covariance
         thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
-
         # Look for parameters with index (subscripts), they are associated with different outputs.
         if self.output_dim>1:
             self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
@@ -97,8 +94,8 @@ class Sympykern(Kern):
                 val = 1.0
                 # TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
if param is not None: - if param.has_key(theta): - val = param[theta] + if param.has_key(theta.name): + val = param[theta.name] setattr(self, theta.name, Param(theta.name, val, None)) self.add_parameters(getattr(self, theta.name)) @@ -117,6 +114,12 @@ class Sympykern(Kern): self.arg_list += self._sp_theta_i + self._sp_theta_j self.diag_arg_list += self._sp_theta_i + # Check if there are additional linear operators on the covariance. + self._sp_operators = operators + # TODO: Deal with linear operators + #if self._sp_operators: + # for operator in self._sp_operators: + # psi_stats aren't yet implemented. if False: self.compute_psi_stats() @@ -254,3 +257,176 @@ class Sympykern(Kern): self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T +if False: + class Symcombine(CombinationKernel): + """ + Combine list of given sympy covariances together with the provided operations. + """ + def __init__(self, subkerns, operations, name='sympy_combine'): + super(Symcombine, self).__init__(subkerns, name) + for subkern, operation in zip(subkerns, operations): + self._sp_k += self._k_double_operate(subkern._sp_k, operation) + + #def _double_operate(self, k, operation): + + + @Cache_this(limit=2, force_kwargs=['which_parts']) + def K(self, X, X2=None, which_parts=None): + """ + Combine covariances with a linear operator. + """ + assert X.shape[1] == self.input_dim + if which_parts is None: + which_parts = self.parts + elif not isinstance(which_parts, (list, tuple)): + # if only one part is given + which_parts = [which_parts] + return reduce(np.add, (p.K(X, X2) for p in which_parts)) + + @Cache_this(limit=2, force_kwargs=['which_parts']) + def Kdiag(self, X, which_parts=None): + assert X.shape[1] == self.input_dim + if which_parts is None: + which_parts = self.parts + elif not isinstance(which_parts, (list, tuple)): + # if only one part is given + which_parts = [which_parts] + return reduce(np.add, (p.Kdiag(X) for p in which_parts)) + + def update_gradients_full(self, dL_dK, X, X2=None): + [p.update_gradients_full(dL_dK, X, X2) for p in self.parts] + + def update_gradients_diag(self, dL_dK, X): + [p.update_gradients_diag(dL_dK, X) for p in self.parts] + + def gradients_X(self, dL_dK, X, X2=None): + """Compute the gradient of the objective function with respect to X. + + :param dL_dK: An array of gradients of the objective function with respect to the covariance function. 
+            :type dL_dK: np.ndarray (num_samples x num_inducing)
+            :param X: Observed data inputs
+            :type X: np.ndarray (num_samples x input_dim)
+            :param X2: Observed data inputs (optional, defaults to X)
+            :type X2: np.ndarray (num_inducing x input_dim)"""
+
+            target = np.zeros(X.shape)
+            [target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
+            return target
+
+        def gradients_X_diag(self, dL_dKdiag, X):
+            target = np.zeros(X.shape)
+            [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
+            return target
+
+        def psi0(self, Z, variational_posterior):
+            return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
+
+        def psi1(self, Z, variational_posterior):
+            return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
+
+        def psi2(self, Z, variational_posterior):
+            psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
+            #return psi2
+            # compute the "cross" terms
+            from static import White, Bias
+            from rbf import RBF
+            #from rbf_inv import RBFInv
+            from linear import Linear
+            #from fixed import Fixed
+
+            for p1, p2 in itertools.combinations(self.parts, 2):
+                # i1, i2 = p1.active_dims, p2.active_dims
+                # white doesn't combine with anything
+                if isinstance(p1, White) or isinstance(p2, White):
+                    pass
+                # rbf X bias
+                #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
+                elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
+                    tmp = p2.psi1(Z, variational_posterior)
+                    psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
+                #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
+                elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
+                    tmp = p1.psi1(Z, variational_posterior)
+                    psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
+                elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
+                    assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
+                    tmp1 = p1.psi1(Z, variational_posterior)
+                    tmp2 = p2.psi1(Z, variational_posterior)
+                    psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
+                else:
+                    raise NotImplementedError, "psi2 cannot be computed for this kernel"
+            return psi2
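For reference, psi2 of a sum kernel is not just the sum of the parts' psi2 statistics: the expectation of the product picks up cross terms, which is what the branches above assemble. A minimal sketch of the symmetrised cross term for two parts on non-overlapping input dimensions, with psi1_a and psi1_b standing for the (N x M) arrays E_q[k_a(x_n, z_m)] and E_q[k_b(x_n, z_m)]:

import numpy as np

def psi2_cross(psi1_a, psi1_b):
    # (N, M, M) cross contribution: psi1_a[n, m] * psi1_b[n, m'] symmetrised in (m, m')
    return psi1_a[:, :, None] * psi1_b[:, None, :] + psi1_b[:, :, None] * psi1_a[:, None, :]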
+
+        def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+            from static import White, Bias
+            for p1 in self.parts:
+                #compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
+                eff_dL_dpsi1 = dL_dpsi1.copy()
+                for p2 in self.parts:
+                    if p2 is p1:
+                        continue
+                    if isinstance(p2, White):
+                        continue
+                    elif isinstance(p2, Bias):
+                        eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+                    else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
+                        eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
+                p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
+
+        def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+            from static import White, Bias
+            target = np.zeros(Z.shape)
+            for p1 in self.parts:
+                #compute the effective dL_dpsi1. extra terms appear because of the cross terms in psi2!
+                eff_dL_dpsi1 = dL_dpsi1.copy()
+                for p2 in self.parts:
+                    if p2 is p1:
+                        continue
+                    if isinstance(p2, White):
+                        continue
+                    elif isinstance(p2, Bias):
+                        eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+                    else:
+                        eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
+                target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
+            return target
+
+        def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+            from static import White, Bias
+            target_mu = np.zeros(variational_posterior.shape)
+            target_S = np.zeros(variational_posterior.shape)
+            for p1 in self._parameters_:
+                #compute the effective dL_dpsi1. extra terms appear because of the cross terms in psi2!
+                eff_dL_dpsi1 = dL_dpsi1.copy()
+                for p2 in self._parameters_:
+                    if p2 is p1:
+                        continue
+                    if isinstance(p2, White):
+                        continue
+                    elif isinstance(p2, Bias):
+                        eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+                    else:
+                        eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
+                a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
+                target_mu += a
+                target_S += b
+            return target_mu, target_S
+
+        def _getstate(self):
+            """
+            Get the current state of the class,
+            here just all the indices, rest can get recomputed
+            """
+            return super(Add, self)._getstate()
+
+        def _setstate(self, state):
+            super(Add, self)._setstate(state)
+
+        def add(self, other, name='sum'):
+            if isinstance(other, Add):
+                other_params = other._parameters_.copy()
+                for p in other_params:
+                    other.remove_parameter(p)
+                self.add_parameters(*other_params)
+            else: self.add_parameter(other)
+            return self
diff --git a/GPy/likelihoods/.DS_Store b/GPy/likelihoods/.DS_Store
deleted file mode 100644
index 8228ae90..00000000
Binary files a/GPy/likelihoods/.DS_Store and /dev/null differ
diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py
index 28e44541..d7ad5753 100644
--- a/GPy/likelihoods/__init__.py
+++ b/GPy/likelihoods/__init__.py
@@ -6,3 +6,17 @@ from poisson import Poisson
 from student_t import StudentT
 from likelihood import Likelihood
 from mixed_noise import MixedNoise
+# TODO need to fix this in a config file.
+try:
+    import sympy as sym
+    sympy_available=True
+except ImportError:
+    sympy_available=False
+if sympy_available:
+    # These are likelihoods that rely on symbolic.
+    from symbolic import Symbolic
+    #from sstudent_t import SstudentT
+    from negative_binomial import Negative_binomial
+    #from skew_normal import Skew_normal
+    #from skew_exponential import Skew_exponential
+    #from null_category import Null_category
diff --git a/GPy/likelihoods/bernoulli.py b/GPy/likelihoods/bernoulli.py
index 371fbe63..7b867954 100644
--- a/GPy/likelihoods/bernoulli.py
+++ b/GPy/likelihoods/bernoulli.py
@@ -15,7 +15,7 @@ class Bernoulli(Likelihood):
     p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-\\lambda(f_{i}))^{1-y_{i}}

     .. Note::
-        Y is expected to take values in {-1, 1} TODO: {0, 1}??
+        Y takes values in either {-1, 1} or {0, 1}.
         link function should have the domain [0, 1], e.g. probit (default) or Heaviside

     .. See also::
@@ -54,10 +54,10 @@ class Bernoulli(Likelihood):
         """
         if Y_i == 1:
             sign = 1.
-        elif Y_i == 0:
+        elif Y_i == 0 or Y_i == -1:
             sign = -1
         else:
-            raise ValueError("bad value for Bernouilli observation (0, 1)")
+            raise ValueError("bad value for Bernoulli observation; must be in {-1, 1} or {0, 1}")
         if isinstance(self.gp_link, link_functions.Probit):
             z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
             Z_hat = std_norm_cdf(z)
@@ -95,15 +95,15 @@ class Bernoulli(Likelihood):
         else:
             return np.nan

-    def pdf_link(self, link_f, y, Y_metadata=None):
+    def pdf_link(self, inv_link_f, y, Y_metadata=None):
         """
-        Likelihood function given link(f)
+        Likelihood function given inverse link of f.

         ..
math:: p(y_{i}|\\lambda(f_{i})) = \\lambda(f_{i})^{y_{i}}(1-f_{i})^{1-y_{i}} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata not used in bernoulli @@ -113,102 +113,106 @@ class Bernoulli(Likelihood): .. Note: Each y_i must be in {0, 1} """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - #objective = (link_f**y) * ((1.-link_f)**(1.-y)) - objective = np.where(y, link_f, 1.-link_f) + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + #objective = (inv_link_f**y) * ((1.-inv_link_f)**(1.-y)) + objective = np.where(y, inv_link_f, 1.-inv_link_f) return np.exp(np.sum(np.log(objective))) - def logpdf_link(self, link_f, y, Y_metadata=None): + def logpdf_link(self, inv_link_f, y, Y_metadata=None): """ - Log Likelihood function given link(f) + Log Likelihood function given inverse link of f. .. math:: \\ln p(y_{i}|\\lambda(f_{i})) = y_{i}\\log\\lambda(f_{i}) + (1-y_{i})\\log (1-f_{i}) - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata not used in bernoulli - :returns: log likelihood evaluated at points link(f) + :returns: log likelihood evaluated at points inverse link of f. :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - #objective = y*np.log(link_f) + (1.-y)*np.log(link_f) + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + #objective = y*np.log(inv_link_f) + (1.-y)*np.log(inv_link_f) state = np.seterr(divide='ignore') - objective = np.where(y==1, np.log(link_f), np.log(1-link_f)) + # TODO check y \in {0, 1} or {-1, 1} + objective = np.where(y==1, np.log(inv_link_f), np.log(1-inv_link_f)) np.seterr(**state) return np.sum(objective) - def dlogpdf_dlink(self, link_f, y, Y_metadata=None): + def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): """ - Gradient of the pdf at y, given link(f) w.r.t link(f) + Gradient of the pdf at y, given inverse link of f w.r.t inverse link of f. .. math:: \\frac{d\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)} = \\frac{y_{i}}{\\lambda(f_{i})} - \\frac{(1 - y_{i})}{(1 - \\lambda(f_{i}))} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata not used in bernoulli - :returns: gradient of log likelihood evaluated at points link(f) + :returns: gradient of log likelihood evaluated at points inverse link of f. :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - #grad = (y/link_f) - (1.-y)/(1-link_f) + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + #grad = (y/inv_link_f) - (1.-y)/(1-inv_link_f) state = np.seterr(divide='ignore') - grad = np.where(y, 1./link_f, -1./(1-link_f)) + # TODO check y \in {0, 1} or {-1, 1} + grad = np.where(y, 1./inv_link_f, -1./(1-inv_link_f)) np.seterr(**state) return grad - def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): + def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): """ - Hessian at y, given link_f, w.r.t link_f the hessian will be 0 unless i == j - i.e. 
second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j) + Hessian at y, given inv_link_f, w.r.t inv_link_f the hessian will be 0 unless i == j + i.e. second derivative logpdf at y given inverse link of f_i and inverse link of f_j w.r.t inverse link of f_i and inverse link of f_j. .. math:: \\frac{d^{2}\\ln p(y_{i}|\\lambda(f_{i}))}{d\\lambda(f)^{2}} = \\frac{-y_{i}}{\\lambda(f)^{2}} - \\frac{(1-y_{i})}{(1-\\lambda(f))^{2}} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables inverse link of f. + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata not used in bernoulli - :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f)) + :returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points inverse link of f. :rtype: Nx1 array .. Note:: Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases - (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) + (the distribution for y_i depends only on inverse link of f_i not on inverse link of f_(j!=i) """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - #d2logpdf_dlink2 = -y/(link_f**2) - (1-y)/((1-link_f)**2) + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + #d2logpdf_dlink2 = -y/(inv_link_f**2) - (1-y)/((1-inv_link_f)**2) state = np.seterr(divide='ignore') - d2logpdf_dlink2 = np.where(y, -1./np.square(link_f), -1./np.square(1.-link_f)) + # TODO check y \in {0, 1} or {-1, 1} + d2logpdf_dlink2 = np.where(y, -1./np.square(inv_link_f), -1./np.square(1.-inv_link_f)) np.seterr(**state) return d2logpdf_dlink2 - def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): + def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): """ - Third order derivative log-likelihood function at y given link(f) w.r.t link(f) + Third order derivative log-likelihood function at y given inverse link of f w.r.t inverse link of f .. math:: \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2y_{i}}{\\lambda(f)^{3}} - \\frac{2(1-y_{i}}{(1-\\lambda(f))^{3}} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables passed through inverse link of f. + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata not used in bernoulli - :returns: third derivative of log likelihood evaluated at points link(f) + :returns: third derivative of log likelihood evaluated at points inverse_link(f) :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - #d3logpdf_dlink3 = 2*(y/(link_f**3) - (1-y)/((1-link_f)**3)) + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + #d3logpdf_dlink3 = 2*(y/(inv_link_f**3) - (1-y)/((1-inv_link_f)**3)) state = np.seterr(divide='ignore') - d3logpdf_dlink3 = np.where(y, 2./(link_f**3), -2./((1.-link_f)**3)) + # TODO check y \in {0, 1} or {-1, 1} + d3logpdf_dlink3 = np.where(y, 2./(inv_link_f**3), -2./((1.-inv_link_f)**3)) np.seterr(**state) return d3logpdf_dlink3 diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py index aabe93ef..33b51536 100644 --- a/GPy/likelihoods/likelihood.py +++ b/GPy/likelihoods/likelihood.py @@ -16,20 +16,20 @@ class Likelihood(Parameterized): Likelihood base class, used to defing p(y|f). All instances use _inverse_ link functions, which can be swapped out. 
It is - expected that inherriting classes define a default inverse link function + expected that inheriting classes define a default inverse link function - To use this class, inherrit and define missing functionality. + To use this class, inherit and define missing functionality. - Inherriting classes *must* implement: + Inheriting classes *must* implement: pdf_link : a bound method which turns the output of the link function into the pdf logpdf_link : the logarithm of the above - To enable use with EP, inherriting classes *must* define: + To enable use with EP, inheriting classes *must* define: TODO: a suitable derivative function for any parameters of the class It is also desirable to define: moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature. - To enable use with Laplace approximation, inherriting classes *must* define: + To enable use with Laplace approximation, inheriting classes *must* define: Some derivative functions *AS TODO* For exact Gaussian inference, define *JH TODO* @@ -159,7 +159,7 @@ class Likelihood(Parameterized): def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): """ - Numerical approximation to the predictive variance: V(Y_star) + Approximation to the predictive variance: V(Y_star) The following variance decomposition is used: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) @@ -208,28 +208,28 @@ class Likelihood(Parameterized): # V(Y_star) = E[ V(Y_star|f_star) ] + E(Y_star**2|f_star) - E[Y_star|f_star]**2 return exp_var + var_exp - def pdf_link(self, link_f, y, Y_metadata=None): + def pdf_link(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError - def logpdf_link(self, link_f, y, Y_metadata=None): + def logpdf_link(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError - def dlogpdf_dlink(self, link_f, y, Y_metadata=None): + def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError - def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): + def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError - def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): + def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError - def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None): + def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError - def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None): + def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError - def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None): + def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None): raise NotImplementedError def pdf(self, f, y, Y_metadata=None): @@ -247,8 +247,8 @@ class Likelihood(Parameterized): :returns: likelihood evaluated for this point :rtype: float """ - link_f = self.gp_link.transf(f) - return self.pdf_link(link_f, y, Y_metadata=Y_metadata) + inv_link_f = self.gp_link.transf(f) + return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata) def logpdf(self, f, y, Y_metadata=None): """ @@ -265,8 +265,8 @@ class Likelihood(Parameterized): :returns: log likelihood evaluated for this point :rtype: float """ - link_f = self.gp_link.transf(f) - return self.logpdf_link(link_f, y, Y_metadata=Y_metadata) + inv_link_f = self.gp_link.transf(f) + return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata) def dlogpdf_df(self, f, y, Y_metadata=None): """ @@ -284,8 +284,8 @@ class 
Likelihood(Parameterized): :returns: derivative of log likelihood evaluated for this point :rtype: 1xN array """ - link_f = self.gp_link.transf(f) - dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) + inv_link_f = self.gp_link.transf(f) + dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) dlink_df = self.gp_link.dtransf_df(f) return chain_1(dlogpdf_dlink, dlink_df) @@ -305,10 +305,10 @@ class Likelihood(Parameterized): :returns: second derivative of log likelihood evaluated for this point (diagonal only) :rtype: 1xN array """ - link_f = self.gp_link.transf(f) - d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata) + inv_link_f = self.gp_link.transf(f) + d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata) dlink_df = self.gp_link.dtransf_df(f) - dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) + dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) d2link_df2 = self.gp_link.d2transf_df2(f) return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) @@ -328,12 +328,12 @@ class Likelihood(Parameterized): :returns: third derivative of log likelihood evaluated for this point :rtype: float """ - link_f = self.gp_link.transf(f) - d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata) + inv_link_f = self.gp_link.transf(f) + d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata) dlink_df = self.gp_link.dtransf_df(f) - d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata) + d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata) d2link_df2 = self.gp_link.d2transf_df2(f) - dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata) + dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) d3link_df3 = self.gp_link.d3transf_df3(f) return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3) @@ -342,10 +342,10 @@ class Likelihood(Parameterized): TODO: Doc strings """ if self.size > 0: - link_f = self.gp_link.transf(f) - return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata) + inv_link_f = self.gp_link.transf(f) + return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata) else: - #Is no parameters so return an empty array for its derivatives + # There are no parameters so return an empty array for derivatives return np.zeros([1, 0]) def dlogpdf_df_dtheta(self, f, y, Y_metadata=None): @@ -353,12 +353,12 @@ class Likelihood(Parameterized): TODO: Doc strings """ if self.size > 0: - link_f = self.gp_link.transf(f) + inv_link_f = self.gp_link.transf(f) dlink_df = self.gp_link.dtransf_df(f) - dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata) + dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata) return chain_1(dlogpdf_dlink_dtheta, dlink_df) else: - #Is no parameters so return an empty array for its derivatives + # There are no parameters so return an empty array for derivatives return np.zeros([f.shape[0], 0]) def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None): @@ -366,14 +366,14 @@ class Likelihood(Parameterized): TODO: Doc strings """ if self.size > 0: - link_f = self.gp_link.transf(f) + inv_link_f = self.gp_link.transf(f) dlink_df = self.gp_link.dtransf_df(f) d2link_df2 = self.gp_link.d2transf_df2(f) - d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata) - dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, 
y, Y_metadata=Y_metadata) + d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata) + dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata) return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2) else: - #Is no parameters so return an empty array for its derivatives + # There are no parameters so return an empty array for derivatives return np.zeros([f.shape[0], 0]) def _laplace_gradients(self, f, y, Y_metadata=None): @@ -383,12 +383,9 @@ class Likelihood(Parameterized): #Parameters are stacked vertically. Must be listed in same order as 'get_param_names' # ensure we have gradients for every parameter we want to optimize - try: - assert len(dlogpdf_dtheta) == self.size #1 x num_param array - assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix - assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix - except Exception as e: - import ipdb; ipdb.set_trace() # XXX BREAKPOINT + assert len(dlogpdf_dtheta) == self.size #1 x num_param array + assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix + assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta @@ -411,7 +408,10 @@ class Likelihood(Parameterized): #compute the quantiles by sampling!!! N_samp = 1000 s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu + #ss_f = s.flatten() + #ss_y = self.samples(ss_f, Y_metadata) ss_y = self.samples(s, Y_metadata) + #ss_y = ss_y.reshape(mu.shape[0], N_samp) return [np.percentile(ss_y, q, axis=1)[:,None] for q in quantiles] diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py index 942fe2f4..86384155 100644 --- a/GPy/likelihoods/link_functions.py +++ b/GPy/likelihoods/link_functions.py @@ -71,6 +71,7 @@ class Probit(GPTransformation): g(f) = \\Phi^{-1} (mu) + """ def transf(self,f): return std_norm_cdf(f) diff --git a/GPy/likelihoods/negative_binomial.py b/GPy/likelihoods/negative_binomial.py new file mode 100644 index 00000000..5bc5b727 --- /dev/null +++ b/GPy/likelihoods/negative_binomial.py @@ -0,0 +1,46 @@ +# Copyright (c) 2014 The GPy authors (see AUTHORS.txt) +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +try: + import sympy as sym + sympy_available=True + from sympy.utilities.lambdify import lambdify + from GPy.util.symbolic import gammaln, ln_cum_gaussian, cum_gaussian +except ImportError: + sympy_available=False + +import numpy as np +from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf +import link_functions +from symbolic import Symbolic +from scipy import stats + +if sympy_available: + class Negative_binomial(Symbolic): + """ + Negative binomial + + .. math:: + p(y_{i}|\pi(f_{i})) = \left(\frac{r}{r+f_i}\right)^r \frac{\Gamma(r+y_i)}{y_i!\Gamma(r)}\left(\frac{f_i}{r+f_i}\right)^{y_i} + + .. Note:: + Y takes non-negative integer values. + link function should have a positive domain, e.g. log (default). + + ..
See also:: + symbolic.py, for the parent class + """ + def __init__(self, gp_link=None): + if gp_link is None: + gp_link = link_functions.Log() + + dispersion = sym.Symbol('dispersion', positive=True, real=True) + y = sym.Symbol('y', nonnegative=True, integer=True) + f = sym.Symbol('f', positive=True, real=True) + log_pdf = dispersion*sym.log(dispersion) - (dispersion+y)*sym.log(dispersion+f) + gammaln(y+dispersion) - gammaln(y+1) - gammaln(dispersion) + y*sym.log(f) + super(Negative_binomial, self).__init__(log_pdf=log_pdf, gp_link=gp_link, name='Negative_binomial') + + # TODO: Check this. + self.log_concave = False + diff --git a/GPy/likelihoods/student_t.py b/GPy/likelihoods/student_t.py index 47efd443..c057e789 100644 --- a/GPy/likelihoods/student_t.py +++ b/GPy/likelihoods/student_t.py @@ -26,8 +26,8 @@ class StudentT(Likelihood): gp_link = link_functions.Identity() super(StudentT, self).__init__(gp_link, name='Student_T') - - self.sigma2 = Param('t_noise', float(sigma2), Logexp()) + # sigma2 is not a noise parameter, it is a squared scale. + self.sigma2 = Param('t_scale2', float(sigma2), Logexp()) self.v = Param('deg_free', float(deg_free)) self.add_parameter(self.sigma2) self.add_parameter(self.v) @@ -46,23 +46,23 @@ class StudentT(Likelihood): self.sigma2.gradient = grads[0] self.v.gradient = grads[1] - def pdf_link(self, link_f, y, Y_metadata=None): + def pdf_link(self, inv_link_f, y, Y_metadata=None): """ Likelihood function given link(f) .. math:: p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \\lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right)^{-\\frac{v+1}{2}} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables link(f) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution :returns: likelihood evaluated for this point :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f #Careful gamma(big_number) is infinity! objective = ((np.exp(gammaln((self.v + 1)*0.5) - gammaln(self.v * 0.5)) / (np.sqrt(self.v * np.pi * self.sigma2))) @@ -70,15 +70,15 @@ class StudentT(Likelihood): ) return np.prod(objective) - def logpdf_link(self, link_f, y, Y_metadata=None): + def logpdf_link(self, inv_link_f, y, Y_metadata=None): """ Log Likelihood Function given link(f) .. math:: \\ln p(y_{i}|\lambda(f_{i})) = \\ln \\Gamma\\left(\\frac{v+1}{2}\\right) - \\ln \\Gamma\\left(\\frac{v}{2}\\right) - \\ln \\sqrt{v \\pi\\sigma^{2}} - \\frac{v+1}{2}\\ln \\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - \lambda(f_{i}))^{2}}{\\sigma^{2}}\\right)\\right) - :param link_f: latent variables (link(f)) - :type link_f: Nx1 array + :param inv_link_f: latent variables (link(f)) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution @@ -86,11 +86,11 @@ class StudentT(Likelihood): :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f #FIXME: - #Why does np.log(1 + (1/self.v)*((y-link_f)**2)/self.sigma2) suppress the divide by zero?!
- #But np.log(1 + (1/float(self.v))*((y-link_f)**2)/self.sigma2) throws it correctly + #Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?! + #But np.log(1 + (1/float(self.v))*((y-inv_link_f)**2)/self.sigma2) throws it correctly #print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) objective = (+ gammaln((self.v + 1) * 0.5) - gammaln(self.v * 0.5) @@ -99,15 +99,15 @@ class StudentT(Likelihood): ) return np.sum(objective) - def dlogpdf_dlink(self, link_f, y, Y_metadata=None): + def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): """ Gradient of the log likelihood function at y, given link(f) w.r.t link(f) .. math:: \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\lambda(f)} = \\frac{(v+1)(y_{i}-\lambda(f_{i}))}{(y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v} - :param link_f: latent variables (f) - :type link_f: Nx1 array + :param inv_link_f: latent variables (f) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution @@ -115,12 +115,12 @@ class StudentT(Likelihood): :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) return grad - def d2logpdf_dlink2(self, link_f, y, Y_metadata=None): + def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): """ Hessian at y, given link(f), w.r.t link(f) i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j) @@ -129,8 +129,8 @@ class StudentT(Likelihood): .. math:: \\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}\\lambda(f)} = \\frac{(v+1)((y_{i}-\lambda(f_{i}))^{2} - \\sigma^{2}v)}{((y_{i}-\lambda(f_{i}))^{2} + \\sigma^{2}v)^{2}} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables inv_link(f) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution @@ -141,90 +141,90 @@ class StudentT(Likelihood): Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) return hess - def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): + def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): """ Third order derivative log-likelihood function at y given link(f) w.r.t link(f) .. 
math:: \\frac{d^{3} \\ln p(y_{i}|\lambda(f_{i}))}{d^{3}\\lambda(f)} = \\frac{2(v+1)((y_{i} - \lambda(f_{i}))^{3} - 3(y_{i} - \lambda(f_{i})) \\sigma^{2} v)}{((y_{i} - \lambda(f_{i}))^{2} + \\sigma^{2} v)^{3}} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables link(f) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution :returns: third derivative of likelihood evaluated at points f :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / ((e**2 + self.sigma2*self.v)**3) ) return d3lik_dlink3 - def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None): + def dlogpdf_link_dvar(self, inv_link_f, y, Y_metadata=None): """ Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_scale2) .. math:: \\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{d\\sigma^{2}} = \\frac{v((y_{i} - \lambda(f_{i}))^{2} - \\sigma^{2})}{2\\sigma^{2}(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables link(f) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: float """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) return np.sum(dlogpdf_dvar) - def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None): + def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None): """ Derivative of the dlogpdf_dlink w.r.t variance parameter (t_scale2) .. math:: \\frac{d}{d\\sigma^{2}}(\\frac{d \\ln p(y_{i}|\lambda(f_{i}))}{df}) = \\frac{-v(v + 1)(y_{i}-\lambda(f_{i}))}{((y_{i}-\lambda(f_{i}))^2 + \\sigma^2 v)^2} - :param link_f: latent variables link_f - :type link_f: Nx1 array + :param inv_link_f: latent variables inv_link_f + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution :returns: derivative of likelihood evaluated at points f w.r.t variance parameter :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) return dlogpdf_dlink_dvar - def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None): + def d2logpdf_dlink2_dvar(self, inv_link_f, y, Y_metadata=None): """ Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_scale2) ..
math:: \\frac{d}{d\\sigma^{2}}(\\frac{d^{2} \\ln p(y_{i}|\lambda(f_{i}))}{d^{2}f}) = \\frac{v(v+1)(\\sigma^{2}v - 3(y_{i} - \lambda(f_{i}))^{2})}{(\\sigma^{2}v + (y_{i} - \lambda(f_{i}))^{2})^{3}} - :param link_f: latent variables link(f) - :type link_f: Nx1 array + :param inv_link_f: latent variables link(f) + :type inv_link_f: Nx1 array :param y: data :type y: Nx1 array :param Y_metadata: Y_metadata which is not used in student t distribution :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :rtype: Nx1 array """ - assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape - e = y - link_f + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + e = y - inv_link_f d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) / ((self.sigma2*self.v + (e**2))**3) ) @@ -246,11 +246,12 @@ class StudentT(Likelihood): return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) def predictive_mean(self, mu, sigma, Y_metadata=None): - return self.gp_link.transf(mu) # only true in link is monotoci, which it is. + # The comment here confuses mean and median. + return self.gp_link.transf(mu) # only true if link is monotonic, which it is. def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): - if self.deg_free <2.: - return np.empty(mu.shape)*np.nan #not defined for small degress fo freedom + if self.deg_free<=2.: + return np.empty(mu.shape)*np.nan # does not exist for degrees of freedom <= 2. else: return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata) diff --git a/GPy/likelihoods/symbolic.py b/GPy/likelihoods/symbolic.py new file mode 100644 index 00000000..5d052119 --- /dev/null +++ b/GPy/likelihoods/symbolic.py @@ -0,0 +1,298 @@ +# Copyright (c) 2014 GPy Authors +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +try: + import sympy as sym + sympy_available=True + from sympy.utilities.lambdify import lambdify +except ImportError: + sympy_available=False + +import numpy as np +import link_functions +from scipy import stats, integrate +from scipy.special import gammaln, gamma, erf, polygamma +from GPy.util.functions import cum_gaussian, ln_cum_gaussian +from likelihood import Likelihood +from ..core.parameterization import Param + + +if sympy_available: + class Symbolic(Likelihood): + """ + Symbolic likelihood. + + Likelihood where the form of the likelihood is provided by a sympy expression. + + """ + def __init__(self, log_pdf=None, logZ=None, missing_log_pdf=None, gp_link=None, name='symbolic', log_concave=False, param=None, func_modules=[]): + + if gp_link is None: + gp_link = link_functions.Identity() + + if log_pdf is None: + raise ValueError, "You must provide an argument for the log pdf." 
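An editorial sketch of how a likelihood is meant to be specified through this class; the Poisson expression and symbol choices below are illustrative assumptions (mirroring negative_binomial.py), not part of this changeset:

    import sympy as sym
    import GPy.likelihoods.link_functions as link_functions
    from GPy.likelihoods.symbolic import Symbolic
    from GPy.util.symbolic import gammaln  # symbolic gammaln, as used in negative_binomial.py

    f = sym.Symbol('f', positive=True, real=True)        # latent rate
    y = sym.Symbol('y', nonnegative=True, integer=True)  # observed count
    # Poisson log pmf with rate f: y*log(f) - f - log(y!)
    log_pdf = y*sym.log(f) - f - gammaln(y + 1)
    lik = Symbolic(log_pdf=log_pdf, gp_link=link_functions.Log())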
+ + self.func_modules = func_modules + [{'gamma':gamma, 'gammaln':gammaln, 'erf':erf,'polygamma':polygamma, 'cum_gaussian':cum_gaussian, 'ln_cum_gaussian':ln_cum_gaussian}, 'numpy'] + + super(Symbolic, self).__init__(gp_link, name=name) + self.missing_data = False + self._sym_log_pdf = log_pdf + if missing_log_pdf: + self.missing_data = True + self._sym_missing_log_pdf = missing_log_pdf + + # pull the variable names out of the symbolic pdf + sym_vars = [e for e in self._sym_log_pdf.atoms() if e.is_Symbol] + self._sym_f = [e for e in sym_vars if e.name=='f'] + if not self._sym_f: + raise ValueError('No variable f in log pdf.') + self._sym_y = [e for e in sym_vars if e.name=='y'] + if not self._sym_y: + raise ValueError('No variable y in log pdf.') + self._sym_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='y')],key=lambda e:e.name) + + theta_names = [theta.name for theta in self._sym_theta] + if self.missing_data: + # pull the variable names out of missing data + sym_vars = [e for e in self._sym_missing_log_pdf.atoms() if e.is_Symbol] + sym_f = [e for e in sym_vars if e.name=='f'] + if not sym_f: + raise ValueError('No variable f in missing log pdf.') + sym_y = [e for e in sym_vars if e.name=='y'] + if sym_y: + raise ValueError('Data is present in missing data portion of likelihood.') + # additional missing data parameters + missing_theta = sorted([e for e in sym_vars if not (e.name=='f' or e.name=='missing' or e.name in theta_names)],key=lambda e:e.name) + self._sym_theta += missing_theta + self._sym_theta = sorted(self._sym_theta, key=lambda e:e.name) + + # These are all the arguments needed to compute likelihoods. + self.arg_list = self._sym_y + self._sym_f + self._sym_theta + + # these are arguments for computing derivatives. + derivative_arguments = self._sym_f + self._sym_theta + + # Do symbolic work to compute derivatives. + self._log_pdf_derivatives = {theta.name : sym.diff(self._sym_log_pdf,theta).simplify() for theta in derivative_arguments} + self._log_pdf_second_derivatives = {theta.name : sym.diff(self._log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments} + self._log_pdf_third_derivatives = {theta.name : sym.diff(self._log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} + + if self.missing_data: + # Do symbolic work to compute derivatives. + self._missing_log_pdf_derivatives = {theta.name : sym.diff(self._sym_missing_log_pdf,theta).simplify() for theta in derivative_arguments} + self._missing_log_pdf_second_derivatives = {theta.name : sym.diff(self._missing_log_pdf_derivatives['f'],theta).simplify() for theta in derivative_arguments} + self._missing_log_pdf_third_derivatives = {theta.name : sym.diff(self._missing_log_pdf_second_derivatives['f'],theta).simplify() for theta in derivative_arguments} + + + # Add parameters to the model. + for theta in self._sym_theta: + val = 1.0 + # TODO: need to decide how to handle user passing values for these parameter vectors. + if param is not None: + if param.has_key(theta.name): + val = param[theta.name] + setattr(self, theta.name, Param(theta.name, val, None)) + self.add_parameters(getattr(self, theta.name)) + + + # Is there some way to check whether the pdf is log + # concave? For the moment, need user to specify.
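# Editorial note: no symbolic check of log-concavity is attempted here.
# A log-concave p(y|f) gives a concave log posterior and hence a unique
# mode for the Laplace approximation, which is why the caller must
# assert it explicitly through the log_concave flag.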
+ self.log_concave = log_concave + + # initialise code arguments + self._arguments = {} + + # generate the code for the pdf and derivatives + self._gen_code() + + def _gen_code(self): + """Generate the code from the symbolic parts that will be used for likelihood computation.""" + # TODO: Check here whether theano is available and set up + # functions accordingly. + self._log_pdf_function = lambdify(self.arg_list, self._sym_log_pdf, self.func_modules) + + # compute code for derivatives (for implicit likelihood terms + # we need up to 3rd derivatives) + setattr(self, '_first_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_derivatives[key], self.func_modules) for key in self._log_pdf_derivatives.keys()}) + setattr(self, '_second_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_second_derivatives[key], self.func_modules) for key in self._log_pdf_second_derivatives.keys()}) + setattr(self, '_third_derivative_code', {key: lambdify(self.arg_list, self._log_pdf_third_derivatives[key], self.func_modules) for key in self._log_pdf_third_derivatives.keys()}) + + if self.missing_data: + # the missing-data log pdf itself is needed by logpdf_link below + self._missing_log_pdf_function = lambdify(self.arg_list, self._sym_missing_log_pdf, self.func_modules) + setattr(self, '_missing_first_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_derivatives[key], self.func_modules) for key in self._missing_log_pdf_derivatives.keys()}) + setattr(self, '_missing_second_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_second_derivatives[key], self.func_modules) for key in self._missing_log_pdf_second_derivatives.keys()}) + setattr(self, '_missing_third_derivative_code', {key: lambdify(self.arg_list, self._missing_log_pdf_third_derivatives[key], self.func_modules) for key in self._missing_log_pdf_third_derivatives.keys()}) + + # TODO: compute EP code parts based on logZ. We need dlogZ/dmu, d2logZ/dmu2 and dlogZ/dtheta + + def parameters_changed(self): + pass + + def update_gradients(self, grads): + """ + Pull out the gradients, be careful as the order must match the order + in which the parameters are added + """ + # The way the Laplace approximation is run requires the + # covariance function to compute the true gradient (because it + # is dependent on the mode). This means we actually compute + # the gradient outside this object. This function would + # normally ask the object to update its gradients internally, + # but here it provides them externally, because they are + # computed in the inference code. TODO: Thought: How does this + # affect EP? Shouldn't this be done by a separate + # Laplace-approximation specific call? + for grad, theta in zip(grads, self._sym_theta): + parameter = getattr(self, theta.name) + setattr(parameter, 'gradient', grad) + + def _arguments_update(self, f, y): + """Set up argument lists for the derivatives.""" + # If we do make use of Theano, then at this point we would + # need to do a lot of precomputation to ensure that the + # likelihoods and gradients are computed together, then check + # for parameter changes before updating. + for i, fvar in enumerate(self._sym_f): + self._arguments[fvar.name] = f + for i, yvar in enumerate(self._sym_y): + self._arguments[yvar.name] = y + for theta in self._sym_theta: + self._arguments[theta.name] = np.asarray(getattr(self, theta.name)) + + def pdf_link(self, inv_link_f, y, Y_metadata=None): + """ + Likelihood function given inverse link of f. + + :param inv_link_f: inverse link of latent variables.
+ :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata + :returns: likelihood evaluated for this point + :rtype: float + """ + return np.exp(self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)) + + def logpdf_link(self, inv_link_f, y, Y_metadata=None): + """ + Log Likelihood Function given inverse link of latent variables. + + :param inv_link_f: latent variables (inverse link of f) + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata + :returns: log likelihood evaluated for this point + :rtype: float + + """ + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + if self.missing_data: + ll = np.where(np.isnan(y), self._missing_log_pdf_function(**self._arguments), self._log_pdf_function(**self._arguments)) + else: + ll = np.where(np.isnan(y), 0., self._log_pdf_function(**self._arguments)) + return np.sum(ll) + + def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): + """ + Gradient of log likelihood with respect to the inverse link function. + + :param inv_link_f: latent variables (inverse link of f) + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata + :returns: gradient of log likelihood with respect to each point. + :rtype: Nx1 array + + """ + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + if self.missing_data: + return np.where(np.isnan(y), self._missing_first_derivative_code['f'](**self._arguments), self._first_derivative_code['f'](**self._arguments)) + else: + return np.where(np.isnan(y), 0., self._first_derivative_code['f'](**self._arguments)) + + def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None): + """ + Hessian of log likelihood given inverse link of latent variables with respect to that inverse link. + i.e. second derivative logpdf at y given inv_link(f_i) and inv_link(f_j) w.r.t inv_link(f_i) and inv_link(f_j). + + + :param inv_link_f: inverse link of the latent variables. + :type inv_link_f: Nx1 array + :param y: data + :type y: Nx1 array + :param Y_metadata: Y_metadata + :returns: Diagonal of Hessian matrix (second derivative of log likelihood evaluated at points f) + :rtype: Nx1 array + + ..
Note:: + Returns diagonal of Hessian, since everywhere else it is + 0, as the likelihood factorizes over cases (the + distribution for y_i depends only on link(f_i) not on + link(f_(j!=i))) + """ + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + if self.missing_data: + return np.where(np.isnan(y), self._missing_second_derivative_code['f'](**self._arguments), self._second_derivative_code['f'](**self._arguments)) + else: + return np.where(np.isnan(y), 0., self._second_derivative_code['f'](**self._arguments)) + + def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None): + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + if self.missing_data: + return np.where(np.isnan(y), self._missing_third_derivative_code['f'](**self._arguments), self._third_derivative_code['f'](**self._arguments)) + else: + return np.where(np.isnan(y), 0., self._third_derivative_code['f'](**self._arguments)) + + def dlogpdf_link_dtheta(self, inv_link_f, y, Y_metadata=None): + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta))) + for i, theta in enumerate(self._sym_theta): + if self.missing_data: + g[:, i:i+1] = np.where(np.isnan(y), self._missing_first_derivative_code[theta.name](**self._arguments), self._first_derivative_code[theta.name](**self._arguments)) + else: + g[:, i:i+1] = np.where(np.isnan(y), 0., self._first_derivative_code[theta.name](**self._arguments)) + return g.sum(0) + + def dlogpdf_dlink_dtheta(self, inv_link_f, y, Y_metadata=None): + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta))) + for i, theta in enumerate(self._sym_theta): + if self.missing_data: + g[:, i:i+1] = np.where(np.isnan(y), self._missing_second_derivative_code[theta.name](**self._arguments), self._second_derivative_code[theta.name](**self._arguments)) + else: + g[:, i:i+1] = np.where(np.isnan(y), 0., self._second_derivative_code[theta.name](**self._arguments)) + return g + + def d2logpdf_dlink2_dtheta(self, inv_link_f, y, Y_metadata=None): + assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape + self._arguments_update(inv_link_f, y) + g = np.zeros((np.atleast_1d(y).shape[0], len(self._sym_theta))) + for i, theta in enumerate(self._sym_theta): + if self.missing_data: + g[:, i:i+1] = np.where(np.isnan(y), self._missing_third_derivative_code[theta.name](**self._arguments), self._third_derivative_code[theta.name](**self._arguments)) + else: + g[:, i:i+1] = np.where(np.isnan(y), 0., self._third_derivative_code[theta.name](**self._arguments)) + return g + + def predictive_mean(self, mu, sigma, Y_metadata=None): + raise NotImplementedError + + def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): + raise NotImplementedError + + def conditional_mean(self, gp): + raise NotImplementedError + + def conditional_variance(self, gp): + raise NotImplementedError + + def samples(self, gp, Y_metadata=None): + raise NotImplementedError diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 1f01d4d5..03cd361c 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -27,7 +27,10 @@ class BayesianGPLVM(SparseGP): Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs): if X
== None: from ..util.initialization import initialize_latent - X = initialize_latent(init, input_dim, Y) + X, fracs = initialize_latent(init, input_dim, Y) + else: + fracs = np.ones(input_dim) + self.init = init if X_variance is None: @@ -39,7 +42,7 @@ class BayesianGPLVM(SparseGP): assert Z.shape[1] == X.shape[1] if kernel is None: - kernel = kern.RBF(input_dim) # + kern.white(input_dim) + kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim) if likelihood is None: likelihood = Gaussian() @@ -48,21 +51,17 @@ class BayesianGPLVM(SparseGP): self.variational_prior = NormalPrior() X = NormalPosterior(X, X_variance) + if inference_method is None: + if np.any(np.isnan(Y)): + from ..inference.latent_function_inference.var_dtc import VarDTCMissingData + inference_method = VarDTCMissingData() + else: + from ..inference.latent_function_inference.var_dtc import VarDTC + inference_method = VarDTC() + SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) self.add_parameter(self.X, index=0) - def _getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - """ - return SparseGP._getstate(self) + [self.init] - - def _setstate(self, state): - self._const_jitter = None - self.init = state.pop() - SparseGP._setstate(self, state) - def set_X_gradients(self, X, X_grad): """Set the gradients of the posterior distribution of X in its specific form.""" X.mean.gradient, X.variance.gradient = X_grad diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index 9d918cda..2a4193ab 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -2,10 +2,10 @@ # Copyright (c) 2013, the GPy Authors (see AUTHORS.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt) -import numpy as np from ..core import GP from .. import likelihoods from .. import kern +from ..inference.latent_function_inference.expectation_propagation import EP class GPClassification(GP): """ @@ -27,4 +27,4 @@ class GPClassification(GP): likelihood = likelihoods.Bernoulli() - GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, name='gp_classification') + GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=EP(), name='gp_classification') diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py index 86e64a54..d56e72b9 100644 --- a/GPy/models/gp_regression.py +++ b/GPy/models/gp_regression.py @@ -29,8 +29,3 @@ class GPRegression(GP): super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata) - def _getstate(self): - return GP._getstate(self) - - def _setstate(self, state): - return GP._setstate(self, state) diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index b85540ce..542dcd31 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -5,7 +5,6 @@ import numpy as np import pylab as pb from .. import kern -from ..util.linalg import PCA from ..core import GP, Param from ..likelihoods import Gaussian from .. 
import util @@ -29,9 +28,11 @@ class GPLVM(GP): """ if X is None: from ..util.initialization import initialize_latent - X = initialize_latent(init, input_dim, Y) + X, fracs = initialize_latent(init, input_dim, Y) + else: + fracs = np.ones(input_dim) if kernel is None: - kernel = kern.RBF(input_dim, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2)) + kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2)) likelihood = Gaussian() @@ -43,12 +44,6 @@ class GPLVM(GP): super(GPLVM, self).parameters_changed() self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None) - def _getstate(self): - return GP._getstate(self) - - def _setstate(self, state): - GP._setstate(self, state) - def jacobian(self,X): target = np.zeros((X.shape[0],X.shape[1],self.output_dim)) for i in range(self.output_dim): diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index ac2ef9cd..458a70a1 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -6,12 +6,12 @@ import itertools import pylab from ..core import Model -from ..util.linalg import PCA from ..kern import Kern from ..core.parameterization.variational import NormalPosterior, NormalPrior from ..core.parameterization import Param, Parameterized from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC from ..likelihoods import Gaussian +from GPy.util.initialization import initialize_latent class MRD(Model): """ @@ -51,27 +51,31 @@ class MRD(Model): inference_method=None, likelihood=None, name='mrd', Ynames=None): super(MRD, self).__init__(name) - # sort out the kernels - if kernel is None: - from ..kern import RBF - self.kern = [RBF(input_dim, ARD=1, name='rbf'.format(i)) for i in range(len(Ylist))] - elif isinstance(kernel, Kern): - self.kern = [kernel.copy(name='{}'.format(kernel.name, i)) for i in range(len(Ylist))] - else: - assert len(kernel) == len(Ylist), "need one kernel per output" - assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!" - self.kern = kernel self.input_dim = input_dim self.num_inducing = num_inducing self.Ylist = Ylist self._in_init_ = True - X = self._init_X(initx, Ylist) + X, fracs = self._init_X(initx, Ylist) self.Z = Param('inducing inputs', self._init_Z(initz, X)) self.num_inducing = self.Z.shape[0] # ensure M==N if M>N + # sort out the kernels + if kernel is None: + from ..kern import RBF + self.kern = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf_{}'.format(i)) for i in range(len(Ylist))] + elif isinstance(kernel, Kern): + self.kern = [] + for i in range(len(Ylist)): + k = kernel.copy() + self.kern.append(k) + else: + assert len(kernel) == len(Ylist), "need one kernel per output" + assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!" + self.kern = kernel + if X_variance is None: - X_variance = np.random.uniform(0, .2, X.shape) + X_variance = np.random.uniform(0.1, 0.2, X.shape) self.variational_prior = NormalPrior() self.X = NormalPosterior(X, X_variance) @@ -107,9 +111,8 @@ class MRD(Model): def parameters_changed(self): self._log_marginal_likelihood = 0 self.posteriors = [] - self.Z.gradient = 0. - self.X.mean.gradient = 0. - self.X.variance.gradient = 0. + self.Z.gradient[:] = 0. + self.X.gradient[:] = 0.
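# Editorial note: X and Z are shared across all output views, so their
# gradient buffers are zeroed in place above (slice assignment keeps the
# underlying arrays alive) so that each view's contribution can be
# accumulated in the loop below.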
for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method): posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y) @@ -147,14 +150,22 @@ class MRD(Model): if Ylist is None: Ylist = self.Ylist if init in "PCA_concat": - X = PCA(np.hstack(Ylist), self.input_dim)[0] + X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist)) + fracs = [fracs]*self.input_dim elif init in "PCA_single": X = np.zeros((Ylist[0].shape[0], self.input_dim)) + fracs = [] for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist): - X[:, qs] = PCA(Y, len(qs))[0] + x,frcs = initialize_latent('PCA', len(qs), Y) + X[:, qs] = x + fracs.append(frcs) else: # init == 'random': X = np.random.randn(Ylist[0].shape[0], self.input_dim) - return X + fracs = X.var(0) + fracs = [fracs]*self.input_dim + X -= X.mean() + X /= X.std() + return X, fracs def _init_Z(self, init="permute", X=None): if X is None: diff --git a/GPy/models/sparse_gp_classification.py b/GPy/models/sparse_gp_classification.py index 96f7ac5a..e2c77d95 100644 --- a/GPy/models/sparse_gp_classification.py +++ b/GPy/models/sparse_gp_classification.py @@ -46,11 +46,3 @@ class SparseGPClassification(SparseGP): SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X) self.ensure_default_constraints() - def _getstate(self): - return SparseGP._getstate(self) - - - def _setstate(self, state): - return SparseGP._setstate(self, state) - - pass diff --git a/GPy/models/sparse_gp_regression.py b/GPy/models/sparse_gp_regression.py index 7edb93e4..f4d5513e 100644 --- a/GPy/models/sparse_gp_regression.py +++ b/GPy/models/sparse_gp_regression.py @@ -51,14 +51,6 @@ class SparseGPRegression(SparseGP): SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC()) - def _getstate(self): - return SparseGP._getstate(self) - - def _setstate(self, state): - return SparseGP._setstate(self, state) - - - class SparseGPRegressionUncertainInput(SparseGP): """ Gaussian Process model for regression with Gaussian variance on the inputs (X_variance) diff --git a/GPy/models/sparse_gplvm.py b/GPy/models/sparse_gplvm.py index 5c10d0b8..638da63e 100644 --- a/GPy/models/sparse_gplvm.py +++ b/GPy/models/sparse_gplvm.py @@ -28,14 +28,6 @@ class SparseGPLVM(SparseGPRegression, GPLVM): SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing) self.ensure_default_constraints() - def _getstate(self): - return SparseGPRegression._getstate(self) - - - def _setstate(self, state): - return SparseGPRegression._setstate(self, state) - - def _get_param_names(self): return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + SparseGPRegression._get_param_names(self)) diff --git a/GPy/models/svigp_regression.py b/GPy/models/svigp_regression.py index 3faa1cab..3397e31e 100644 --- a/GPy/models/svigp_regression.py +++ b/GPy/models/svigp_regression.py @@ -43,10 +43,3 @@ class SVIGPRegression(SVIGP): SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize) self.load_batch() - def _getstate(self): - return GPBase._getstate(self) - - - def _setstate(self, state): - return GPBase._setstate(self, state) - diff --git a/GPy/models/warped_gp.py b/GPy/models/warped_gp.py index d78f31df..4b982ed2 100644 --- a/GPy/models/warped_gp.py +++ b/GPy/models/warped_gp.py @@ -30,14 +30,6 @@ class WarpedGP(GP): GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) self._set_params(self._get_params()) - def 
_getstate(self): - return GP._getstate(self) - - - def _setstate(self, state): - return GP._setstate(self, state) - - def _scale_data(self, Y): self._Ymax = Y.max() self._Ymin = Y.min() diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index b626758f..57b64ae5 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -53,8 +53,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', which_data_rows = slice(None) if which_data_ycols == 'all': which_data_ycols = np.arange(model.output_dim) - if len(which_data_ycols)==0: - raise ValueError('No data selected for plotting') + #if len(which_data_ycols)==0: + #raise ValueError('No data selected for plotting') if ax is None: fig = pb.figure(num=fignum) ax = fig.add_subplot(111) diff --git a/GPy/testing/index_operations_tests.py b/GPy/testing/index_operations_tests.py index 12602879..37cec10b 100644 --- a/GPy/testing/index_operations_tests.py +++ b/GPy/testing/index_operations_tests.py @@ -33,6 +33,8 @@ class Test(unittest.TestCase): self.assertListEqual(self.param_index[one].tolist(), [3]) self.assertListEqual(self.param_index.remove('not in there', [2,3,4]).tolist(), []) + self.assertListEqual(self.view.remove('not in there', [2,3,4]).tolist(), []) + def test_shift_left(self): self.view.shift_left(0, 2) self.assertListEqual(self.param_index[three].tolist(), [2,5]) @@ -82,6 +84,10 @@ class Test(unittest.TestCase): self.assertEqual(self.param_index.size, 6) self.assertEqual(self.view.size, 5) + def test_print(self): + print self.param_index + print self.view + if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.test_index_view'] unittest.main() \ No newline at end of file diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 9ed218d8..91683edc 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -5,9 +5,11 @@ import unittest import numpy as np import GPy import sys +from GPy.core.parameterization.param import Param verbose = 0 +np.random.seed(50) class Kern_check_model(GPy.core.Model): @@ -29,7 +31,7 @@ class Kern_check_model(GPy.core.Model): dL_dK = np.ones((X.shape[0], X2.shape[0])) self.kernel = kernel - self.X = GPy.core.parameterization.Param('X',X) + self.X = X self.X2 = X2 self.dL_dK = dL_dK @@ -76,10 +78,11 @@ class Kern_check_dK_dX(Kern_check_model): """This class allows gradient checks for the gradient of a kernel with respect to X. """ def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) + self.X = Param('X',X) self.add_parameter(self.X) def parameters_changed(self): - self.X.gradient = self.kernel.gradients_X(self.dL_dK, self.X, self.X2) + self.X.gradient[:] = self.kernel.gradients_X(self.dL_dK, self.X, self.X2) class Kern_check_dKdiag_dX(Kern_check_dK_dX): """This class allows gradient checks for the gradient of a kernel diagonal with respect to X. 
""" @@ -90,7 +93,7 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX): return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum() def parameters_changed(self): - self.X.gradient = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X) + self.X.gradient[:] = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X) @@ -126,6 +129,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if not result: print("Positive definite check failed for " + kern.name + " covariance function.") pass_checks = False + assert(result) return False if verbose: @@ -137,6 +141,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True) pass_checks = False + assert(result) return False if verbose: @@ -148,6 +153,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True) pass_checks = False + assert(result) return False if verbose: @@ -164,6 +170,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True) pass_checks = False + assert(result) return False if verbose: @@ -182,6 +189,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if not result: print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") testmodel.checkgrad(verbose=True) + import ipdb;ipdb.set_trace() + assert(result) pass_checks = False return False @@ -201,6 +210,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb if not result: print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") testmodel.checkgrad(verbose=True) + assert(result) pass_checks = False return False @@ -218,6 +228,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. 
Gradient values as follows:") Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True) pass_checks = False + assert(result) return False return pass_checks @@ -226,7 +237,7 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb class KernelGradientTestsContinuous(unittest.TestCase): def setUp(self): - self.N, self.D = 100, 5 + self.N, self.D = 10, 5 self.X = np.random.randn(self.N,self.D) self.X2 = np.random.randn(self.N+10,self.D) @@ -243,6 +254,17 @@ class KernelGradientTestsContinuous(unittest.TestCase): k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + def test_Prod2(self): + k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D)) + k.randomize() + self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + + def test_Prod3(self): + k = GPy.kern.Matern32(2, active_dims=[2,3]) * (GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)) + k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D)) + k.randomize() + self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + def test_Add(self): k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) k += GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D) @@ -276,6 +298,11 @@ class KernelGradientTestsContinuous(unittest.TestCase): k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + def test_LinearFull(self): + k = GPy.kern.LinearFull(self.D, self.D-1) + k.randomize() + self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) + #TODO: turn off grad checkingwrt X for indexed kernels like coregionalize # class KernelGradientTestsContinuous1D(unittest.TestCase): # def setUp(self): @@ -338,7 +365,7 @@ class KernelTestsNonContinuous(unittest.TestCase): self.X2 = np.random.randn((N0+N1)*2, self.D+1) self.X2[:(N0*2), -1] = 0 self.X2[(N0*2):, -1] = 1 - + def test_IndependentOutputs(self): k = GPy.kern.RBF(self.D) kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') @@ -346,7 +373,7 @@ class KernelTestsNonContinuous(unittest.TestCase): k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1)) - + def test_ODE_UY(self): kern = GPy.kern.ODE_UY(2, active_dims=[0, self.D]) X = self.X[self.X[:,-1]!=2] @@ -356,25 +383,25 @@ class KernelTestsNonContinuous(unittest.TestCase): if __name__ == "__main__": print "Running unit tests, please be (very) patient..." 
- #unittest.main() - np.random.seed(0) - N0 = 3 - N1 = 9 - N2 = 4 - N = N0+N1+N2 - D = 3 - X = np.random.randn(N, D+1) - indices = np.random.random_integers(0, 2, size=N) - X[indices==0, -1] = 0 - X[indices==1, -1] = 1 - X[indices==2, -1] = 2 - #X = X[X[:, -1].argsort(), :] - X2 = np.random.randn((N0+N1)*2, D+1) - X2[:(N0*2), -1] = 0 - X2[(N0*2):, -1] = 1 - k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] - kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') - assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) - k = GPy.kern.RBF(D) - kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') - assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) + unittest.main() +# np.random.seed(0) +# N0 = 3 +# N1 = 9 +# N2 = 4 +# N = N0+N1+N2 +# D = 3 +# X = np.random.randn(N, D+1) +# indices = np.random.random_integers(0, 2, size=N) +# X[indices==0, -1] = 0 +# X[indices==1, -1] = 1 +# X[indices==2, -1] = 2 +# #X = X[X[:, -1].argsort(), :] +# X2 = np.random.randn((N0+N1)*2, D+1) +# X2[:(N0*2), -1] = 0 +# X2[(N0*2):, -1] = 1 +# k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')] +# kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split') +# assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) +# k = GPy.kern.RBF(D) +# kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single') +# assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) diff --git a/GPy/testing/likelihood_tests.py b/GPy/testing/likelihood_tests.py index 7276e108..867851a7 100644 --- a/GPy/testing/likelihood_tests.py +++ b/GPy/testing/likelihood_tests.py @@ -112,7 +112,7 @@ class TestNoiseModels(object): self.f = None self.X = None - def test_noise_models(self): + def test_scale2_models(self): self.setUp() #################################################### @@ -150,64 +150,64 @@ class TestNoiseModels(object): noise_models = {"Student_t_default": { "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [self.var], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] - #"constraints": [("t_noise", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] + #"constraints": [("t_scale2", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))] }, "laplace": True }, "Student_t_1_var": { "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [1.0], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] }, "laplace": True }, "Student_t_small_deg_free": { "model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var), "grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [self.var], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] }, "laplace": True }, "Student_t_small_var": { "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), 
"grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [0.001], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] }, "laplace": True }, "Student_t_large_var": { "model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var), "grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [10.0], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] }, "laplace": True }, "Student_t_approx_gauss": { "model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var), "grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [self.var], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] }, "laplace": True }, "Student_t_log": { "model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var), "grad_params": { - "names": [".*t_noise"], + "names": [".*t_scale2"], "vals": [self.var], - "constraints": [(".*t_noise", constrain_positive), (".*deg_free", constrain_fixed)] + "constraints": [(".*t_scale2", constrain_positive), (".*deg_free", constrain_fixed)] }, "laplace": True }, diff --git a/GPy/testing/model_tests.py b/GPy/testing/model_tests.py index 3c39c5e0..4d20035d 100644 --- a/GPy/testing/model_tests.py +++ b/GPy/testing/model_tests.py @@ -130,7 +130,14 @@ class MiscTests(unittest.TestCase): m2.kern[:] = m.kern[''].values() np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood()) -class GradientTests(unittest.TestCase): + def test_model_optimize(self): + X = np.random.uniform(-3., 3., (20, 1)) + Y = np.sin(X) + np.random.randn(20, 1) * 0.05 + m = GPy.models.GPRegression(X,Y) + m.optimize() + print m + +class GradientTests(np.testing.TestCase): def setUp(self): ###################################### # # 1 dimensional example diff --git a/GPy/testing/observable_tests.py b/GPy/testing/observable_tests.py index 90623703..05794dc3 100644 --- a/GPy/testing/observable_tests.py +++ b/GPy/testing/observable_tests.py @@ -93,12 +93,12 @@ class Test(unittest.TestCase): def test_set_params(self): self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet') - self.par._param_array_[:] = 1 + self.par.param_array[:] = 1 self.par._trigger_params_changed() self.assertEqual(self.par.params_changed_count, 1, 'now params changed') self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count) - self.par._param_array_[:] = 2 + self.par.param_array[:] = 2 self.par._trigger_params_changed() self.assertEqual(self.par.params_changed_count, 2, 'now params changed') self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count) diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index dc59449f..8bfaab4e 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -7,7 +7,7 @@ import unittest import GPy import numpy as np from GPy.core.parameterization.parameter_core import HierarchyError -from GPy.core.parameterization.array_core import ObsAr +from GPy.core.parameterization.observable_array import ObsAr class ArrayCoreTest(unittest.TestCase): def setUp(self): @@ -34,6 +34,7 @@ class ParameterizedTest(unittest.TestCase): self.param = Param('param', 
np.random.rand(25,2), Logistic(0, 1)) self.test1 = GPy.core.Parameterized("test model") + self.test1.param = self.param self.test1.kern = self.rbf+self.white self.test1.add_parameter(self.test1.kern) self.test1.add_parameter(self.param, 0) @@ -58,6 +59,9 @@ class ParameterizedTest(unittest.TestCase): self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED]) self.test1.kern.rbf.fix() self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3) + self.test1.fix() + self.assertTrue(self.test1.is_fixed) + self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*self.test1.size) def test_remove_parameter(self): from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp @@ -86,9 +90,9 @@ class ParameterizedTest(unittest.TestCase): self.assertListEqual(self.test1.constraints[Logexp()].tolist(), range(self.param.size, self.param.size+self.rbf.size)) def test_remove_parameter_param_array_grad_array(self): - val = self.test1.kern._param_array_.copy() + val = self.test1.kern.param_array.copy() self.test1.kern.remove_parameter(self.white) - self.assertListEqual(self.test1.kern._param_array_.tolist(), val[:2].tolist()) + self.assertListEqual(self.test1.kern.param_array.tolist(), val[:2].tolist()) def test_add_parameter_already_in_hirarchy(self): self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0]) diff --git a/GPy/testing/pickle_tests.py b/GPy/testing/pickle_tests.py new file mode 100644 index 00000000..d975aaa3 --- /dev/null +++ b/GPy/testing/pickle_tests.py @@ -0,0 +1,207 @@ +''' +Created on 13 Mar 2014 + +@author: maxz +''' +import unittest, itertools +import cPickle as pickle +import numpy as np +from GPy.core.parameterization.index_operations import ParameterIndexOperations,\ + ParameterIndexOperationsView +import tempfile +from GPy.core.parameterization.param import Param +from GPy.core.parameterization.observable_array import ObsAr +from GPy.core.parameterization.priors import Gaussian +from GPy.kern._src.rbf import RBF +from GPy.kern._src.linear import Linear +from GPy.kern._src.static import Bias, White +from GPy.examples.dimensionality_reduction import mrd_simulation,\ + bgplvm_simulation +from GPy.examples.regression import toy_rbf_1d_50 +from GPy.core.parameterization.variational import NormalPosterior +from GPy.models.gp_regression import GPRegression + +class ListDictTestCase(unittest.TestCase): + def assertListDictEquals(self, d1, d2, msg=None): + for k,v in d1.iteritems(): + self.assertListEqual(list(v), list(d2[k]), msg) + def assertArrayListEquals(self, l1, l2): + for a1, a2 in itertools.izip(l1,l2): + np.testing.assert_array_equal(a1, a2) + +class Test(ListDictTestCase): + def test_parameter_index_operations(self): + pio = ParameterIndexOperations(dict(test1=np.array([4,3,1,6,4]), test2=np.r_[2:130])) + piov = ParameterIndexOperationsView(pio, 20, 250) + self.assertListDictEquals(dict(piov.items()), dict(piov.copy().iteritems())) + self.assertListDictEquals(dict(pio.iteritems()), dict(pio.copy().items())) + + self.assertArrayListEquals(pio.copy().indices(), pio.indices()) + self.assertArrayListEquals(piov.copy().indices(), piov.indices()) + + with tempfile.TemporaryFile('w+b') as f: + pickle.dump(pio, f) + f.seek(0) + pio2 = pickle.load(f) + self.assertListDictEquals(pio._properties, pio2._properties) + + with tempfile.TemporaryFile('w+b') as f: + pickle.dump(piov, f) + f.seek(0) + pio2 = pickle.load(f) + self.assertListDictEquals(dict(piov.items()), dict(pio2.iteritems())) + + def test_param(self): + 
param = Param('test', np.arange(4*2).reshape(4,2)) + param[0].constrain_positive() + param[1].fix() + param[2].set_prior(Gaussian(0,1)) + pcopy = param.copy() + self.assertListEqual(param.tolist(), pcopy.tolist()) + self.assertListEqual(str(param).split('\n'), str(pcopy).split('\n')) + self.assertIsNot(param, pcopy) + with tempfile.TemporaryFile('w+b') as f: + pickle.dump(param, f) + f.seek(0) + pcopy = pickle.load(f) + self.assertListEqual(param.tolist(), pcopy.tolist()) + self.assertSequenceEqual(str(param), str(pcopy)) + + def test_observable_array(self): + obs = ObsAr(np.arange(4*2).reshape(4,2)) + pcopy = obs.copy() + self.assertListEqual(obs.tolist(), pcopy.tolist()) + with tempfile.TemporaryFile('w+b') as f: + pickle.dump(obs, f) + f.seek(0) + pcopy = pickle.load(f) + self.assertListEqual(obs.tolist(), pcopy.tolist()) + self.assertSequenceEqual(str(obs), str(pcopy)) + + def test_parameterized(self): + par = RBF(1, active_dims=[1]) + Linear(2, active_dims=[0,2]) + Bias(3) + White(3) + par.gradient = 10 + par.randomize() + pcopy = par.copy() + self.assertIsInstance(pcopy.constraints, ParameterIndexOperations) + self.assertIsInstance(pcopy.rbf.constraints, ParameterIndexOperationsView) + self.assertIs(pcopy.constraints, pcopy.rbf.constraints._param_index_ops) + self.assertIs(pcopy.constraints, pcopy.rbf.lengthscale.constraints._param_index_ops) + self.assertIs(pcopy.constraints, pcopy.linear.constraints._param_index_ops) + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) + self.assertSequenceEqual(str(par), str(pcopy)) + self.assertIsNot(par.param_array, pcopy.param_array) + self.assertIsNot(par.full_gradient, pcopy.full_gradient) + with tempfile.TemporaryFile('w+b') as f: + par.pickle(f) + f.seek(0) + pcopy = pickle.load(f) + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + pcopy.gradient = 10 + np.testing.assert_allclose(par.linear.full_gradient, pcopy.linear.full_gradient) + np.testing.assert_allclose(pcopy.linear.full_gradient, 10) + self.assertSequenceEqual(str(par), str(pcopy)) + + def test_model(self): + par = toy_rbf_1d_50(optimize=0, plot=0) + pcopy = par.copy() + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) + self.assertSequenceEqual(str(par), str(pcopy)) + self.assertIsNot(par.param_array, pcopy.param_array) + self.assertIsNot(par.full_gradient, pcopy.full_gradient) + self.assertTrue(pcopy.checkgrad()) + self.assert_(np.any(pcopy.gradient!=0.0)) + with tempfile.TemporaryFile('w+b') as f: + par.pickle(f) + f.seek(0) + pcopy = pickle.load(f) + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient) + self.assertSequenceEqual(str(par), str(pcopy)) + self.assert_(pcopy.checkgrad()) + + def test_modelrecreation(self): + par = toy_rbf_1d_50(optimize=0, plot=0) + pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy()) + self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist()) + self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist()) + self.assertSequenceEqual(str(par), str(pcopy)) + self.assertIsNot(par.param_array, pcopy.param_array) + self.assertIsNot(par.full_gradient, pcopy.full_gradient) + self.assertTrue(pcopy.checkgrad()) + self.assert_(np.any(pcopy.gradient!=0.0)) + with 
tempfile.TemporaryFile('w+b') as f:
+            par.pickle(f)
+            f.seek(0)
+            pcopy = pickle.load(f)
+        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
+        np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
+        self.assertSequenceEqual(str(par), str(pcopy))
+        self.assert_(pcopy.checkgrad())
+
+    def test_posterior(self):
+        X = np.random.randn(3,5)
+        Xv = np.random.rand(*X.shape)
+        par = NormalPosterior(X,Xv)
+        par.gradient = 10
+        pcopy = par.copy()
+        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
+        self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
+        self.assertSequenceEqual(str(par), str(pcopy))
+        self.assertIsNot(par.param_array, pcopy.param_array)
+        self.assertIsNot(par.full_gradient, pcopy.full_gradient)
+        with tempfile.TemporaryFile('w+b') as f:
+            par.pickle(f)
+            f.seek(0)
+            pcopy = pickle.load(f)
+        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
+        pcopy.gradient = 10
+        np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
+        np.testing.assert_allclose(pcopy.mean.full_gradient, 10)
+        self.assertSequenceEqual(str(par), str(pcopy))
+
+    def test_model_concat(self):
+        par = mrd_simulation(optimize=0, plot=0, plot_sim=0)
+        par.randomize()
+        pcopy = par.copy()
+        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
+        self.assertListEqual(par.full_gradient.tolist(), pcopy.full_gradient.tolist())
+        self.assertSequenceEqual(str(par), str(pcopy))
+        self.assertIsNot(par.param_array, pcopy.param_array)
+        self.assertIsNot(par.full_gradient, pcopy.full_gradient)
+        self.assertTrue(pcopy.checkgrad())
+        self.assert_(np.any(pcopy.gradient!=0.0))
+        with tempfile.TemporaryFile('w+b') as f:
+            par.pickle(f)
+            f.seek(0)
+            pcopy = pickle.load(f)
+        self.assertListEqual(par.param_array.tolist(), pcopy.param_array.tolist())
+        np.testing.assert_allclose(par.full_gradient, pcopy.full_gradient)
+        self.assertSequenceEqual(str(par), str(pcopy))
+        self.assert_(pcopy.checkgrad())
+
+    def _callback(self, what, which):
+        what.count += 1
+
+    def test_add_observer(self):
+        par = toy_rbf_1d_50(optimize=0, plot=0)
+        par.name = "original"
+        par.count = 0
+        par.add_observer(self, self._callback, 1)
+        pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
+        self.assertNotIn(par.observers[0], pcopy.observers)
+        pcopy = par.copy()
+        pcopy.name = "copy"
+        self.assertTrue(par.checkgrad())
+        self.assertTrue(pcopy.checkgrad())
+        self.assertTrue(pcopy.kern.checkgrad())
+        self.assertIn(par.observers[0], pcopy.observers)
+        self.assertEqual(par.count, 3)
+        self.assertEqual(pcopy.count, 6) # 3 for each call to checkgrad
+
+if __name__ == "__main__":
+    #import sys;sys.argv = ['', 'Test.test_parameter_index_operations']
+    unittest.main()
\ No newline at end of file
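A minimal sketch of the pickling round-trip the tests above exercise, using only
calls that appear in them (the model's own pickle() helper, plain pickle.load,
and toy_rbf_1d_50 from GPy.examples.regression):

    import cPickle as pickle
    import tempfile
    from GPy.examples.regression import toy_rbf_1d_50

    m = toy_rbf_1d_50(optimize=0, plot=0)
    with tempfile.TemporaryFile('w+b') as f:
        m.pickle(f)            # the model serializes itself to the open file
        f.seek(0)
        m2 = pickle.load(f)    # standard pickle restores an equivalent model
    assert m.param_array.tolist() == m2.param_array.tolist()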
diff --git a/GPy/util/caching.py b/GPy/util/caching.py
index 676c3ab8..bb162ee3 100644
--- a/GPy/util/caching.py
+++ b/GPy/util/caching.py
@@ -48,7 +48,7 @@ class Cacher(object):
             if k in kw and kw[k] is not None:
                 return self.operation(*args, **kw)
         # TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
-        #return self.operation(*args)
+        # return self.operation(*args, **kw)
         #if the result is cached, return the cached computation
         state = [all(a is b for a, b in itertools.izip_longest(args, cached_i))
                  for cached_i in self.cached_inputs]
@@ -98,11 +98,20 @@ class Cacher(object):
         self.cached_outputs = []
         self.inputs_changed = []
 
+    def __deepcopy__(self, memo=None):
+        # a deep copy gets a fresh Cacher around the same operation, with an empty cache
+        return Cacher(self.operation, self.limit, self.ignore_args, self.force_kwargs)
+
+    def __getstate__(self):
+        raise NotImplementedError, "Trying to pickle Cacher object with function {}; pickling functions is not possible.".format(str(self.operation))
+
+    def __setstate__(self, state):
+        raise NotImplementedError, "Trying to pickle Cacher object with function {}; pickling functions is not possible.".format(str(self.operation))
+
     @property
     def __name__(self):
         return self.operation.__name__
 
-from functools import partial
+from functools import partial, update_wrapper
 
 class Cacher_wrap(object):
     def __init__(self, f, limit, ignore_args, force_kwargs):
@@ -110,6 +119,7 @@
         self.ignore_args = ignore_args
         self.force_kwargs = force_kwargs
         self.f = f
+        update_wrapper(self, self.f)
     def __get__(self, obj, objtype=None):
         return partial(self, obj)
     def __call__(self, *args, **kwargs):
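The deepcopy/pickle split above is deliberate: a copied Cacher starts with an
empty cache, while pickling is refused because the wrapped callable (usually a
bound method) cannot be serialized. A short sketch; the constructor call is
taken from __deepcopy__ above, everything else is standard library:

    import copy
    import cPickle as pickle
    from GPy.util.caching import Cacher

    def op(x):
        return x ** 2

    c = Cacher(op, 5, (), ())    # (operation, limit, ignore_args, force_kwargs)
    c2 = copy.deepcopy(c)        # fine: a fresh Cacher with an empty cache
    try:
        pickle.dumps(c)          # raises: __getstate__ refuses by design
    except NotImplementedError:
        pass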
diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index 54e42733..f53163f4 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -331,7 +331,7 @@ def football_data(season='1314', data_set='football_data'):
 
 # This will be for downloading google trends data.
 def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'):
-    """Data downloaded from Google trends for given query terms."""
+    """Data downloaded from Google trends for given query terms. Warning: calling this function many times in a row may get you blocked by Google for violating its terms of service."""
     # Inspired by this notebook:
     # http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb
diff --git a/GPy/util/functions.py b/GPy/util/functions.py
new file mode 100644
index 00000000..a9ee1084
--- /dev/null
+++ b/GPy/util/functions.py
@@ -0,0 +1,18 @@
+import numpy as np
+from scipy.special import erf, erfcx
+import sys
+
+epsilon = sys.float_info.epsilon
+lim_val = -np.log(epsilon)
+
+def cum_gaussian(x):
+    # standard normal CDF, clipped away from exactly 0 and 1 so logs stay finite
+    g = 0.5*(1 + erf(x/np.sqrt(2)))
+    return np.where(g == 0, epsilon, np.where(g == 1, 1 - epsilon, g))
+
+def ln_cum_gaussian(x):
+    # for x < 0, log Phi(x) = -x**2/2 + log(.5) + log(erfcx(-x/sqrt(2))),
+    # which stays accurate far into the left tail
+    return np.where(x < 0, -.5*x*x + np.log(.5) + np.log(erfcx(-np.sqrt(2)/2*x)), np.log(cum_gaussian(x)))
+
+def clip_exp(x):
+    # clip the argument to [-lim_val, lim_val] before exponentiating,
+    # so the result neither overflows nor underflows to exactly 0
+    if np.any(x >= lim_val) or np.any(x <= -lim_val):
+        return np.exp(np.clip(x, -lim_val, lim_val))
+    return np.exp(x)
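Why the erfcx branch matters: a direct np.log of the clipped CDF saturates at
log(epsilon) (about -36.04), while the erfcx form keeps following the true log
CDF into the tail. An illustrative check (values are what scipy produces):

    import numpy as np
    from GPy.util.functions import cum_gaussian, ln_cum_gaussian

    x = np.array([-40.0])
    print np.log(cum_gaussian(x))   # ~ -36.04, floored at log(epsilon)
    print ln_cum_gaussian(x)        # ~ -804.6, the true value of log Phi(-40)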
+ """ + def __init__(self, X): + self.mu = X.mean(0) + self.sigma = X.std(0) + + X = self.center(X) + + # self.X = input + if X.shape[0] >= X.shape[1]: + # print "N >= D: using primal" + self.eigvals, self.eigvectors = self._primal_eig(X) + else: + # print "N < D: using dual" + self.eigvals, self.eigvectors = self._dual_eig(X) + self.sort = numpy.argsort(self.eigvals)[::-1] + self.eigvals = self.eigvals[self.sort] + self.eigvectors = self.eigvectors[:, self.sort] + self.fracs = self.eigvals / self.eigvals.sum() + self.Q = self.eigvals.shape[0] + + def center(self, X): + """ + Center `X` in pca space. + """ + X = X - self.mu + X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma) + return X + + def _primal_eig(self, X): + return numpy.linalg.eigh(numpy.einsum('ji,jk->ik',X,X)) + + def _dual_eig(self, X): + dual_eigvals, dual_eigvects = numpy.linalg.eigh(numpy.einsum('ij,kj->ik',X,X)) + relevant_dimensions = numpy.argsort(numpy.abs(dual_eigvals))[-X.shape[1]:] + eigvals = dual_eigvals[relevant_dimensions] + eigvects = dual_eigvects[:, relevant_dimensions] + eigvects = (1. / numpy.sqrt(X.shape[0] * numpy.abs(eigvals))) * X.T.dot(eigvects) + eigvects /= numpy.sqrt(numpy.diag(eigvects.T.dot(eigvects))) + return eigvals, eigvects + + def project(self, X, Q=None): + """ + Project X into pca space, defined by the Q highest eigenvalues. + Y = X dot V + """ + if Q is None: + Q = self.Q + if Q > X.shape[1]: + raise IndexError("requested dimension larger then input dimension") + X = self.center(X) + return X.dot(self.eigvectors[:, :Q]) + + def plot_fracs(self, Q=None, ax=None, fignum=None): + """ + Plot fractions of Eigenvalues sorted in descending order. + """ + if ax is None: + fig = pylab.figure(fignum) + ax = fig.add_subplot(111) + if Q is None: + Q = self.Q + ticks = numpy.arange(Q) + bar = ax.bar(ticks - .4, self.fracs[:Q]) + ax.set_xticks(ticks, map(lambda x: r"${}$".format(x), ticks + 1)) + ax.set_ylabel("Eigenvalue fraction") + ax.set_xlabel("PC") + ax.set_ylim(0, ax.get_ylim()[1]) + ax.set_xlim(ticks.min() - .5, ticks.max() + .5) + try: + pylab.tight_layout() + except: + pass + return bar + + def plot_2d(self, X, labels=None, s=20, marker='o', + dimensions=(0, 1), ax=None, colors=None, + fignum=None, cmap=matplotlib.cm.jet, # @UndefinedVariable + ** kwargs): + """ + Plot dimensions `dimensions` with given labels against each other in + PC space. Labels can be any sequence of labels of dimensions X.shape[0]. 
diff --git a/GPy/util/symbolic.py b/GPy/util/symbolic.py
index 49c8c33a..5b3ac312 100644
--- a/GPy/util/symbolic.py
+++ b/GPy/util/symbolic.py
@@ -1,5 +1,55 @@
-from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign
+from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
 
+class gammaln(Function):
+    nargs = 1
+
+    def fdiff(self, argindex=1):
+        # d/dx log Gamma(x) is the digamma function polygamma(0, x)
+        x = self.args[0]
+        return polygamma(0, x)
+
+    @classmethod
+    def eval(cls, x):
+        if x.is_Number:
+            return log(gamma(x))
+
+class ln_cum_gaussian(Function):
+    nargs = 1
+
+    def fdiff(self, argindex=1):
+        # d/dx log Phi(x) = phi(x)/Phi(x), written as exp(-log Phi(x) - x**2/2)/sqrt(2*pi)
+        x = self.args[0]
+        return exp(-ln_cum_gaussian(x) - 0.5*x*x)/sqrt(2*pi)
+
+    @classmethod
+    def eval(cls, x):
+        if x.is_Number:
+            return log(cum_gaussian(x))
+
+    def _eval_is_real(self):
+        return self.args[0].is_real
+
+class cum_gaussian(Function):
+    nargs = 1
+
+    def fdiff(self, argindex=1):
+        # d/dx Phi(x) is the standard normal density
+        x = self.args[0]
+        return gaussian(x)
+
+    @classmethod
+    def eval(cls, x):
+        if x.is_Number:
+            return 0.5*(1 + erf(sqrt(2)/2*x))
+
+    def _eval_is_real(self):
+        return self.args[0].is_real
+
+class gaussian(Function):
+    nargs = 1
+
+    @classmethod
+    def eval(cls, x):
+        # always evaluates straight to the density expression
+        return 1/sqrt(2*pi)*exp(-0.5*x*x)
+
+    def _eval_is_real(self):
+        return True
 
 class ln_diff_erf(Function):
     nargs = 2
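The derivative chain these sympy classes encode, written out: with
phi(x) = exp(-x**2/2)/sqrt(2*pi) the normal density and Phi(x) the normal CDF,

    d/dx Phi(x)     = phi(x)
    d/dx log Phi(x) = phi(x)/Phi(x) = exp(log phi(x) - log Phi(x))

which is exactly the exp(-ln_cum_gaussian(x) - 0.5*x*x)/sqrt(2*pi) returned by
fdiff above. A short interactive check; a sketch assuming a sympy version
compatible with this Function API:

    from sympy import Symbol
    from GPy.util.symbolic import cum_gaussian, ln_cum_gaussian

    x = Symbol('x', real=True)
    print cum_gaussian(x).diff(x)
    # -> sqrt(2)*exp(-x**2/2)/(2*sqrt(pi)), since gaussian() auto-evaluates
    print ln_cum_gaussian(x).diff(x)
    # -> exp(-x**2/2 - ln_cum_gaussian(x))/sqrt(2*pi)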