diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index 839529d6..a42d76ed 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -2,7 +2,9 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) from model import * -from parameterization.parameterized import * +from parameterization.parameterized import adjust_name_for_printing, Parameterizable +from parameterization.param import Param, ParamConcatenation + from gp import GP from sparse_gp import SparseGP from svigp import SVIGP diff --git a/GPy/core/gp.py b/GPy/core/gp.py index d769678e..d8d1a87a 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -30,7 +30,10 @@ class GP(Model): super(GP, self).__init__(name) assert X.ndim == 2 - self.X = ObservableArray(X) + if isinstance(X, ObservableArray): + self.X = self.X = X + else: self.X = ObservableArray(X) + self.num_data, self.input_dim = self.X.shape assert Y.ndim == 2 @@ -43,7 +46,8 @@ class GP(Model): else: self.Y_metadata = None - assert isinstance(kernel, kern.kern) + assert isinstance(kernel, kern.Kern) + assert self.input_dim == kernel.input_dim self.kern = kernel assert isinstance(likelihood, likelihoods.Likelihood) @@ -70,7 +74,7 @@ class GP(Model): def log_likelihood(self): return self._log_marginal_likelihood - def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False): + def _raw_predict(self, _Xnew, full_cov=False): """ Internal helper function for making predictions, does not account for normalization or likelihood @@ -80,29 +84,27 @@ class GP(Model): diagonal of the covariance is returned. 
""" - Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T + Kx = self.kern.K(_Xnew, self.X).T #LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1) WiKx = np.dot(self.posterior.woodbury_inv, Kx) mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: - Kxx = self.kern.K(_Xnew, which_parts=which_parts) + Kxx = self.kern.K(_Xnew) #var = Kxx - tdot(LiKx.T) var = np.dot(Kx.T, WiKx) else: - Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts) + Kxx = self.kern.Kdiag(_Xnew) #var = Kxx - np.sum(LiKx*LiKx, 0) var = Kxx - np.sum(WiKx*Kx, 0) var = var.reshape(-1, 1) return mu, var - def predict(self, Xnew, which_parts='all', full_cov=False, **likelihood_args): + def predict(self, Xnew, full_cov=False, **likelihood_args): """ Predict the function(s) at the new point(s) Xnew. :param Xnew: The points at which to make a prediction :type Xnew: np.ndarray, Nnew x self.input_dim - :param which_parts: specifies which outputs kernel(s) to use in prediction - :type which_parts: ('all', list of bools) :param full_cov: whether to return the full covariance matrix, or just the diagonal :type full_cov: bool @@ -118,13 +120,13 @@ class GP(Model): """ #predict the latent function values - mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts) + mu, var = self._raw_predict(Xnew, full_cov=full_cov) # now push through likelihood mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args) return mean, var, _025pm, _975pm - def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True): + def posterior_samples_f(self,X,size=10, full_cov=True): """ Samples the posterior GP at the points X. @@ -132,13 +134,11 @@ class GP(Model): :type X: np.ndarray, Nnew x self.input_dim. :param size: the number of a posteriori samples. :type size: int. - :param which_parts: which of the kernel functions to use (additively). - :type which_parts: 'all', or list of bools. 
:param full_cov: whether to return the full covariance matrix, or just the diagonal. :type full_cov: bool. :returns: Ysim: set of simulations, a Numpy array (N x samples). """ - m, v = self._raw_predict(X, which_parts=which_parts, full_cov=full_cov) + m, v = self._raw_predict(X, full_cov=full_cov) v = v.reshape(m.size,-1) if len(v.shape)==3 else v if not full_cov: Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T @@ -147,7 +147,7 @@ class GP(Model): return Ysim - def posterior_samples(self,X,size=10,which_parts='all',full_cov=True,noise_model=None): + def posterior_samples(self,X,size=10, full_cov=True,noise_model=None): """ Samples the posterior GP at the points X. @@ -155,15 +155,13 @@ class GP(Model): :type X: np.ndarray, Nnew x self.input_dim. :param size: the number of a posteriori samples. :type size: int. - :param which_parts: which of the kernel functions to use (additively). - :type which_parts: 'all', or list of bools. :param full_cov: whether to return the full covariance matrix, or just the diagonal. :type full_cov: bool. :param noise_model: for mixed noise likelihood, the noise model to use in the samples. :type noise_model: integer. :returns: Ysim: set of simulations, a Numpy array (N x samples). """ - Ysim = self.posterior_samples_f(X, size, which_parts=which_parts, full_cov=full_cov) + Ysim = self.posterior_samples_f(X, size, full_cov=full_cov) if isinstance(self.likelihood, Gaussian): noise_std = np.sqrt(self.likelihood._get_params()) Ysim += np.random.normal(0,noise_std,Ysim.shape) diff --git a/GPy/core/model.py b/GPy/core/model.py index 55083aaf..21bcf0c7 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -4,12 +4,8 @@ from .. 
import likelihoods from ..inference import optimization -from ..util.linalg import jitchol from ..util.misc import opt_wrapper from parameterization import Parameterized -from parameterization.parameterized import UNFIXED -from parameterization.domains import _POSITIVE, _REAL -from parameterization.index_operations import ParameterIndexOperations import multiprocessing as mp import numpy as np from numpy.linalg.linalg import LinAlgError @@ -240,7 +236,7 @@ class Model(Parameterized): constrained positive. """ raise DeprecationWarning, 'parameters now have default constraints' - positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity'] + #positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity'] # param_names = self._get_param_names() # for s in positive_strings: @@ -489,20 +485,17 @@ class Model(Parameterized): if not hasattr(self, 'kern'): raise ValueError, "this model has no kernel" - k = [p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD] - if (not len(k) == 1): - raise ValueError, "cannot determine sensitivity for this kernel" - k = k[0] - from ..kern.parts.rbf import RBF - from ..kern.parts.rbf_inv import RBFInv - from ..kern.parts.linear import Linear + k = self.kern#[p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD] + from ..kern import RBF, Linear#, RBFInv + if isinstance(k, RBF): return 1. 
/ k.lengthscale - elif isinstance(k, RBFInv): - return k.inv_lengthscale + #elif isinstance(k, RBFInv): + # return k.inv_lengthscale elif isinstance(k, Linear): return k.variances - + else: + raise ValueError, "cannot determine sensitivity for this kernel" def pseudo_EM(self, stop_crit=.1, **kwargs): """ diff --git a/GPy/core/parameterization/array_core.py b/GPy/core/parameterization/array_core.py index 7892e94a..e8be0f77 100644 --- a/GPy/core/parameterization/array_core.py +++ b/GPy/core/parameterization/array_core.py @@ -28,14 +28,20 @@ class ObservableArray(np.ndarray, Observable): """ __array_priority__ = -1 # Never give back ObservableArray def __new__(cls, input_array): - obj = np.atleast_1d(input_array).view(cls) + if not isinstance(input_array, ObservableArray): + obj = np.atleast_1d(input_array).view(cls) + else: obj = input_array cls.__name__ = "ObservableArray\n " - obj._observers_ = {} return obj + + def __init__(self, *a, **kw): + super(ObservableArray, self).__init__(*a, **kw) + def __array_finalize__(self, obj): # see InfoArray.__array_finalize__ for comments if obj is None: return - self._observers_ = getattr(obj, '_observers_', None) + self._observer_callables_ = getattr(obj, '_observer_callables_', None) + def __array_wrap__(self, out_arr, context=None): return out_arr.view(np.ndarray) diff --git a/GPy/core/parameterization/index_operations.py b/GPy/core/parameterization/index_operations.py index bfd0bf21..b5399741 100644 --- a/GPy/core/parameterization/index_operations.py +++ b/GPy/core/parameterization/index_operations.py @@ -83,11 +83,21 @@ class ParameterIndexOperations(object): def iterproperties(self): return self._properties.iterkeys() - def shift(self, start, size): + def shift_right(self, start, size): for ind in self.iterindices(): toshift = ind>=start - if toshift.size > 0: - ind[toshift] += size + ind[toshift] += size + + def shift_left(self, start, size): + for v, ind in self.items(): + todelete = (ind>=start) * (ind=start + if 
toshift.size != 0: + ind[toshift] -= size + if ind.size != 0: self._properties[v] = ind + else: del self._properties[v] def clear(self): self._properties.clear() @@ -183,7 +193,7 @@ class ParameterIndexOperationsView(object): yield i - def shift(self, start, size): + def shift_right(self, start, size): raise NotImplementedError, 'Shifting only supported in original ParamIndexOperations' diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index f54c0117..ccbc76d5 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -3,7 +3,7 @@ import itertools import numpy -from parameter_core import Constrainable, Gradcheckable, Indexable, Parameterizable, adjust_name_for_printing +from parameter_core import Constrainable, Gradcheckable, Indexable, Parentable, adjust_name_for_printing from array_core import ObservableArray, ParamList ###### printing @@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision __print_threshold__ = 5 ###### -class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameterizable): +class Param(Constrainable, ObservableArray, Gradcheckable, Indexable): """ Parameter object for GPy models. 
@@ -23,7 +23,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri :param input_array: array which this parameter handles :type input_array: numpy.ndarray :param default_constraint: The default constraint for this parameter - :type default_constraint: + :type default_constraint: You can add/remove constraints by calling constrain on the parameter itself, e.g: @@ -54,12 +54,12 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri obj._tied_to_me_ = SetDict() obj._tied_to_ = [] obj._original_ = True - obj.gradient = None + obj._gradient_ = None return obj - def __init__(self, name, input_array, default_constraint=None): - super(Param, self).__init__(name=name, default_constraint=default_constraint) - + def __init__(self, name, input_array, default_constraint=None, *a, **kw): + super(Param, self).__init__(name=name, default_constraint=default_constraint, *a, **kw) + def __array_finalize__(self, obj): # see InfoArray.__array_finalize__ for comments if obj is None: return @@ -76,10 +76,20 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri self._updated_ = getattr(obj, '_updated_', None) self._original_ = getattr(obj, '_original_', None) self._name = getattr(obj, 'name', None) - self.gradient = getattr(obj, 'gradient', None) + self._gradient_ = getattr(obj, '_gradient_', None) self.constraints = getattr(obj, 'constraints', None) self.priors = getattr(obj, 'priors', None) + + @property + def gradient(self): + if self._gradient_ is None: + self._gradient_ = numpy.zeros(self._realshape_) + return self._gradient_[self._current_slice_] + @gradient.setter + def gradient(self, val): + self.gradient[:] = val + #=========================================================================== # Pickling operations #=========================================================================== @@ -114,7 +124,14 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri 
self._parent_index_ = state.pop() self._direct_parent_ = state.pop() self.name = state.pop() - + + def copy(self, *args): + constr = self.constraints.copy() + priors = self.priors.copy() + p = Param(self.name, self.view(numpy.ndarray).copy(), self._default_constraint_) + p.constraints = constr + p.priors = priors + return p #=========================================================================== # get/set parameters #=========================================================================== @@ -127,7 +144,10 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri return self.flat def _collect_gradient(self, target): - target[:] = self.gradient.flat + target += self.gradient.flat + + def _set_gradient(self, g): + self.gradient = g.reshape(self._realshape_) #=========================================================================== # Array operations -> done @@ -192,7 +212,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri return numpy.r_[a] return numpy.r_[:b] return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size))) - + #=========================================================================== # Convenience #=========================================================================== @@ -214,7 +234,9 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri def _description_str(self): if self.size <= 1: return ["%f" % self] else: return [str(self.shape)] - def parameter_names(self, add_name=False): + def parameter_names(self, add_self=False, adjust_for_printing=False): + if adjust_for_printing: + return [adjust_name_for_printing(self.name)] return [self.name] @property def flattened_parameters(self): @@ -231,14 +253,9 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri @property def _ties_str(self): return [t._short() for t in self._tied_to_] or [''] - @property - def 
name_hirarchical(self): - if self.has_parent(): - return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) - return adjust_name_for_printing(self.name) def __repr__(self, *args, **kwargs): name = "\033[1m{x:s}\033[0;0m:\n".format( - x=self.name_hirarchical) + x=self.hirarchy_name()) return name + super(Param, self).__repr__(*args, **kwargs) def _ties_for(self, rav_index): # size = sum(p.size for p in self._tied_to_) @@ -260,7 +277,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)] for i in range(self._realndim_-len(clean_curr_slice)): i+=len(clean_curr_slice) - clean_curr_slice += range(self._realshape_[i]) + clean_curr_slice += range(self._realshape_[i]) if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice) and len(set(map(len, clean_curr_slice))) <= 1): return numpy.fromiter(itertools.izip(*clean_curr_slice), @@ -272,12 +289,12 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri gen = map(lambda x: " ".join(map(str, x)), gen) return reduce(lambda a, b:max(a, len(b)), gen, len(header)) def _max_len_values(self): - return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.name_hirarchical)) + return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.hirarchy_name())) def _max_len_index(self, ind): return reduce(lambda a, b:max(a, len(str(b))), ind, len(__index_name__)) def _short(self): # short string to print - name = self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) + name = self.hirarchy_name() if self._realsize_ < 2: return name ind = self._indices() @@ -300,8 +317,8 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri if lp is None: lp = self._max_len_names(prirs, __tie_name__) sep = '-' header_format = " {i:{5}^{2}s} | 
\033[1m{x:{5}^{1}s}\033[0;0m | {c:{5}^{0}s} | {p:{5}^{4}s} | {t:{5}^{3}s}" - if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing - else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing + if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing + else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hirarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing if not ties: ties = itertools.cycle(['']) return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices # except: return super(Param, self).__str__() @@ -426,4 +443,4 @@ class ParamConcatenation(object): start = False return "\n".join(strings) def __repr__(self): - return "\n".join(map(repr,self.params)) \ No newline at end of file + return "\n".join(map(repr,self.params)) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 275198b2..c2c8a05a 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -7,19 +7,24 @@ __updated__ = '2013-12-16' def adjust_name_for_printing(name): if name is not None: - return name.replace(" ", "_").replace(".", "_").replace("-","").replace("+","").replace("!","").replace("*","").replace("/","") + return name.replace(" ", "_").replace(".", "_").replace("-", "").replace("+", 
"").replace("!", "").replace("*", "").replace("/", "") return '' class Observable(object): - _observers_ = {} + def __init__(self, *args, **kwargs): + from collections import defaultdict + self._observer_callables_ = defaultdict(list) + def add_observer(self, observer, callble): - self._observers_[observer] = callble - #callble(self) - def remove_observer(self, observer): - del self._observers_[observer] + self._observer_callables_[observer].append(callble) + + def remove_observer(self, observer, callble): + del self._observer_callables_[observer][callble] + def _notify_observers(self): - [callble(self) for callble in self._observers_.itervalues()] - + [[callble(self) for callble in callables] + for callables in self._observer_callables_.itervalues()] + class Pickleable(object): def _getstate(self): """ @@ -47,10 +52,8 @@ class Pickleable(object): #=============================================================================== class Parentable(object): - def __init__(self, direct_parent=None, parent_index=None): - super(Parentable,self).__init__() - self._direct_parent_ = direct_parent - self._parent_index_ = parent_index + _direct_parent_ = None + _parent_index_ = None def has_parent(self): return self._direct_parent_ is not None @@ -68,10 +71,13 @@ class Parentable(object): return self return self._direct_parent_._highest_parent_ + def _notify_parameters_changed(self): + if self.has_parent(): + self._direct_parent_._notify_parameters_changed() + class Nameable(Parentable): - _name = None - def __init__(self, name, direct_parent=None, parent_index=None): - super(Nameable,self).__init__(direct_parent, parent_index) + def __init__(self, name, *a, **kw): + super(Nameable, self).__init__(*a, **kw) self._name = name or self.__class__.__name__ @property @@ -80,58 +86,21 @@ class Nameable(Parentable): @name.setter def name(self, name): from_name = self.name + assert isinstance(name, str) self._name = name if self.has_parent(): - self._direct_parent_._name_changed(self, 
from_name) - - -class Parameterizable(Parentable): - def __init__(self, *args, **kwargs): - super(Parameterizable, self).__init__(*args, **kwargs) - from GPy.core.parameterization.array_core import ParamList - _parameters_ = ParamList() - - def parameter_names(self, add_name=False): - if add_name: - return [adjust_name_for_printing(self.name) + "." + xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)] - return [xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)] - - def _collect_gradient(self, target): - import itertools - [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] - - def _get_params(self): - import numpy as np - # don't overwrite this anymore! - if not self.size: - return np.empty(shape=(0,), dtype=np.float64) - return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0]) - - def _set_params(self, params, update=True): - # don't overwrite this anymore! - import itertools - [p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)] - self.parameters_changed() - - def parameters_changed(self): - """ - This method gets called when parameters have changed. - Another way of listening to param changes is to - add self as a listener to the param, such that - updates get passed through. See :py:function:``GPy.core.param.Observable.add_observer`` - """ - pass - - def _notify_parameters_changed(self): - self.parameters_changed() + self._direct_parent_._name_changed(self, from_name) + def hirarchy_name(self, adjust_for_printing=True): + if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) + else: adjust = lambda x: x if self.has_parent(): - self._direct_parent_._notify_parameters_changed() + return self._direct_parent_.hirarchy_name() + "." 
+ adjust(self.name) + return adjust(self.name) class Gradcheckable(Parentable): - #=========================================================================== - # Gradchecking - #=========================================================================== + def __init__(self, *a, **kw): + super(Gradcheckable, self).__init__(*a, **kw) def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3): if self.has_parent(): return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance) @@ -139,6 +108,7 @@ class Gradcheckable(Parentable): def _checkgrad(self, param): raise NotImplementedError, "Need log likelihood to check gradient against" + class Indexable(object): def _raveled_index(self): raise NotImplementedError, "Need to be able to get the raveled Index" @@ -157,9 +127,10 @@ class Indexable(object): """ raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" -class Constrainable(Nameable, Indexable, Parameterizable): - def __init__(self, name, default_constraint=None): - super(Constrainable,self).__init__(name) + +class Constrainable(Nameable, Indexable): + def __init__(self, name, default_constraint=None, *a, **kw): + super(Constrainable, self).__init__(name=name, *a, **kw) self._default_constraint_ = default_constraint from index_operations import ParameterIndexOperations self.constraints = ParameterIndexOperations() @@ -167,6 +138,16 @@ class Constrainable(Nameable, Indexable, Parameterizable): if self._default_constraint_ is not None: self.constrain(self._default_constraint_) + def _disconnect_parent(self, constr=None): + if constr is None: + constr = self.constraints.copy() + self.constraints.clear() + self.constraints = constr + self._direct_parent_ = None + self._parent_index_ = None + self._connect_fixes() + self._notify_parent_change() + #=========================================================================== # Fixing Parameters: 
#=========================================================================== @@ -200,7 +181,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): def _set_unfixed(self, index): import numpy as np if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool) - #rav_i = self._raveled_index_for(param)[index] + # rav_i = self._raveled_index_for(param)[index] self._fixes_[index] = UNFIXED if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED @@ -230,7 +211,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): """evaluate the prior""" if self.priors.size > 0: x = self._get_params() - return reduce(lambda a,b: a+b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0) + return reduce(lambda a, b: a + b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0) return 0. def _log_prior_gradients(self): @@ -334,7 +315,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): if len(transforms) == 0: transforms = which.properties() import numpy as np - removed = np.empty((0, ), dtype=int) + removed = np.empty((0,), dtype=int) for t in transforms: unconstrained = which.remove(t, self._raveled_index()) removed = np.union1d(removed, unconstrained) @@ -344,5 +325,108 @@ class Constrainable(Nameable, Indexable, Parameterizable): return removed +class Parameterizable(Constrainable): + def __init__(self, *args, **kwargs): + super(Parameterizable, self).__init__(*args, **kwargs) + from GPy.core.parameterization.array_core import ParamList + _parameters_ = ParamList() + self._added_names_ = set() + + def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): + if adjust_for_printing: adjust = lambda x: adjust_name_for_printing(x) + else: adjust = lambda x: x + if recursive: names = [xi for x in self._parameters_ for xi in x.parameter_names(add_self=True, adjust_for_printing=adjust_for_printing)] + else: names = [adjust(x.name) for x in self._parameters_] + if add_self: names = map(lambda x: 
adjust(self.name) + "." + x, names) + return names + + def _add_parameter_name(self, param): + pname = adjust_name_for_printing(param.name) + # and makes sure to not delete programmatically added parameters + if pname in self.__dict__: + if not (param is self.__dict__[pname]): + if pname in self._added_names_: + del self.__dict__[pname] + self._add_parameter_name(param) + else: + self.__dict__[pname] = param + self._added_names_.add(pname) + + def _remove_parameter_name(self, param=None, pname=None): + assert param is None or pname is None, "can only delete either param by name, or the name of a param" + pname = adjust_name_for_printing(pname) or adjust_name_for_printing(param.name) + if pname in self._added_names_: + del self.__dict__[pname] + self._added_names_.remove(pname) + self._connect_parameters() + def _name_changed(self, param, old_name): + self._remove_parameter_name(None, old_name) + self._add_parameter_name(param) + + def _collect_gradient(self, target): + import itertools + [p._collect_gradient(target[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] + + def _set_gradient(self, g): + import itertools + [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)] + + def _get_params(self): + import numpy as np + # don't overwrite this anymore! + if not self.size: + return np.empty(shape=(0,), dtype=np.float64) + return np.hstack([x._get_params() for x in self._parameters_ if x.size > 0]) + + def _set_params(self, params, update=True): + # don't overwrite this anymore! 
+ import itertools + [p._set_params(params[s], update=update) for p, s in itertools.izip(self._parameters_, self._param_slices_)] + self.parameters_changed() + + def copy(self): + """Returns a (deep) copy of the current model""" + import copy + from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView + from .array_core import ParamList + + dc = dict() + for k, v in self.__dict__.iteritems(): + if k not in ['_direct_parent_', '_parameters_', '_parent_index_'] + self.parameter_names(): + if isinstance(v, (Constrainable, ParameterIndexOperations, ParameterIndexOperationsView)): + dc[k] = v.copy() + else: + dc[k] = copy.deepcopy(v) + if k == '_parameters_': + params = [p.copy() for p in v] + + dc['_direct_parent_'] = None + dc['_parent_index_'] = None + dc['_parameters_'] = ParamList() + dc['constraints'].clear() + dc['priors'].clear() + dc['size'] = 0 + + s = self.__new__(self.__class__) + s.__dict__ = dc + + for p in params: + s.add_parameter(p) + + return s + + def _notify_parameters_changed(self): + self.parameters_changed() + if self.has_parent(): + self._direct_parent_._notify_parameters_changed() + + def parameters_changed(self): + """ + This method gets called when parameters have changed. + Another way of listening to param changes is to + add self as a listener to the param, such that + updates get passed through. 
See :py:function:``GPy.core.param.Observable.add_observer`` + """ + pass diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index c8a841c0..d463ed43 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -3,16 +3,15 @@ import numpy; np = numpy -import copy import cPickle import itertools from re import compile, _pattern_type -from param import ParamConcatenation, Param -from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing, Gradcheckable -from transformations import __fixed__, FIXED, UNFIXED +from param import ParamConcatenation +from parameter_core import Constrainable, Pickleable, Parentable, Observable, Parameterizable, adjust_name_for_printing, Gradcheckable +from transformations import __fixed__ from array_core import ParamList -class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): +class Parameterized(Parameterizable, Pickleable, Observable, Gradcheckable): """ Parameterized class @@ -54,8 +53,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): If you want to operate on all parameters use m[''] to wildcard select all paramters and concatenate them. Printing m[''] will result in printing of all parameters in detail. 
""" - def __init__(self, name=None): - super(Parameterized, self).__init__(name=name) + def __init__(self, name=None, *a, **kw): + super(Parameterized, self).__init__(name=name, parent=None, parent_index=None, *a, **kw) self._in_init_ = True self._parameters_ = ParamList() self.size = sum(p.size for p in self._parameters_) @@ -63,7 +62,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): self._fixes_ = None self._param_slices_ = [] self._connect_parameters() - self._added_names_ = set() del self._in_init_ def add_parameter(self, param, index=None): @@ -89,8 +87,8 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): self._parameters_.append(param) else: start = sum(p.size for p in self._parameters_[:index]) - self.constraints.shift(start, param.size) - self.priors.shift(start, param.size) + self.constraints.shift_right(start, param.size) + self.priors.shift_right(start, param.size) self.constraints.update(param.constraints, start) self.priors.update(param.priors, start) self._parameters_.insert(index, param) @@ -115,22 +113,19 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): """ if not param in self._parameters_: raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short()) - del self._parameters_[param._parent_index_] + + start = sum([p.size for p in self._parameters_[:param._parent_index_]]) + self._remove_parameter_name(param) self.size -= param.size - constr = param.constraints.copy() - param.constraints.clear() - param.constraints = constr - param._direct_parent_ = None - param._parent_index_ = None - param._connect_fixes() - param._notify_parent_change() - pname = adjust_name_for_printing(param.name) - if pname in self._added_names_: - del self.__dict__[pname] - self._connect_parameters() - #self._notify_parent_change() + del self._parameters_[param._parent_index_] + + param._disconnect_parent() + 
self.constraints.shift_left(start, param.size) self._connect_fixes() - + self._connect_parameters() + self._notify_parent_change() + + def _connect_parameters(self): # connect parameterlist to this parameterized object # This just sets up the right connection for the params objects @@ -145,19 +140,9 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): for i, p in enumerate(self._parameters_): p._direct_parent_ = self p._parent_index_ = i - not_unique = [] sizes.append(p.size + sizes[-1]) self._param_slices_.append(slice(sizes[-2], sizes[-1])) - pname = adjust_name_for_printing(p.name) - # and makes sure to not delete programmatically added parameters - if pname in self.__dict__: - if isinstance(self.__dict__[pname], (Parameterized, Param)): - if not p is self.__dict__[pname]: - not_unique.append(pname) - del self.__dict__[pname] - elif not (pname in not_unique): - self.__dict__[pname] = p - self._added_names_.add(pname) + self._add_parameter_name(p) #=========================================================================== # Pickling operations @@ -174,19 +159,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): cPickle.dump(self, f, protocol) else: cPickle.dump(self, f, protocol) - def copy(self): - """Returns a (deep) copy of the current model """ - # dc = dict() - # for k, v in self.__dict__.iteritems(): - # if k not in ['_highest_parent_', '_direct_parent_']: - # dc[k] = copy.deepcopy(v) - # dc = copy.deepcopy(self.__dict__) - # dc['_highest_parent_'] = None - # dc['_direct_parent_'] = None - # s = self.__class__.new() - # s.__dict__ = dc - return copy.deepcopy(self) def __getstate__(self): if self._has_get_set_state(): return self._getstate() @@ -243,7 +216,7 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): # Optimization handles: #=========================================================================== def _get_param_names(self): - n = numpy.array([p.name_hirarchical + '[' + 
str(i) + ']' for p in self.flattened_parameters for i in p._indices()]) + n = numpy.array([p.hirarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()]) return n def _get_param_names_transformed(self): n = self._get_param_names() @@ -265,14 +238,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp [numpy.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] return p - def _name_changed(self, param, old_name): - if hasattr(self, old_name) and old_name in self._added_names_: - delattr(self, old_name) - self._added_names_.remove(old_name) - pname = adjust_name_for_printing(param.name) - if pname not in self.__dict__: - self._added_names_.add(pname) - self.__dict__[pname] = param #=========================================================================== # Indexable Handling #=========================================================================== @@ -335,10 +300,6 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): # you can retrieve the original param through this method, by passing # the copy here return self._parameters_[param._parent_index_] - def hirarchy_name(self): - if self.has_parent(): - return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) + "." 
- return '' #=========================================================================== # Get/set parameters: #=========================================================================== @@ -348,13 +309,11 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): """ if not isinstance(regexp, _pattern_type): regexp = compile(regexp) found_params = [] - for p in self._parameters_: - if regexp.match(p.name) is not None: + for n, p in itertools.izip(self.parameter_names(False, False, True), self.flattened_parameters): + if regexp.match(n) is not None: found_params.append(p) - if isinstance(p, Parameterized): - found_params.extend(p.grep_param_names(regexp)) return found_params - return [param for param in self._parameters_ if regexp.match(param.name) is not None] + def __getitem__(self, name, paramlist=None): if paramlist is None: paramlist = self.grep_param_names(name) @@ -366,36 +325,22 @@ class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable): return ParamConcatenation(paramlist) return paramlist[-1] return ParamConcatenation(paramlist) + def __setitem__(self, name, value, paramlist=None): try: param = self.__getitem__(name, paramlist) except AttributeError as a: raise a param[:] = value -# def __getattr__(self, name): -# return self.__getitem__(name) -# def __getattribute__(self, name): -# #try: -# return object.__getattribute__(self, name) - # except AttributeError: - # _, a, tb = sys.exc_info() - # try: - # return self.__getitem__(name) - # except AttributeError: - # raise AttributeError, a.message, tb def __setattr__(self, name, val): - # override the default behaviour, if setting a param, so broadcasting can by used - if hasattr(self, "_parameters_"): - paramlist = self.grep_param_names(name) - if len(paramlist) == 1: self.__setitem__(name, val, paramlist); return + # override the default behaviour, if setting a param, so broadcasting can by used + if hasattr(self, '_parameters_'): + pnames = self.parameter_names(False, 
adjust_for_printing=True, recursive=False) + if name in pnames: self._parameters_[pnames.index(name)][:] = val; return object.__setattr__(self, name, val); #=========================================================================== # Printing: #=========================================================================== def _short(self): - # short string to print - if self.has_parent(): - return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) - else: - return adjust_name_for_printing(self.name) + return self.hirarchy_name() @property def flattened_parameters(self): return [xi for x in self._parameters_ for xi in x.flattened_parameters] diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index a7b26a80..05ce2109 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -3,21 +3,77 @@ Created on 6 Nov 2013 @author: maxz ''' + +import numpy as np from parameterized import Parameterized from param import Param from transformations import Logexp -class Normal(Parameterized): +class VariationalPrior(object): + def KL_divergence(self, variational_posterior): + raise NotImplementedError, "override this for variational inference of latent space" + + def update_gradients_KL(self, variational_posterior): + """ + updates the gradients for mean and variance **in place** + """ + raise NotImplementedError, "override this for variational inference of latent space" + +class NormalPrior(VariationalPrior): + def KL_divergence(self, variational_posterior): + var_mean = np.square(variational_posterior.mean).sum() + var_S = (variational_posterior.variance - np.log(variational_posterior.variance)).sum() + return 0.5 * (var_mean + var_S) - 0.5 * variational_posterior.input_dim * variational_posterior.num_data + + def update_gradients_KL(self, variational_posterior): + # dL: + variational_posterior.mean.gradient -= variational_posterior.mean + 
variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5 + + +class VariationalPosterior(Parameterized): + def __init__(self, means=None, variances=None, name=None, **kw): + super(VariationalPosterior, self).__init__(name=name, **kw) + self.mean = Param("mean", means) + self.variance = Param("variance", variances, Logexp()) + self.add_parameters(self.mean, self.variance) + self.num_data, self.input_dim = self.mean.shape + if self.has_uncertain_inputs(): + assert self.variance.shape == self.mean.shape, "need one variance per sample and dimension" + + def has_uncertain_inputs(self): + return not self.variance is None + + +class NormalPosterior(VariationalPosterior): ''' - Normal distribution for variational approximations. + NormalPosterior distribution for variational approximations. holds the means and variances for a factorizing multivariate normal distribution ''' - def __init__(self, means, variances, name='latent space'): - Parameterized.__init__(self, name=name) - self.mean = Param("mean", means) - self.variance = Param('variance', variances, Logexp()) - self.add_parameters(self.mean, self.variance) + + def plot(self, *args): + """ + Plot latent space X in 1D: + + See GPy.plotting.matplot_dep.variational_plots + """ + import sys + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ...plotting.matplot_dep import variational_plots + return variational_plots.plot(self,*args) + +class SpikeAndSlabPosterior(VariationalPosterior): + ''' + The SpikeAndSlab distribution for variational approximations. + ''' + def __init__(self, means, variances, binary_prob, name='latent space'): + """ + binary_prob : the probability of the distribution on the slab part. 
+ """ + super(SpikeAndSlabPosterior, self).__init__(means, variances, name) + self.gamma = Param("binary_prob",binary_prob,) + self.add_parameter(self.gamma) def plot(self, *args): """ diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index edb8d8f6..bb3116ba 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -5,8 +5,9 @@ import numpy as np from ..util.linalg import mdot from gp import GP from parameterization.param import Param -from GPy.inference.latent_function_inference import var_dtc +from ..inference.latent_function_inference import var_dtc from .. import likelihoods +from parameterization.variational import NormalPosterior class SparseGP(GP): """ @@ -44,46 +45,45 @@ class SparseGP(GP): self.Z = Param('inducing inputs', Z) self.num_inducing = Z.shape[0] - - if not (X_variance is None): - assert X_variance.shape == X.shape - self.X_variance = X_variance - - GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name) + + self.q = NormalPosterior(X, X_variance) + + GP.__init__(self, self.q.mean, Y, kernel, likelihood, inference_method=inference_method, name=name) self.add_parameter(self.Z, index=0) self.parameters_changed() - def _update_gradients_Z(self, add=False): - #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed) - if not self.Z.is_fixed: - if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) - else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) - if self.X_variance is None: - self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) - else: - self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance) - self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance) + def has_uncertain_inputs(self): + return self.q.has_uncertain_inputs() def parameters_changed(self): - self.posterior, 
self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) - self._update_gradients_Z(add=False) + if self.has_uncertain_inputs(): + self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference_latent(self.kern, self.q, self.Z, self.likelihood, self.Y) + else: + self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) + self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood')) + if self.has_uncertain_inputs(): + self.kern.update_gradients_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) + self.Z.gradient = self.kern.gradients_Z_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) + else: + self.kern.update_gradients_sparse(X=self.X, Z=self.Z, **self.grad_dict) + self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict) - def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False): + def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False): """ Make a prediction for the latent function values """ if X_variance_new is None: - Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) + Kx = self.kern.K(self.Z, Xnew) mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: - Kxx = self.kern.K(Xnew, which_parts=which_parts) - var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting + Kxx = self.kern.K(Xnew) + #var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) + var = Kxx - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2) else: - Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) - var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0) + Kxx = self.kern.Kdiag(Xnew) + var = (Kxx - 
np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T else: - # assert which_parts=='all', "swithching out parts of variational kernels is not implemented" - Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts + Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) mu = np.dot(Kx, self.Cpsi1V) if full_cov: raise NotImplementedError, "TODO" @@ -91,7 +91,7 @@ class SparseGP(GP): Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new) psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new) var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) - return mu, var[:,None] + return mu, var def _getstate(self): @@ -101,12 +101,10 @@ class SparseGP(GP): """ return GP._getstate(self) + [self.Z, self.num_inducing, - self.has_uncertain_inputs, self.X_variance] def _setstate(self, state): self.X_variance = state.pop() - self.has_uncertain_inputs = state.pop() self.num_inducing = state.pop() self.Z = state.pop() GP._setstate(self, state) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index a7eb0adb..b6030eb7 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -1,9 +1,9 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as _np -default_seed = _np.random.seed(123344) +#default_seed = _np.random.seed(123344) -def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, output_dim=200, nan=False): +def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan=False): """ model for testing purposes. Samples from a GP with rbf kernel and learns the samples with a new kernel. 
Normally not for optimization, just model cheking @@ -21,19 +21,20 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, # generate GPLVM-like data X = _np.random.rand(num_inputs, input_dim) - lengthscales = _np.random.rand(input_dim) - k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True) - #+ GPy.kern.white(input_dim, 0.01) - ) + #lengthscales = _np.random.rand(input_dim) + #k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True) + ##+ GPy.kern.white(input_dim, 0.01) + #) + k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T - # k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) - k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) - # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) - # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0) - # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) + # k = GPy.kern.RBF_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) + #k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.RBF(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.RBF(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) + # k = GPy.kern.RBF(input_dim, .5, 2., ARD=0) + GPy.kern.RBF(input_dim, .3, .2, ARD=0) + # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + 
GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) p = .3 @@ -41,14 +42,14 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, if nan: m.inference_method = GPy.inference.latent_function_inference.var_dtc.VarDTCMissingData() - m.Y[_np.random.binomial(1,p,size=(Y.shape))] = _np.nan + m.Y[_np.random.binomial(1,p,size=(Y.shape)).astype(bool)] = _np.nan m.parameters_changed() #=========================================================================== # randomly obstruct data with percentage p #=========================================================================== #m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing) - m.lengthscales = lengthscales + #m.lengthscales = lengthscales if plot: import matplotlib.pyplot as pb @@ -73,7 +74,7 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True): data = GPy.util.datasets.oil_100() Y = data['X'] # create simple GP model - kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6) + kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.Bias(6) m = GPy.models.GPLVM(Y, 6, kernel=kernel) m.data_labels = data['Y'].argmax(axis=1) if optimize: m.optimize('scg', messages=verbose) @@ -88,7 +89,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci Y = Y - Y.mean(0) Y /= Y.std(0) # Create the model - kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q) m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing) m.data_labels = data['Y'][:N].argmax(axis=1) @@ -138,7 +139,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4 (1 - var))) + .001 Z = _np.random.permutation(X)[:num_inducing] - kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2)) m = BayesianGPLVM(Y, Q, X=X, 
X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel) m.data_colors = c @@ -158,46 +159,51 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4 def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k): import GPy - from GPy.likelihoods import Gaussian from matplotlib import pyplot as plt _np.random.seed(0) data = GPy.util.datasets.oil() - kernel = GPy.kern.rbf_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, 1., [.1] * Q, ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2)) Y = data['X'][:N] - Yn = Gaussian(Y, normalize=True) - m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k) + m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k) m.data_labels = data['Y'][:N].argmax(axis=1) - m['noise'] = Yn.Y.var() / 100. + m['.*noise.var'] = Y.var() / 100. if optimize: m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05) if plot: - y = m.likelihood.Y[0, :] + y = m.Y[0, :] fig, (latent_axes, sense_axes) = plt.subplots(1, 2) m.plot_latent(ax=latent_axes) - data_show = GPy.util.visualize.vector_show(y) - lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable + data_show = GPy.plotting.matplot_dep.visualize.vector_show(y) + lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) raw_input('Press enter to finish') plt.close(fig) return m def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): + _np.random.seed(1234) + x = _np.linspace(0, 4 * _np.pi, N)[:, None] - s1 = _np.vectorize(lambda x: _np.sin(x)) - s2 = _np.vectorize(lambda x: _np.cos(x)) + s1 = _np.vectorize(lambda x: -_np.sin(_np.exp(x))) + s2 = _np.vectorize(lambda x: _np.cos(x)**2) s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x))) - sS = _np.vectorize(lambda x: _np.sin(2 * x)) + sS = 
_np.vectorize(lambda x: x*_np.sin(x)) s1 = s1(x) s2 = s2(x) s3 = s3(x) sS = sS(x) - S1 = _np.hstack([s1, sS]) + s1 -= s1.mean(); s1 /= s1.std(0) + s2 -= s2.mean(); s2 /= s2.std(0) + s3 -= s3.mean(); s3 /= s3.std(0) + sS -= sS.mean(); sS /= sS.std(0) + + S1 = _np.hstack([s1, s2, sS]) S2 = _np.hstack([s2, s3, sS]) S3 = _np.hstack([s3, sS]) @@ -268,7 +274,7 @@ def bgplvm_simulation(optimize=True, verbose=1, D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10 _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) Y = Ylist[0] - k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) + k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) if optimize: @@ -288,16 +294,18 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1, from GPy.models import BayesianGPLVM from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData - D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10 + D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 5, 9 _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) Y = Ylist[0] - k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) + k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) - inan = _np.random.binomial(1, .3, size=Y.shape) - m = BayesianGPLVM(Y, Q, init="random", num_inducing=num_inducing, kernel=k) + inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool) + m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k) m.inference_method = VarDTCMissingData() m.Y[inan] = _np.nan + m.q.variance *= .1 m.parameters_changed() + m.Yreal = Y if optimize: print "Optimizing model:" @@ -318,7 +326,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) likelihood_list = [Gaussian(x, normalize=True) for x 
in Ylist] - k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) + k = kern.Linear(Q, ARD=True) + kern.Bias(Q, _np.exp(-2)) + kern.White(Q, _np.exp(-2)) m = MRD(likelihood_list, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw) m.ensure_default_constraints() @@ -345,15 +353,15 @@ def brendan_faces(optimize=True, verbose=True, plot=True): m = GPy.models.GPLVM(Yn, Q) # optimize - m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) + m.constrain('rbf|noise|white', GPy.transformations.LogexpClipped()) if optimize: m.optimize('scg', messages=verbose, max_iters=1000) if plot: ax = m.plot_latent(which_indices=(0, 1)) y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False) - GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False) + GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m @@ -372,8 +380,8 @@ def olivetti_faces(optimize=True, verbose=True, plot=True): if plot: ax = m.plot_latent(which_indices=(0, 1)) y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False) - GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False) + GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m @@ -388,8 +396,8 @@ def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=Tru Y = data['Y'][range[0]:range[1], :].copy() if plot: y = Y[0, :] - data_show = 
GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - GPy.util.visualize.data_play(Y, data_show, frame_rate) + data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) + GPy.plotting.matplot_dep.visualize.data_play(Y, data_show, frame_rate) return Y def stick(kernel=None, optimize=True, verbose=True, plot=True): @@ -400,12 +408,12 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True): # optimize m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel) if optimize: m.optimize(messages=verbose, max_f_eval=10000) - if plot and GPy.util.visualize.visual_available: + if plot and GPy.plotting.matplot_dep.visualize.visual_available: plt.clf ax = m.plot_latent() y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) + GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m @@ -419,12 +427,12 @@ def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True): mapping = GPy.mappings.Linear(data['Y'].shape[1], 2) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) if optimize: m.optimize(messages=verbose, max_f_eval=10000) - if plot and GPy.util.visualize.visual_available: + if plot and GPy.plotting.matplot_dep.visualize.visual_available: plt.clf ax = m.plot_latent() y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) + GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m @@ -435,16 +443,16 @@ def bcgplvm_stick(kernel=None, optimize=True, 
verbose=True, plot=True): data = GPy.util.datasets.osu_run1() # optimize - back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.) + back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.) mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) if optimize: m.optimize(messages=verbose, max_f_eval=10000) - if plot and GPy.util.visualize.visual_available: + if plot and GPy.plotting.matplot_dep.visualize.visual_available: plt.clf ax = m.plot_latent() y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) + GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') return m @@ -470,7 +478,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): data = GPy.util.datasets.osu_run1() Q = 6 - kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2)) m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) # optimize m.ensure_default_constraints() @@ -481,8 +489,8 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): plt.sca(latent_axes) m.plot_latent() y = m.likelihood.Y[0, :].copy() - data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) - GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) + data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) + GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) raw_input('Press enter to 
finish') return m @@ -501,8 +509,8 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose if plot: ax = m.plot_latent() y = m.likelihood.Y[0, :] - data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel']) - lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel']) + lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) raw_input('Press enter to finish') lvm_visualizer.close() diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 55567051..5cac1857 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -41,7 +41,7 @@ def coregionalization_toy2(optimize=True, plot=True): Y = np.vstack((Y1, Y2)) #build the kernel - k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) + k1 = GPy.kern.RBF(1) + GPy.kern.bias(1) k2 = GPy.kern.coregionalize(2,1) k = k1**k2 m = GPy.models.GPRegression(X, Y, kernel=k) @@ -68,7 +68,7 @@ def coregionalization_toy2(optimize=True, plot=True): # Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 # Y = np.vstack((Y1, Y2)) # -# k1 = GPy.kern.rbf(1) +# k1 = GPy.kern.RBF(1) # m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1]) # m.constrain_fixed('.*rbf_var', 1.) 
# m.optimize(max_iters=100) @@ -127,7 +127,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True): Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None], np.random.randint(0, 4, num_inducing)[:, None])) - k1 = GPy.kern.rbf(1) + k1 = GPy.kern.RBF(1) k2 = GPy.kern.coregionalize(output_dim=5, rank=5) k = k1**k2 @@ -156,7 +156,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000 data['Y'] = data['Y'] - np.mean(data['Y']) - lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf) + lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.RBF) if plot: pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet) ax = pb.gca() @@ -172,8 +172,8 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000 optim_point_y = np.empty(2) np.random.seed(seed=seed) for i in range(0, model_restarts): - # kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) - kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50)) + # kern = GPy.kern.RBF(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + kern = GPy.kern.RBF(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50)) m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern) m['noise_variance'] = np.random.uniform(1e-3, 1) @@ -196,7 +196,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000 ax.set_ylim(ylim) return m # (models, lls) -def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf): +def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF): """ Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales. 
@@ -278,10 +278,10 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True): optimizer='scg' x_len = 30 X = np.linspace(0, 10, x_len)[:, None] - f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X)) + f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.RBF(1).K(X)) Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None] - kern = GPy.kern.rbf(1) + kern = GPy.kern.RBF(1) poisson_lik = GPy.likelihoods.Poisson() laplace_inf = GPy.inference.latent_function_inference.LaplaceInference() @@ -319,10 +319,10 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize if kernel_type == 'linear': kernel = GPy.kern.linear(X.shape[1], ARD=1) elif kernel_type == 'rbf_inv': - kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1) + kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1) else: - kernel = GPy.kern.rbf(X.shape[1], ARD=1) - kernel += GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) + kernel = GPy.kern.RBF(X.shape[1], ARD=1) + kernel += GPy.kern.White(X.shape[1]) + GPy.kern.bias(X.shape[1]) m = GPy.models.GPRegression(X, Y, kernel) # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25 # m.set_prior('.*lengthscale',len_prior) @@ -358,9 +358,9 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o if kernel_type == 'linear': kernel = GPy.kern.linear(X.shape[1], ARD=1) elif kernel_type == 'rbf_inv': - kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1) + kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1) else: - kernel = GPy.kern.rbf(X.shape[1], ARD=1) + kernel = GPy.kern.RBF(X.shape[1], ARD=1) #kernel += GPy.kern.bias(X.shape[1]) X_variance = np.ones(X.shape) * 0.5 m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance) @@ -421,7 +421,7 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti X = np.random.uniform(-3., 3., (num_samples, 1)) Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05 # construct kernel - rbf = GPy.kern.rbf(1) + rbf = 
GPy.kern.RBF(1) # create simple GP Model m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing) m.checkgrad(verbose=1) @@ -444,7 +444,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt Y[inan] = np.nan # construct kernel - rbf = GPy.kern.rbf(2) + rbf = GPy.kern.RBF(2) # create simple GP Model m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing) @@ -476,9 +476,9 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): # likelihood = GPy.likelihoods.Gaussian(Y) Z = np.random.uniform(-3., 3., (7, 1)) - k = GPy.kern.rbf(1) + k = GPy.kern.RBF(1) # create simple GP Model - no input uncertainty on this one - m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z) + m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z) if optimize: m.optimize('scg', messages=1, max_iters=max_iters) @@ -489,7 +489,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): print m # the same Model with uncertainty - m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z, X_variance=S) + m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S) if optimize: m.optimize('scg', messages=1, max_iters=max_iters) if plot: diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index 337a8477..a633c381 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -16,7 +16,9 @@ If the likelihood object is something other than Gaussian, then exact inference is not tractable. We then resort to a Laplace approximation (laplace.py) or expectation propagation (ep.py). 
-The inference methods return a "Posterior" instance, which is a simple +The inference methods return a +:class:`~GPy.inference.latent_function_inference.posterior.Posterior` +instance, which is a simple structure which contains a summary of the posterior. The model classes can then use this posterior object for making predictions, optimizing hyper-parameters, etc. @@ -29,3 +31,15 @@ expectation_propagation = 'foo' # TODO from GPy.inference.latent_function_inference.var_dtc import VarDTC from dtc import DTC from fitc import FITC + +# class FullLatentFunctionData(object): +# +# +# class LatentFunctionInference(object): +# def inference(self, kern, X, likelihood, Y, Y_metadata=None): +# """ +# Do inference on the latent functions given a covariance function `kern`, +# inputs and outputs `X` and `Y`, and a likelihood `likelihood`. +# Additional metadata for the outputs `Y` can be given in `Y_metadata`. +# """ +# raise NotImplementedError, "Abstract base class for full inference" \ No newline at end of file diff --git a/GPy/inference/latent_function_inference/dtc.py b/GPy/inference/latent_function_inference/dtc.py index dbbff6d0..1a811de6 100644 --- a/GPy/inference/latent_function_inference/dtc.py +++ b/GPy/inference/latent_function_inference/dtc.py @@ -32,7 +32,7 @@ class DTC(object): #make sure the noise is not hetero beta = 1./np.squeeze(likelihood.variance) if beta.size <1: - raise NotImplementedError, "no hetero noise with this implementatino of DTC" + raise NotImplementedError, "no hetero noise with this implementation of DTC" Kmm = kern.K(Z) Knn = kern.Kdiag(X) @@ -89,4 +89,85 @@ class DTC(object): return post, log_marginal, grad_dict +class vDTC(object): + def __init__(self): + self.const_jitter = 1e-6 + + def inference(self, kern, X, X_variance, Z, likelihood, Y): + assert X_variance is None, "cannot use X_variance with DTC. Try varDTC." + + #TODO: MAX! fix this! 
+ from ...util.misc import param_to_array + Y = param_to_array(Y) + + num_inducing, _ = Z.shape + num_data, output_dim = Y.shape + + #make sure the noise is not hetero + beta = 1./np.squeeze(likelihood.variance) + if beta.size <1: + raise NotImplementedError, "no hetero noise with this implementation of DTC" + + Kmm = kern.K(Z) + Knn = kern.Kdiag(X) + Knm = kern.K(X, Z) + U = Knm + Uy = np.dot(U.T,Y) + + #factor Kmm + Kmmi, L, Li, _ = pdinv(Kmm) + + # Compute A + LiUTbeta = np.dot(Li, U.T)*np.sqrt(beta) + A_ = tdot(LiUTbeta) + trace_term = -0.5*(np.sum(Knn)*beta - np.trace(A_)) + A = A_ + np.eye(num_inducing) + + # factor A + LA = jitchol(A) + + # back substutue to get b, P, v + tmp, _ = dtrtrs(L, Uy, lower=1) + b, _ = dtrtrs(LA, tmp*beta, lower=1) + tmp, _ = dtrtrs(LA, b, lower=1, trans=1) + v, _ = dtrtrs(L, tmp, lower=1, trans=1) + tmp, _ = dtrtrs(LA, Li, lower=1, trans=0) + P = tdot(tmp.T) + + #compute log marginal + log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \ + -np.sum(np.log(np.diag(LA)))*output_dim + \ + 0.5*num_data*output_dim*np.log(beta) + \ + -0.5*beta*np.sum(np.square(Y)) + \ + 0.5*np.sum(np.square(b)) + \ + trace_term + + # Compute dL_dKmm + vvT_P = tdot(v.reshape(-1,1)) + P + LAL = Li.T.dot(A).dot(Li) + dL_dK = Kmmi - 0.5*(vvT_P + LAL) + + # Compute dL_dU + vY = np.dot(v.reshape(-1,1),Y.T) + #dL_dU = vY - np.dot(vvT_P, U.T) + dL_dU = vY - np.dot(vvT_P - Kmmi, U.T) + dL_dU *= beta + + #compute dL_dR + Uv = np.dot(U, v) + dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2 + dL_dR -=beta*trace_term/num_data + + grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T} + + #update gradients + kern.update_gradients_sparse(X=X, Z=Z, **grad_dict) + likelihood.update_gradients(dL_dR) + + #construct a posterior object + post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L) + + + return 
post, log_marginal, grad_dict + diff --git a/GPy/inference/latent_function_inference/ep.py b/GPy/inference/latent_function_inference/ep.py index aa106067..87c08221 100644 --- a/GPy/inference/latent_function_inference/ep.py +++ b/GPy/inference/latent_function_inference/ep.py @@ -3,390 +3,91 @@ from scipy import stats from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs from likelihood import likelihood -class EP(likelihood): - def __init__(self,data,noise_model): - """ - Expectation Propagation - - :param data: data to model - :type data: numpy array - :param noise_model: noise distribution - :type noise_model: A GPy noise model - - """ - self.noise_model = noise_model - self.data = data - self.num_data, self.output_dim = self.data.shape - self.is_heteroscedastic = True - self.num_params = 0 - - #Initial values - Likelihood approximation parameters: - #p(y|f) = t(f|tau_tilde,v_tilde) - self.tau_tilde = np.zeros(self.num_data) - self.v_tilde = np.zeros(self.num_data) - - #initial values for the GP variables - self.Y = np.zeros((self.num_data,1)) - self.covariance_matrix = np.eye(self.num_data) - self.precision = np.ones(self.num_data)[:,None] - self.Z = 0 - self.YYT = None - self.V = self.precision * self.Y - self.VVT_factor = self.V - self.trYYT = 0. - - super(EP, self).__init__() - - def restart(self): - self.tau_tilde = np.zeros(self.num_data) - self.v_tilde = np.zeros(self.num_data) - self.Y = np.zeros((self.num_data,1)) - self.covariance_matrix = np.eye(self.num_data) - self.precision = np.ones(self.num_data)[:,None] - self.Z = 0 - self.YYT = None - self.V = self.precision * self.Y - self.VVT_factor = self.V - self.trYYT = 0. 
- - def predictive_values(self,mu,var,full_cov,**noise_args): - if full_cov: - raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood" - return self.noise_model.predictive_values(mu,var,**noise_args) - - def log_predictive_density(self, y_test, mu_star, var_star): - """ - Calculation of the log predictive density - - .. math: - p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*}) - - :param y_test: test observations (y_{*}) - :type y_test: (Nx1) array - :param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*}) - :type mu_star: (Nx1) array - :param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*}) - :type var_star: (Nx1) array - """ - return self.noise_model.log_predictive_density(y_test, mu_star, var_star) - - def _get_params(self): - #return np.zeros(0) - return self.noise_model._get_params() - - def _get_param_names(self): - #return [] - return self.noise_model._get_param_names() - - def _set_params(self,p): - #pass # TODO: the EP likelihood might want to take some parameters... - self.noise_model._set_params(p) - - def _gradients(self,partial): - #return np.zeros(0) # TODO: the EP likelihood might want to take some parameters... 
- return self.noise_model._gradients(partial) - - def _compute_GP_variables(self): - #Variables to be called from GP - mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model - sigma_sum = 1./self.tau_ + 1./self.tau_tilde - mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2 - self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep - self.Z += 0.5*self.num_data*np.log(2*np.pi) - - self.Y = mu_tilde[:,None] - self.YYT = np.dot(self.Y,self.Y.T) - self.covariance_matrix = np.diag(1./self.tau_tilde) - self.precision = self.tau_tilde[:,None] - self.V = self.precision * self.Y - self.VVT_factor = self.V - self.trYYT = np.trace(self.YYT) - - def fit_full(self, K, epsilon=1e-3,power_ep=[1.,1.]): +class EP(object): + def __init__(self, epsilon=1e-6, eta=1., delta=1.): """ The expectation-propagation algorithm. For nomenclature see Rasmussen & Williams 2006. :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) :type epsilon: float - :param power_ep: Power EP parameters - :type power_ep: list of floats - + :param eta: Power EP thing TODO: Ricardo: what, exactly? + :type eta: float64 + :param delta: Power EP thing TODO: Ricardo: what, exactly? 
+ :type delta: float64 """ - self.epsilon = epsilon - self.eta, self.delta = power_ep + self.epsilon, self.eta, self.delta = epsilon, eta, delta + self.reset() + + def reset(self): + self.old_mutilde, self.old_vtilde = None, None + + def inference(self, kern, X, likelihood, Y, Y_metadata=None): + + K = kern.K(X) + + mu_tilde, tau_tilde = self.expectation_propagation() + + + def expectation_propagation(self, K, Y, Y_metadata, likelihood) + + num_data, data_dim = Y.shape + assert data_dim == 1, "This EP methods only works for 1D outputs" + #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) mu = np.zeros(self.num_data) Sigma = K.copy() - """ - Initial values - Cavity distribution parameters: - q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)} - sigma_ = 1./tau_ - mu_ = v_/tau_ - """ - self.tau_ = np.empty(self.num_data,dtype=float) - self.v_ = np.empty(self.num_data,dtype=float) - #Initial values - Marginal moments - z = np.empty(self.num_data,dtype=float) - self.Z_hat = np.empty(self.num_data,dtype=float) - phi = np.empty(self.num_data,dtype=float) - mu_hat = np.empty(self.num_data,dtype=float) - sigma2_hat = np.empty(self.num_data,dtype=float) + Z_hat = np.empty(num_data,dtype=np.float64) + mu_hat = np.empty(num_data,dtype=np.float64) + sigma2_hat = np.empty(num_data,dtype=np.float64) + + #initial values - Gaussian factors + if self.old_mutilde is None: + tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data, num_data)) + else: + assert old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!" + mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde + tau_tilde = v_tilde/mu_tilde #Approximation epsilon_np1 = self.epsilon + 1. epsilon_np2 = self.epsilon + 1. 
- self.iterations = 0 - self.np1 = [self.tau_tilde.copy()] - self.np2 = [self.v_tilde.copy()] - while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.random.permutation(self.num_data) + iterations = 0 + while (epsilon_np1 > self.epsilon) or (epsilon_np2 > self.epsilon): + update_order = np.random.permutation(num_data) for i in update_order: #Cavity distribution parameters - self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i] - self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] + tau_cav = 1./Sigma[i,i] - self.eta*tau_tilde[i] + v_cav = mu[i]/Sigma[i,i] - self.eta*v_tilde[i] #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i]) + Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match(Y[i], tau_cav, v_cav, Y_metadata=(None if Y_metadata is None else Y_metadata[i])) #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) - self.tau_tilde[i] += Delta_tau - self.v_tilde[i] += Delta_v + delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) + delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) + tau_tilde[i] += delta_tau + v_tilde[i] += delta_v #Posterior distribution parameters update - DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i]))) - mu = np.dot(Sigma,self.v_tilde) - self.iterations += 1 - #Sigma recomptutation with Cholesky decompositon - Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K - B = np.eye(self.num_data) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K + DSYR(Sigma, Sigma[:,i].copy(), -Delta_tau/(1.+ Delta_tau*Sigma[i,i])) + mu = np.dot(Sigma, v_tilde) + iterations += 1 + + #(re) compute Sigma and mu using full Cholesky decompy + tau_tilde_root = np.sqrt(tau_tilde) + Sroot_tilde_K = tau_tilde_root[:,None] * K + B = np.eye(num_data) + Sroot_tilde_K * 
tau_tilde_root[None,:] L = jitchol(B) - V,info = dtrtrs(L,Sroot_tilde_K,lower=1) + V, _ = dtrtrs(L, Sroot_tilde_K, lower=1) Sigma = K - np.dot(V.T,V) - mu = np.dot(Sigma,self.v_tilde) - epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data - epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data - self.np1.append(self.tau_tilde.copy()) - self.np2.append(self.v_tilde.copy()) + mu = np.dot(Sigma,v_tilde) - return self._compute_GP_variables() + #monitor convergence + epsilon_np1 = np.mean(np.square(tau_tilde-tau_tilde_old)) + epsilon_np2 = np.mean(np.square(v_tilde-v_tilde_old)) + tau_tilde_old = tau_tilde.copy() + v_tilde_old = v_tilde.copy() - def fit_DTC(self, Kmm, Kmn, epsilon=1e-3,power_ep=[1.,1.]): - """ - The expectation-propagation algorithm with sparse pseudo-input. - For nomenclature see ... 2013. + return mu, Sigma, mu_tilde, tau_tilde - :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - :type epsilon: float - :param power_ep: Power EP parameters - :type power_ep: list of floats - - """ - self.epsilon = epsilon - self.eta, self.delta = power_ep - - num_inducing = Kmm.shape[0] - - #TODO: this doesn't work with uncertain inputs! 
- - """ - Prior approximation parameters: - q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) - Sigma0 = Qnn = Knm*Kmmi*Kmn - """ - KmnKnm = np.dot(Kmn,Kmn.T) - Lm = jitchol(Kmm) - Lmi = chol_inv(Lm) - Kmmi = np.dot(Lmi.T,Lmi) - KmmiKmn = np.dot(Kmmi,Kmn) - Qnn_diag = np.sum(Kmn*KmmiKmn,-2) - LLT0 = Kmm.copy() - - #Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm) - #KmnKnm = np.dot(Kmn, Kmn.T) - #KmmiKmn = np.dot(Kmmi,Kmn) - #Qnn_diag = np.sum(Kmn*KmmiKmn,-2) - #LLT0 = Kmm.copy() - - """ - Posterior approximation: q(f|y) = N(f| mu, Sigma) - Sigma = Diag + P*R.T*R*P.T + K - mu = w + P*Gamma - """ - mu = np.zeros(self.num_data) - LLT = Kmm.copy() - Sigma_diag = Qnn_diag.copy() - - """ - Initial values - Cavity distribution parameters: - q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} - sigma_ = 1./tau_ - mu_ = v_/tau_ - """ - self.tau_ = np.empty(self.num_data,dtype=float) - self.v_ = np.empty(self.num_data,dtype=float) - - #Initial values - Marginal moments - z = np.empty(self.num_data,dtype=float) - self.Z_hat = np.empty(self.num_data,dtype=float) - phi = np.empty(self.num_data,dtype=float) - mu_hat = np.empty(self.num_data,dtype=float) - sigma2_hat = np.empty(self.num_data,dtype=float) - - #Approximation - epsilon_np1 = 1 - epsilon_np2 = 1 - self.iterations = 0 - np1 = [self.tau_tilde.copy()] - np2 = [self.v_tilde.copy()] - while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.random.permutation(self.num_data) - for i in update_order: - #Cavity distribution parameters - self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] - self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] - #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i]) - #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) - self.tau_tilde[i] 
+= Delta_tau - self.v_tilde[i] += Delta_v - #Posterior distribution parameters update - DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau - L = jitchol(LLT) - #cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau)) - V,info = dtrtrs(L,Kmn,lower=1) - Sigma_diag = np.sum(V*V,-2) - si = np.sum(V.T*V[:,i],-1) - mu += (Delta_v-Delta_tau*mu[i])*si - self.iterations += 1 - #Sigma recomputation with Cholesky decompositon - LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T) - L = jitchol(LLT) - V,info = dtrtrs(L,Kmn,lower=1) - V2,info = dtrtrs(L.T,V,lower=0) - Sigma_diag = np.sum(V*V,-2) - Knmv_tilde = np.dot(Kmn,self.v_tilde) - mu = np.dot(V2.T,Knmv_tilde) - epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.num_data - epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.num_data - np1.append(self.tau_tilde.copy()) - np2.append(self.v_tilde.copy()) - - self._compute_GP_variables() - - def fit_FITC(self, Kmm, Kmn, Knn_diag, epsilon=1e-3,power_ep=[1.,1.]): - """ - The expectation-propagation algorithm with sparse pseudo-input. - For nomenclature see Naish-Guzman and Holden, 2008. 
- - :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) - :type epsilon: float - :param power_ep: Power EP parameters - :type power_ep: list of floats - """ - self.epsilon = epsilon - self.eta, self.delta = power_ep - - num_inducing = Kmm.shape[0] - - """ - Prior approximation parameters: - q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0) - Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn - """ - Lm = jitchol(Kmm) - Lmi = chol_inv(Lm) - Kmmi = np.dot(Lmi.T,Lmi) - P0 = Kmn.T - KmnKnm = np.dot(P0.T, P0) - KmmiKmn = np.dot(Kmmi,P0.T) - Qnn_diag = np.sum(P0.T*KmmiKmn,-2) - Diag0 = Knn_diag - Qnn_diag - R0 = jitchol(Kmmi).T - - """ - Posterior approximation: q(f|y) = N(f| mu, Sigma) - Sigma = Diag + P*R.T*R*P.T + K - mu = w + P*Gamma - """ - self.w = np.zeros(self.num_data) - self.Gamma = np.zeros(num_inducing) - mu = np.zeros(self.num_data) - P = P0.copy() - R = R0.copy() - Diag = Diag0.copy() - Sigma_diag = Knn_diag - RPT0 = np.dot(R0,P0.T) - - """ - Initial values - Cavity distribution parameters: - q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)} - sigma_ = 1./tau_ - mu_ = v_/tau_ - """ - self.tau_ = np.empty(self.num_data,dtype=float) - self.v_ = np.empty(self.num_data,dtype=float) - - #Initial values - Marginal moments - z = np.empty(self.num_data,dtype=float) - self.Z_hat = np.empty(self.num_data,dtype=float) - phi = np.empty(self.num_data,dtype=float) - mu_hat = np.empty(self.num_data,dtype=float) - sigma2_hat = np.empty(self.num_data,dtype=float) - - #Approximation - epsilon_np1 = 1 - epsilon_np2 = 1 - self.iterations = 0 - self.np1 = [self.tau_tilde.copy()] - self.np2 = [self.v_tilde.copy()] - while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: - update_order = np.random.permutation(self.num_data) - for i in update_order: - #Cavity distribution parameters - self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] - self.v_[i] = mu[i]/Sigma_diag[i] 
- self.eta*self.v_tilde[i] - #Marginal moments - self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self.data[i],self.tau_[i],self.v_[i]) - #Site parameters update - Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) - Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) - self.tau_tilde[i] += Delta_tau - self.v_tilde[i] += Delta_v - #Posterior distribution parameters update - dtd1 = Delta_tau*Diag[i] + 1. - dii = Diag[i] - Diag[i] = dii - (Delta_tau * dii**2.)/dtd1 - pi_ = P[i,:].reshape(1,num_inducing) - P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_ - Rp_i = np.dot(R,pi_.T) - RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R)) - R = jitchol(RTR).T - self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1 - self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T) - RPT = np.dot(R,P.T) - Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) - mu = self.w + np.dot(P,self.Gamma) - self.iterations += 1 - #Sigma recomptutation with Cholesky decompositon - Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde) - Diag = Diag0 * Iplus_Dprod_i - P = Iplus_Dprod_i[:,None] * P0 - safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. 
- Iplus_Dprod_i)/Diag0) - L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T)) - R,info = dtrtrs(L,R0,lower=1) - RPT = np.dot(R,P.T) - Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) - self.w = Diag * self.v_tilde - self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde)) - mu = self.w + np.dot(P,self.Gamma) - epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data - epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data - self.np1.append(self.tau_tilde.copy()) - self.np2.append(self.v_tilde.copy()) - - return self._compute_GP_variables() diff --git a/GPy/inference/latent_function_inference/posterior.py b/GPy/inference/latent_function_inference/posterior.py index f28bf9d1..a996e1df 100644 --- a/GPy/inference/latent_function_inference/posterior.py +++ b/GPy/inference/latent_function_inference/posterior.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -from ...util.linalg import pdinv, dpotrs, tdot, dtrtrs, dpotri, symmetrify, jitchol, dtrtri +from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol class Posterior(object): """ @@ -81,13 +81,17 @@ class Posterior(object): def covariance(self): if self._covariance is None: #LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1) - self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K) - return self._covariance + self._covariance = np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T + #self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K) + return self._covariance.squeeze() @property def precision(self): if self._precision is None: - self._precision, _, _, _ = pdinv(self.covariance) + cov = np.atleast_3d(self.covariance) + self._precision = np.zeros(cov.shape) # if one covariance per dimension + for p in xrange(cov.shape[-1]): + self._precision[:,:,p] = pdinv(cov[:,:,p])[0] return self._precision @property @@ -95,7 +99,10 @@ class Posterior(object): if 
self._woodbury_chol is None: #compute woodbury chol from if self._woodbury_inv is not None: - _, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv) + winv = np.atleast_3d(self._woodbury_inv) + self._woodbury_chol = np.zeros(winv.shape) + for p in xrange(winv.shape[-1]): + self._woodbury_chol[:,:,p] = pdinv(winv[:,:,p])[2] #Li = jitchol(self._woodbury_inv) #self._woodbury_chol, _ = dtrtri(Li) #W, _, _, _, = pdinv(self._woodbury_inv) @@ -129,7 +136,7 @@ class Posterior(object): @property def K_chol(self): if self._K_chol is None: - self._K_chol = dportf(self._K) + self._K_chol = jitchol(self._K) return self._K_chol diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py index 264f7fc3..349cd72d 100644 --- a/GPy/inference/latent_function_inference/var_dtc.py +++ b/GPy/inference/latent_function_inference/var_dtc.py @@ -43,9 +43,20 @@ class VarDTC(object): return Y * prec # TODO chache this, and make it effective def inference(self, kern, X, X_variance, Z, likelihood, Y): + """Inference for normal sparseGP""" + uncertain_inputs = False + psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs) + return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs) + + def inference_latent(self, kern, posterior_variational, Z, likelihood, Y): + """Inference for GPLVM with uncertain inputs""" + uncertain_inputs = True + psi0, psi1, psi2 = _compute_psi_latent(kern, posterior_variational, Z) + return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs) + + def _inference(self, kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs): #see whether we're using variational uncertain inputs - uncertain_inputs = not (X_variance is None) _, output_dim = Y.shape @@ -60,20 +71,87 @@ class VarDTC(object): trYYT = self.get_trYYT(Y) # do the inference: - dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, \ - psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood = 
_do_inference_on( - kern, X, X_variance, Z, likelihood, - uncertain_inputs, output_dim, - beta, VVT_factor, trYYT) + het_noise = beta.size < 1 + num_inducing = Z.shape[0] + num_data = Y.shape[0] + # kernel computations, using BGPLVM notation + Kmm = kern.K(Z) + + Lm = jitchol(Kmm) + + # The rather complex computations of A + if uncertain_inputs: + if het_noise: + psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0) + else: + psi2_beta = psi2.sum(0) * beta + #if 0: + # evals, evecs = linalg.eigh(psi2_beta) + # clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable + # if not np.array_equal(evals, clipped_evals): + # pass # print evals + # tmp = evecs * np.sqrt(clipped_evals) + # tmp = tmp.T + # no backsubstitution because of bound explosion on tr(A) if not... + LmInv = dtrtri(Lm) + A = LmInv.dot(psi2_beta.dot(LmInv.T)) + else: + if het_noise: + tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) + else: + tmp = psi1 * (np.sqrt(beta)) + tmp, _ = dtrtrs(Lm, tmp.T, lower=1) + A = tdot(tmp) #print A.sum() - likelihood.update_gradients(partial_for_likelihood) + # factor B + B = np.eye(num_inducing) + A + LB = jitchol(B) + psi1Vf = np.dot(psi1.T, VVT_factor) + # back substutue C into psi1Vf + tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0) + _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0) + tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1) + Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1) + + # data fit and derivative of L w.r.t. Kmm + delit = tdot(_LBi_Lmi_psi1Vf) + data_fit = np.trace(delit) + DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit) + delit = -0.5 * DBi_plus_BiPBi + delit += -0.5 * B * output_dim + delit += output_dim * np.eye(num_inducing) + # Compute dL_dKmm + dL_dKmm = backsub_both_sides(Lm, delit) + + # derivatives of L w.r.t. 
psi + dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, + VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, + psi1, het_noise, uncertain_inputs) + + # log marginal likelihood + log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, + psi0, A, LB, trYYT, data_fit) + + #put the gradients in the right places + partial_for_likelihood = _compute_partial_for_likelihood(likelihood, + het_noise, uncertain_inputs, LB, + _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, + psi0, psi1, beta, + data_fit, num_data, output_dim, trYYT) + + #likelihood.update_gradients(partial_for_likelihood) if uncertain_inputs: - grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2} - kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict) + grad_dict = {'dL_dKmm': dL_dKmm, + 'dL_dpsi0':dL_dpsi0, + 'dL_dpsi1':dL_dpsi1, + 'dL_dpsi2':dL_dpsi2, + 'partial_for_likelihood':partial_for_likelihood} else: - grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1} - kern.update_gradients_sparse(X=X, Z=Z, **grad_dict) + grad_dict = {'dL_dKmm': dL_dKmm, + 'dL_dKdiag':dL_dpsi0, + 'dL_dKnm':dL_dpsi1, + 'partial_for_likelihood':partial_for_likelihood} #get sufficient things for posterior prediction #TODO: do we really want to do this in the loop? 
@@ -123,23 +201,35 @@ class VarDTCMissingData(object): else: self._subarray_indices = [[slice(None),slice(None)]] return [Y], [(Y**2).sum()] - + def inference(self, kern, X, X_variance, Z, likelihood, Y): + """Inference for normal sparseGP""" + uncertain_inputs = False + psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs) + return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs) + + def inference_latent(self, kern, posterior_variational, Z, likelihood, Y): + """Inference for GPLVM with uncertain inputs""" + uncertain_inputs = True + psi0, psi1, psi2 = _compute_psi_latent(kern, posterior_variational, Z) + return self._inference(kern, psi0, psi1, psi2, Z, likelihood, Y, uncertain_inputs) + + def _inference(self, kern, psi0_all, psi1_all, psi2_all, Z, likelihood, Y, uncertain_inputs): Ys, traces = self._Y(Y) beta_all = 1./likelihood.variance - uncertain_inputs = not (X_variance is None) het_noise = beta_all.size != 1 import itertools num_inducing = Z.shape[0] - dL_dpsi0_all = np.zeros(X.shape[0]) - dL_dpsi1_all = np.zeros((X.shape[0], num_inducing)) + dL_dpsi0_all = np.zeros(Y.shape[0]) + dL_dpsi1_all = np.zeros((Y.shape[0], num_inducing)) if uncertain_inputs: - dL_dpsi2_all = np.zeros((X.shape[0], num_inducing, num_inducing)) + dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing)) partial_for_likelihood = 0 - LB_all = Cpsi1Vf_all = 0 + woodbury_vector = np.zeros((num_inducing, Y.shape[1])) + woodbury_inv_all = np.zeros((num_inducing, num_inducing, Y.shape[1])) dL_dKmm = 0 log_marginal = 0 @@ -148,11 +238,10 @@ class VarDTCMissingData(object): Lm = jitchol(Kmm) if uncertain_inputs: LmInv = dtrtri(Lm) - # kernel computations, using BGPLVM notation - psi0_all, psi1_all, psi2_all = _compute_psi(kern, X, X_variance, Z, uncertain_inputs) - VVT_factor_all = np.empty(Y.shape) full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1] + if not full_VVT_factor: + psi1V = np.dot(Y.T*beta_all, psi1_all).T for y, trYYT, 
[v, ind] in itertools.izip(Ys, traces, self._subarray_indices): if het_noise: beta = beta_all[ind] @@ -183,10 +272,10 @@ class VarDTCMissingData(object): LB = jitchol(B) psi1Vf = psi1.T.dot(VVT_factor) - _LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf) - - if full_VVT_factor: Cpsi1Vf_all += Cpsi1Vf - LB_all += LB + tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0) + _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0) + tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1) + Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1) # data fit and derivative of L w.r.t. Kmm delit = tdot(_LBi_Lmi_psi1Vf) @@ -219,92 +308,67 @@ class VarDTCMissingData(object): psi0, psi1, beta, data_fit, num_data, output_dim, trYYT) - # gradients: - likelihood.update_gradients(partial_for_likelihood) + if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf + else: + print 'foobar' + tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) + tmp, _ = dpotrs(LB, tmp, lower=1) + woodbury_vector[:, ind] = dtrtrs(Lm, tmp, lower=1, trans=1)[0] + + #import ipdb;ipdb.set_trace() + Bi, _ = dpotri(LB, lower=1) + symmetrify(Bi) + Bi = -dpotri(LB, lower=1)[0] + from ...util import diag + diag.add(Bi, 1) + woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None] + # gradients: if uncertain_inputs: - grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0_all, 'dL_dpsi1':dL_dpsi1_all, 'dL_dpsi2':dL_dpsi2_all} - kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict) + grad_dict = {'dL_dKmm': dL_dKmm, + 'dL_dpsi0':dL_dpsi0_all, + 'dL_dpsi1':dL_dpsi1_all, + 'dL_dpsi2':dL_dpsi2_all, + 'partial_for_likelihood':partial_for_likelihood} else: - grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0_all, 'dL_dKnm':dL_dpsi1_all} - kern.update_gradients_sparse(X=X, Z=Z, **grad_dict) + grad_dict = {'dL_dKmm': dL_dKmm, + 'dL_dKdiag':dL_dpsi0_all, + 'dL_dKnm':dL_dpsi1_all, + 'partial_for_likelihood':partial_for_likelihood} #get sufficient things for posterior prediction #TODO: do we really want to do this 
in the loop? - if full_VVT_factor: - woodbury_vector = Cpsi1Vf_all # == Cpsi1V - else: - print 'foobar' - psi1V = np.dot(Y.T*beta_all, psi1_all).T - tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) - tmp, _ = dpotrs(LB_all, tmp, lower=1) - woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) - - Bi, _ = dpotri(LB_all, lower=1) - symmetrify(Bi) - Bi = -dpotri(LB_all, lower=1)[0] - from ...util import diag - diag.add(Bi, 1) + #if not full_VVT_factor: + # print 'foobar' + # psi1V = np.dot(Y.T*beta_all, psi1_all).T + # tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) + # tmp, _ = dpotrs(LB_all, tmp, lower=1) + # woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) + #import ipdb;ipdb.set_trace() + #Bi, _ = dpotri(LB_all, lower=1) + #symmetrify(Bi) + #Bi = -dpotri(LB_all, lower=1)[0] + #from ...util import diag + #diag.add(Bi, 1) - woodbury_inv = backsub_both_sides(Lm, Bi) - post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) + #woodbury_inv = backsub_both_sides(Lm, Bi) + + post = Posterior(woodbury_inv=woodbury_inv_all, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm) return post, log_marginal, grad_dict -def _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm): -# The rather complex computations of A - if uncertain_inputs: - if het_noise: - psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0) - else: - psi2_beta = psi2.sum(0) * beta - #if 0: - # evals, evecs = linalg.eigh(psi2_beta) - # clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable - # if not np.array_equal(evals, clipped_evals): - # pass # print evals - # tmp = evecs * np.sqrt(clipped_evals) - # tmp = tmp.T - # no backsubstitution because of bound explosion on tr(A) if not... 
- LmInv = dtrtri(Lm) - A = LmInv.dot(psi2_beta.dot(LmInv.T)) - else: - if het_noise: - tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1))) - else: - tmp = psi1 * (np.sqrt(beta)) - tmp, _ = dtrtrs(Lm, tmp.T, lower=1) - A = tdot(tmp) #print A.sum() - return A - - -def _compute_psi(kern, X, X_variance, Z, uncertain_inputs): - if uncertain_inputs: - psi0 = kern.psi0(Z, X, X_variance) - psi1 = kern.psi1(Z, X, X_variance) - psi2 = kern.psi2(Z, X, X_variance) - else: - psi0 = kern.Kdiag(X) - psi1 = kern.K(X, Z) - psi2 = None +def _compute_psi(kern, X, X_variance, Z): + psi0 = kern.Kdiag(X) + psi1 = kern.K(X, Z) + psi2 = None return psi0, psi1, psi2 -def _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs): - Kmm = kern.K(Z) - psi0, psi1, psi2 = _compute_psi(kern, X, X_variance, Z, uncertain_inputs) - return Kmm, psi0, psi1, psi2 - -def _compute_dL_dKmm(num_inducing, output_dim, Lm, B, LB, _LBi_Lmi_psi1Vf): - # Compute dL_dKmm - delit = tdot(_LBi_Lmi_psi1Vf) - data_fit = np.trace(delit) - DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit) - delit = -0.5 * DBi_plus_BiPBi - delit += -0.5 * B * output_dim - delit += output_dim * np.eye(num_inducing) - dL_dKmm = backsub_both_sides(Lm, delit) - return DBi_plus_BiPBi, data_fit, dL_dKmm +def _compute_psi_latent(kern, posterior_variational, Z): + psi0 = kern.psi0(Z, posterior_variational) + psi1 = kern.psi1(Z, posterior_variational) + psi2 = kern.psi2(Z, posterior_variational) + return psi0, psi1, psi2 def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs): dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten() @@ -329,15 +393,6 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C return dL_dpsi0, dL_dpsi1, dL_dpsi2 -def _compute_psi1Vf(Lm, LB, psi1Vf): - # back substutue C into psi1Vf - tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0) - _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, 
tmp, lower=1, trans=0) - tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1) - Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1) - return _LBi_Lmi_psi1Vf, Cpsi1Vf - - def _compute_partial_for_likelihood(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT): # the partial derivative vector for the likelihood if likelihood.size == 0: @@ -379,35 +434,3 @@ def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het lik_4 = 0.5 * data_fit log_marginal = lik_1 + lik_2 + lik_3 + lik_4 return log_marginal - -def _do_inference_on(kern, X, X_variance, Z, likelihood, uncertain_inputs, output_dim, beta, VVT_factor, trYYT): - het_noise = beta.size < 1 - num_inducing = Z.shape[0] - num_data = X.shape[0] - # kernel computations, using BGPLVM notation - Kmm, psi0, psi1, psi2 = _compute_Kmm(kern, X, X_variance, Z, uncertain_inputs) - #factor Kmm # TODO: cache? - Lm = jitchol(Kmm) - A = _compute_A(num_data, uncertain_inputs, beta, het_noise, psi1, psi2, Lm) - # factor B - B = np.eye(num_inducing) + A - LB = jitchol(B) - psi1Vf = np.dot(psi1.T, VVT_factor) - _LBi_Lmi_psi1Vf, Cpsi1Vf = _compute_psi1Vf(Lm, LB, psi1Vf) - # data fit and derivative of L w.r.t. Kmm - DBi_plus_BiPBi, data_fit, dL_dKmm = _compute_dL_dKmm(num_inducing, output_dim, - Lm, B, LB, _LBi_Lmi_psi1Vf) - # derivatives of L w.r.t. 
psi - dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, - VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, - psi1, het_noise, uncertain_inputs) - # log marginal likelihood - log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, - psi0, A, LB, trYYT, data_fit) - #put the gradients in the right places - partial_for_likelihood = _compute_partial_for_likelihood(likelihood, - het_noise, uncertain_inputs, LB, - _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, - psi0, psi1, beta, - data_fit, num_data, output_dim, trYYT) - return dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Cpsi1Vf, psi1, Lm, LB, log_marginal, Kmm, partial_for_likelihood diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index eb4076c3..594ff6d3 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,9 +1,34 @@ -# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -from constructors import * -try: - from constructors import rbf_sympy, sympykern # these depend on sympy -except: - pass -from kern import * +from _src.rbf import RBF +from _src.white import White +from _src.kern import Kern +from _src.linear import Linear +from _src.bias import Bias +from _src.brownian import Brownian +from _src.stationary import Exponential, Matern32, Matern52, ExpQuad +#import coregionalize +#import exponential +#import eq_ode1 +#import finite_dimensional +#import fixed +#import gibbs +#import hetero +#import hierarchical +#import independent_outputs +#import linear +#import Matern32 +#import Matern52 +#import mlp +#import ODE_1 +#import periodic_exponential +#import periodic_Matern32 +#import periodic_Matern52 +#import poly +#import prod_orthogonal +#import prod +#import rational_quadratic +#import rbfcos +#import rbf +#import rbf_inv +#import spline +#import symmetric +#import white diff --git a/GPy/kern/parts/ODE_1.py b/GPy/kern/_src/ODE_1.py similarity index 100% rename from 
GPy/kern/parts/ODE_1.py rename to GPy/kern/_src/ODE_1.py diff --git a/GPy/kern/_src/__init__.py b/GPy/kern/_src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py new file mode 100644 index 00000000..d5515d98 --- /dev/null +++ b/GPy/kern/_src/add.py @@ -0,0 +1,215 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import sys +import numpy as np +import itertools +from linear import Linear +from ...core.parameterization import Parameterized +from ...core.parameterization.param import Param +from kern import Kern + +class Add(Kern): + def __init__(self, subkerns, tensor): + assert all([isinstance(k, Kern) for k in subkerns]) + if tensor: + input_dim = sum([k.input_dim for k in subkerns]) + self.input_slices = [] + n = 0 + for k in subkerns: + self.input_slices.append(slice(n, n+k.input_dim)) + n += k.input_dim + else: + assert all([k.input_dim == subkerns[0].input_dim for k in subkerns]) + input_dim = subkerns[0].input_dim + self.input_slices = [slice(None) for k in subkerns] + super(Add, self).__init__(input_dim, 'add') + self.add_parameters(*subkerns) + + + def K(self, X, X2=None): + """ + Compute the kernel function. + + :param X: the first set of inputs to the kernel + :param X2: (optional) the second set of arguments to the kernel. If X2 + is None, this is passed throgh to the 'part' object, which + handLes this as X2 == X. 
+ """ + assert X.shape[1] == self.input_dim + if X2 is None: + return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)]) + else: + return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]) + + def update_gradients_full(self, dL_dK, X): + [p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + + def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): + [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + + def gradients_X(self, dL_dK, X, X2=None): + """Compute the gradient of the objective function with respect to X. + + :param dL_dK: An array of gradients of the objective function with respect to the covariance function. + :type dL_dK: np.ndarray (num_samples x num_inducing) + :param X: Observed data inputs + :type X: np.ndarray (num_samples x input_dim) + :param X2: Observed data inputs (optional, defaults to X) + :type X2: np.ndarray (num_inducing x input_dim)""" + + target = np.zeros_like(X) + if X2 is None: + [np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + else: + [np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + return target + + def Kdiag(self, X): + assert X.shape[1] == self.input_dim + return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]) + + + def psi0(self, Z, mu, S): + return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)],0) + + def psi1(self, Z, mu, S): + return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) + + def psi2(self, Z, mu, S): + psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in 
zip(self._parameters_, self.input_slices)], 0) + + # compute the "cross" terms + from white import White + from rbf import RBF + #from rbf_inv import RBFInv + #from bias import Bias + from linear import Linear + #ffrom fixed import Fixed + + for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.input_slices), 2): + # white doesn;t combine with anything + if isinstance(p1, White) or isinstance(p2, White): + pass + # rbf X bias + #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)): + elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)): + tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2]) + psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :]) + #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)): + elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)): + tmp = p1.psi1(Z[:,i1], mu[:,i1], S[:,i1]) + psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :]) + else: + raise NotImplementedError, "psi2 cannot be computed for this kernel" + return psi2 + + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + from white import White + from rbf import RBF + #from rbf_inv import RBFInv + #from bias import Bias + from linear import Linear + #ffrom fixed import Fixed + + for p1, is1 in zip(self._parameters_, self.input_slices): + + #compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2! + eff_dL_dpsi1 = dL_dpsi1.copy() + for p2, is2 in zip(self._parameters_, self.input_slices): + if p2 is p1: + continue + if isinstance(p2, White): + continue + elif isinstance(p2, Bias): + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. + else: + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. 
+ + + p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1]) + + + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + from white import white + from rbf import rbf + #from rbf_inv import rbfinv + #from bias import bias + from linear import linear + #ffrom fixed import fixed + + target = np.zeros(Z.shape) + for p1, is1 in zip(self._parameters_, self.input_slices): + + #compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2! + eff_dL_dpsi1 = dL_dpsi1.copy() + for p2, is2 in zip(self._parameters_, self.input_slices): + if p2 is p1: + continue + if isinstance(p2, white): + continue + elif isinstance(p2, bias): + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. + else: + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2. + + + target += p1.gradients_z_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1]) + return target + + def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + from white import white + from rbf import rbf + #from rbf_inv import rbfinv + #from bias import bias + from linear import linear + #ffrom fixed import fixed + + target_mu = np.zeros(mu.shape) + target_S = np.zeros(S.shape) + for p1, is1 in zip(self._parameters_, self.input_slices): + + #compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2! + eff_dL_dpsi1 = dL_dpsi1.copy() + for p2, is2 in zip(self._parameters_, self.input_slices): + if p2 is p1: + continue + if isinstance(p2, white): + continue + elif isinstance(p2, bias): + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. + else: + eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2. 
+ + + a, b = p1.gradients_muS_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1]) + target_mu += a + target_S += b + return target_mu, target_S + + def plot(self, *args, **kwargs): + """ + See GPy.plotting.matplot_dep.plot + """ + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import kernel_plots + kernel_plots.plot(self,*args) + + def _getstate(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return Parameterized._getstate(self) + [#self._parameters_, + self.input_dim, + self.input_slices, + self._param_slices_ + ] + + def _setstate(self, state): + self._param_slices_ = state.pop() + self.input_slices = state.pop() + self.input_dim = state.pop() + Parameterized._setstate(self, state) + + diff --git a/GPy/kern/_src/bias.py b/GPy/kern/_src/bias.py new file mode 100644 index 00000000..e1938c95 --- /dev/null +++ b/GPy/kern/_src/bias.py @@ -0,0 +1,62 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
+# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +from kern import Kern +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp +import numpy as np + +class Bias(Kern): + def __init__(self,input_dim,variance=1.,name=None): + super(Bias, self).__init__(input_dim, name) + self.variance = Param("variance", variance, Logexp()) + self.add_parameter(self.variance) + + def K(self, X, X2=None): + shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0]) + ret = np.empty(shape, dtype=np.float64) + ret[:] = self.variance + return ret + + def Kdiag(self,X): + ret = np.empty((X.shape[0],), dtype=np.float64) + ret[:] = self.variance + return ret + + def update_gradients_full(self, dL_dK, X, X2=None): + self.variance.gradient = dL_dK.sum() + + def update_gradients_diag(self, dL_dKdiag, X): + self.variance.gradient = dL_dK.sum() + + def gradients_X(self, dL_dK,X, X2, target): + return np.zeros(X.shape) + + def gradients_X_diag(self,dL_dKdiag,X,target): + return np.zeros(X.shape) + + + #---------------------------------------# + # PSI statistics # + #---------------------------------------# + + def psi0(self, Z, mu, S): + return self.Kdiag(mu) + + def psi1(self, Z, mu, S, target): + return self.K(mu, S) + + def psi2(self, Z, mu, S, target): + ret = np.empty((mu.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64) + ret[:] = self.variance**2 + return ret + + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + self.variance.gradient = dL_dKmm.sum() + dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum() + + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + return np.zeros(Z.shape) + + def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + return np.zeros(mu.shape), np.zeros(S.shape) diff --git a/GPy/kern/_src/brownian.py b/GPy/kern/_src/brownian.py new file mode 100644 index 00000000..81b57a25 
--- /dev/null +++ b/GPy/kern/_src/brownian.py @@ -0,0 +1,50 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +from kern import Kern +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp +import numpy as np + +class Brownian(Kern): + """ + Brownian motion in 1D only. + + Negative times are treated as a separate (backwards!) Brownian motion. + + :param input_dim: the number of input dimensions + :type input_dim: int + :param variance: + :type variance: float + """ + def __init__(self, input_dim=1, variance=1., name='Brownian'): + assert input_dim==1, "Brownian motion in 1D only" + super(Brownian, self).__init__(input_dim, name) + + self.variance = Param('variance', variance, Logexp()) + self.add_parameters(self.variance) + + def K(self,X,X2=None): + if X2 is None: + X2 = X + return self.variance*np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.) + + def Kdiag(self,X): + return self.variance*np.abs(X.flatten()) + + def update_gradients_full(self, dL_dK, X, X2=None): + if X2 is None: + X2 = X + self.variance.gradient = np.sum(dL_dK * np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.)) + + #def update_gradients_diag(self, dL_dKdiag, X): + #self.variance.gradient = np.dot(np.abs(X.flatten()), dL_dKdiag) + + #def gradients_X(self, dL_dK, X, X2=None): + #if X2 is None: + #return np.sum(self.variance*dL_dK*np.abs(X),1)[:,None] + #else: + #return np.sum(np.where(np.logical_and(np.abs(X)output_dim-1: + if self.rank>output_dim: print("Warning: Unusual choice of rank, it should normally be less than the output_dim.") if W is None: - W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank) + W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank) else: - assert W.shape==(self.output_dim,self.rank) - self.W = Param('W',W) + assert W.shape==(self.output_dim, self.rank) + self.W = Param('W', 
W) if kappa is None: kappa = 0.5*np.ones(self.output_dim) else: - assert kappa.shape==(self.output_dim,) - self.kappa = Param('kappa', kappa) + assert kappa.shape==(self.output_dim, ) + self.kappa = Param('kappa', kappa, Logexp()) self.add_parameters(self.W, self.kappa) self.parameters_changed() @@ -56,54 +57,58 @@ class Coregionalize(Kernpart): def parameters_changed(self): self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa) - def K(self,index,index2,target): - index = np.asarray(index,dtype=np.int) + def K(self, X, X2=None): + index = np.asarray(X, dtype=np.int) #here's the old code (numpy) #if index2 is None: #index2 = index #else: - #index2 = np.asarray(index2,dtype=np.int) + #index2 = np.asarray(index2, dtype=np.int) #false_target = target.copy() - #ii,jj = np.meshgrid(index,index2) - #ii,jj = ii.T, jj.T - #false_target += self.B[ii,jj] + #ii, jj = np.meshgrid(index, index2) + #ii, jj = ii.T, jj.T + #false_target += self.B[ii, jj] - if index2 is None: + + if X2 is None: + target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64) code=""" for(int i=0;i + #include + """ + weave_options = {'headers' : [''], + 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], + 'extra_link_args' : ['-lgomp']} + + mu = pv.mean + N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu) + weave.inline(code, support_code=support_code, libraries=['gomp'], + arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'], + type_converters=weave.converters.blitz,**weave_options) + + + def _weave_dpsi2_dZ(self, dL_dpsi2, Z, pv, target): + AZA = self.variances*self._ZAinner(pv, Z) + code=""" + int n,m,mm,q; + #pragma omp parallel for private(n,mm,q) + for(m=0;m + #include + """ + weave_options = {'headers' : [''], + 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], + 'extra_link_args' : ['-lgomp']} + + N,num_inducing,input_dim = pv.mean.shape[0],Z.shape[0],pv.mean.shape[1] + mu = param_to_array(pv.mean) + 
weave.inline(code, support_code=support_code, libraries=['gomp'], + arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'], + type_converters=weave.converters.blitz,**weave_options) + + + def _mu2S(self, pv): + return np.square(pv.mean) + pv.variance + + def _ZAinner(self, pv, Z): + ZA = Z*self.variances + inner = (pv.mean[:, None, :] * pv.mean[:, :, None]) + diag_indices = np.diag_indices(pv.mean.shape[1], 2) + inner[:, diag_indices[0], diag_indices[1]] += pv.variance + + return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]! + diff --git a/GPy/kern/parts/mlp.py b/GPy/kern/_src/mlp.py similarity index 100% rename from GPy/kern/parts/mlp.py rename to GPy/kern/_src/mlp.py diff --git a/GPy/kern/parts/odekern1.c b/GPy/kern/_src/odekern1.c similarity index 100% rename from GPy/kern/parts/odekern1.c rename to GPy/kern/_src/odekern1.c diff --git a/GPy/kern/parts/periodic_Matern32.py b/GPy/kern/_src/periodic_Matern32.py similarity index 100% rename from GPy/kern/parts/periodic_Matern32.py rename to GPy/kern/_src/periodic_Matern32.py diff --git a/GPy/kern/parts/periodic_Matern52.py b/GPy/kern/_src/periodic_Matern52.py similarity index 100% rename from GPy/kern/parts/periodic_Matern52.py rename to GPy/kern/_src/periodic_Matern52.py diff --git a/GPy/kern/parts/periodic_exponential.py b/GPy/kern/_src/periodic_exponential.py similarity index 100% rename from GPy/kern/parts/periodic_exponential.py rename to GPy/kern/_src/periodic_exponential.py diff --git a/GPy/kern/parts/poly.py b/GPy/kern/_src/poly.py similarity index 100% rename from GPy/kern/parts/poly.py rename to GPy/kern/_src/poly.py diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py new file mode 100644 index 00000000..1d033f70 --- /dev/null +++ b/GPy/kern/_src/prod.py @@ -0,0 +1,65 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
+# Licensed under the BSD 3-clause license (see LICENSE.txt) + +from kern import Kern +import numpy as np + +class Prod(Kern): + """ + Computes the product of 2 kernels + + :param k1, k2: the kernels to multiply + :type k1, k2: Kern + :param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces + :type tensor: Boolean + :rtype: kernel object + + """ + def __init__(self, k1, k2, tensor=False): + if tensor: + super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name) + self.slice1 = slice(0,k1.input_dim) + self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim) + else: + assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension." + super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name) + self.slice1 = slice(0, self.input_dim) + self.slice2 = slice(0, self.input_dim) + self.k1 = k1 + self.k2 = k2 + self.add_parameters(self.k1, self.k2) + + def K(self, X, X2=None): + if X2 is None: + return self.k1.K(X[:,self.slice1], None) * self.k2.K(X[:,self.slice2], None) + else: + return self.k1.K(X[:,self.slice1], X2[:,self.slice1]) * self.k2.K(X[:,self.slice2], X2[:,self.slice2]) + + def Kdiag(self, X): + return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2]) + + def update_gradients_full(self, dL_dK, X): + self.k1.update_gradients_full(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1]) + self.k2.update_gradients_full(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2]) + + def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): + self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1] ) + self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag 
* self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2] ) + + def gradients_X(self, dL_dK, X, X2=None): + target = np.zeros(X.shape) + if X2 is None: + target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1], None) + target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2], None) + else: + target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1]) + target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2]) + return target + + def gradients_X_diag(self, dL_dKdiag, X): + target = np.zeros(X.shape) + target[:,self.slice1] = self.k1.gradients_X(dL_dKdiag*self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1]) + target[:,self.slice2] += self.k2.gradients_X(dL_dKdiag*self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2]) + return target + + diff --git a/GPy/kern/parts/prod_orthogonal.py b/GPy/kern/_src/prod_orthogonal.py similarity index 100% rename from GPy/kern/parts/prod_orthogonal.py rename to GPy/kern/_src/prod_orthogonal.py diff --git a/GPy/kern/parts/rational_quadratic.py b/GPy/kern/_src/rational_quadratic.py similarity index 100% rename from GPy/kern/parts/rational_quadratic.py rename to GPy/kern/_src/rational_quadratic.py diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/_src/rbf.py similarity index 87% rename from GPy/kern/parts/rbf.py rename to GPy/kern/_src/rbf.py index 027aa382..e23e9e2c 100644 --- a/GPy/kern/parts/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -4,13 +4,13 @@ import numpy as np from scipy import weave -from kernpart import Kernpart +from kern import Kern from ...util.linalg import tdot from ...util.misc import fast_array_equal, param_to_array from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp -class RBF(Kernpart): +class RBF(Kern): """ Radial Basis Function kernel, aka 
squared-exponential, exponentiated quadratic or Gaussian kernel: @@ -52,30 +52,16 @@ class RBF(Kernpart): lengthscale = np.ones(self.input_dim) self.variance = Param('variance', variance, Logexp()) - + self.lengthscale = Param('lengthscale', lengthscale, Logexp()) self.lengthscale.add_observer(self, self.update_lengthscale) self.update_lengthscale(self.lengthscale) - + self.add_parameters(self.variance, self.lengthscale) self.parameters_changed() # initializes cache - #self.update_inv_lengthscale(self.lengthscale) - #self.parameters_changed() - # initialize cache - #self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - #self._X, self._X2, self._params_save = np.empty(shape=(3, 1)) - - # a set of optional args to pass to weave - # self.weave_options = {'headers' : [''], - # 'extra_compile_args': ['-fopenmp -O3'], # -march=native'], - # 'extra_link_args' : ['-lgomp']} self.weave_options = {} - def on_input_change(self, X): - #self._K_computations(X, None) - pass - def update_lengthscale(self, l): self.lengthscale2 = np.square(self.lengthscale) @@ -84,23 +70,32 @@ class RBF(Kernpart): self._X, self._X2 = np.empty(shape=(2, 1)) self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S - def K(self, X, X2, target): + def K(self, X, X2=None): self._K_computations(X, X2) - target += self.variance * self._K_dvar + return self.variance * self._K_dvar - def Kdiag(self, X, target): - np.add(target, self.variance, target) + def Kdiag(self, X): + ret = np.ones(X.shape[0]) + ret[:] = self.variance + return ret - def psi0(self, Z, mu, S, target): - target += self.variance + def psi0(self, Z, posterior_variational): + mu = posterior_variational.mean + ret = np.empty(mu.shape[0], dtype=np.float64) + ret[:] = self.variance + return ret - def psi1(self, Z, mu, S, target): + def psi1(self, Z, posterior_variational): + mu = posterior_variational.mean + S = posterior_variational.variance self._psi_computations(Z, mu, S) - target += self._psi1 + return self._psi1 - 
def psi2(self, Z, mu, S, target): + def psi2(self, Z, posterior_variational): + mu = posterior_variational.mean + S = posterior_variational.variance self._psi_computations(Z, mu, S) - target += self._psi2 + return self._psi2 def update_gradients_full(self, dL_dK, X): self._K_computations(X, None) @@ -131,7 +126,9 @@ class RBF(Kernpart): else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): + mu = posterior_variational.mean + S = posterior_variational.variance self._psi_computations(Z, mu, S) #contributions from psi0: @@ -165,7 +162,43 @@ class RBF(Kernpart): else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - def gradients_X(self, dL_dK, X, X2, target): + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): + mu = posterior_variational.mean + S = posterior_variational.variance + self._psi_computations(Z, mu, S) + + #psi1 + denominator = (self.lengthscale2 * (self._psi1_denom)) + dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) + grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0) + + #psi2 + term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim + term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim + dZ = self._psi2[:, :, :, None] * (term1[None] + term2) + grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) + + grad += self.gradients_X(dL_dKmm, Z, None) + + return grad + + def update_gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, posterior_variational): + mu = posterior_variational.mean + S = posterior_variational.variance + self._psi_computations(Z, mu, S) + 
#psi1 + tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom + grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) + grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) + #psi2 + tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom + grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) + grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) + + posterior_variational.mean.gradient = grad_mu + posterior_variational.variance.gradient = grad_S + + def gradients_X(self, dL_dK, X, X2=None): #if self._X is None or X.base is not self._X.base or X2 is not None: self._K_computations(X, X2) if X2 is None: @@ -173,44 +206,15 @@ class RBF(Kernpart): else: _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) - target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) + return np.sum(gradients_X * dL_dK.T[:, :, None], 0) - def dKdiag_dX(self, dL_dKdiag, X, target): - pass + def dKdiag_dX(self, dL_dKdiag, X): + return np.zeros(X.shape[0]) #---------------------------------------# # PSI statistics # #---------------------------------------# - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): - pass - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): - self._psi_computations(Z, mu, S) - denominator = (self.lengthscale2 * (self._psi1_denom)) - dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) - target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0) - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): - self._psi_computations(Z, mu, S) - tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom - target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * 
self._psi1_dist, 1) - target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): - self._psi_computations(Z, mu, S) - term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim - term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim - dZ = self._psi2[:, :, :, None] * (term1[None] + term2) - target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) - - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - """Think N,num_inducing,num_inducing,input_dim """ - self._psi_computations(Z, mu, S) - tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom - target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) - target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) - #---------------------------------------# # Precomputations # #---------------------------------------# @@ -373,6 +377,7 @@ class RBF(Kernpart): #include #include """ + mu = param_to_array(mu) weave.inline(code, support_code=support_code, libraries=['gomp'], arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'], type_converters=weave.converters.blitz, **self.weave_options) diff --git a/GPy/kern/parts/rbf_inv.py b/GPy/kern/_src/rbf_inv.py similarity index 100% rename from GPy/kern/parts/rbf_inv.py rename to GPy/kern/_src/rbf_inv.py diff --git a/GPy/kern/parts/rbfcos.py b/GPy/kern/_src/rbfcos.py similarity index 100% rename from GPy/kern/parts/rbfcos.py rename to GPy/kern/_src/rbfcos.py diff --git a/GPy/kern/parts/spline.py b/GPy/kern/_src/spline.py similarity index 100% rename from GPy/kern/parts/spline.py rename to GPy/kern/_src/spline.py diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py new file mode 100644 index 
00000000..a6ff9424 --- /dev/null +++ b/GPy/kern/_src/stationary.py @@ -0,0 +1,211 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +from kern import Kern +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp +from ... import util +import numpy as np +from scipy import integrate + +class Stationary(Kern): + def __init__(self, input_dim, variance, lengthscale, ARD, name): + super(Stationary, self).__init__(input_dim, name) + self.ARD = ARD + if not ARD: + if lengthscale is None: + lengthscale = np.ones(1) + else: + lengthscale = np.asarray(lengthscale) + assert lengthscale.size == 1, "Only lengthscale needed for non-ARD kernel" + else: + if lengthscale is not None: + lengthscale = np.asarray(lengthscale) + assert lengthscale.size in [1, input_dim], "Bad lengthscales" + if lengthscale.size != input_dim: + lengthscale = np.ones(input_dim)*lengthscale + else: + lengthscale = np.ones(self.input_dim) + self.lengthscale = Param('lengthscale', lengthscale, Logexp()) + self.variance = Param('variance', variance, Logexp()) + assert self.variance.size==1 + self.add_parameters(self.variance, self.lengthscale) + + def _dist(self, X, X2): + if X2 is None: + X2 = X + return X[:, None, :] - X2[None, :, :] + + def _scaled_dist(self, X, X2=None): + return np.sqrt(np.sum(np.square(self._dist(X, X2) / self.lengthscale), -1)) + + def Kdiag(self, X): + ret = np.empty(X.shape[0]) + ret[:] = self.variance + return ret + + def update_gradients_diag(self, dL_dKdiag, X): + self.variance.gradient = np.sum(dL_dKdiag) + self.lengthscale.gradient = 0. 
+ + def update_gradients_full(self, dL_dK, X, X2=None): + K = self.K(X, X2) + self.variance.gradient = np.sum(K * dL_dK)/self.variance + + rinv = self._inv_dist(X, X2) + dL_dr = self.dK_dr(X, X2) * dL_dK + x_xl3 = np.square(self._dist(X, X2)) / self.lengthscale**3 + + if self.ARD: + self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0) + else: + self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum() + + def _inv_dist(self, X, X2=None): + dist = self._scaled_dist(X, X2) + if X2 is None: + nondiag = util.diag.offdiag_view(dist) + nondiag[:] = 1./nondiag + return dist + else: + return 1./np.where(dist != 0., dist, np.inf) + + def gradients_X(self, dL_dK, X, X2=None): + dL_dr = self.dK_dr(X, X2) * dL_dK + invdist = self._inv_dist(X, X2) + ret = np.sum((invdist*dL_dr)[:,:,None]*self._dist(X, X2),1)/self.lengthscale**2 + if X2 is None: + ret *= 2. + return ret + + def gradients_X_diag(self, dL_dKdiag, X): + return np.zeros(X.shape) + + + + +class Exponential(Stationary): + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Exponential'): + super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, name) + + def K(self, X, X2=None): + dist = self._scaled_dist(X, X2) + return self.variance * np.exp(-0.5 * dist) + + def dK_dr(self, X, X2): + return -0.5*self.K(X, X2) + +class Matern32(Stationary): + """ + Matern 3/2 kernel: + + .. math:: + + k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } + + """ + + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='Mat32'): + super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, name) + + def K(self, X, X2=None): + dist = self._scaled_dist(X, X2) + return self.variance * (1. + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) 
* dist) + + def dK_dr(self, X, X2): + dist = self._scaled_dist(X, X2) + return -3.*self.variance*dist*np.exp(-np.sqrt(3.)*dist) + + def Gram_matrix(self, F, F1, F2, lower, upper): + """ + Return the Gram matrix of the vector of functions F with respect to the + RKHS norm. The use of this function is limited to input_dim=1. + + :param F: vector of functions + :type F: np.array + :param F1: vector of derivatives of F + :type F1: np.array + :param F2: vector of second derivatives of F + :type F2: np.array + :param lower,upper: boundaries of the input domain + :type lower,upper: floats + """ + assert self.input_dim == 1 + def L(x, i): + return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x)) + n = F.shape[0] + G = np.zeros((n, n)) + for i in range(n): + for j in range(i, n): + G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0] + Flower = np.array([f(lower) for f in F])[:, None] + F1lower = np.array([f(lower) for f in F1])[:, None] + return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T)) + + +class Matern52(Stationary): + """ + Matern 5/2 kernel: + + .. math:: + + k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } + """ + + def K(self, X, X2=None): + r = self._scaled_dist(X, X2) + return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r) + + def dK_dr(self, X, X2): + r = self._scaled_dist(X, X2) + return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r) + + def Gram_matrix(self,F,F1,F2,F3,lower,upper): + """ + Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1. 
+ + :param F: vector of functions + :type F: np.array + :param F1: vector of derivatives of F + :type F1: np.array + :param F2: vector of second derivatives of F + :type F2: np.array + :param F3: vector of third derivatives of F + :type F3: np.array + :param lower,upper: boundaries of the input domain + :type lower,upper: floats + """ + assert self.input_dim == 1 + def L(x,i): + return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x)) + n = F.shape[0] + G = np.zeros((n,n)) + for i in range(n): + for j in range(i,n): + G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] + G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5)) + Flower = np.array([f(lower) for f in F])[:,None] + F1lower = np.array([f(lower) for f in F1])[:,None] + F2lower = np.array([f(lower) for f in F2])[:,None] + orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T) + orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T)) + return(1./self.variance* (G_coef*G + orig + orig2)) + + + + +class ExpQuad(Stationary): + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='ExpQuad'): + super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, name) + + def K(self, X, X2=None): + r = self._scaled_dist(X, X2) + return self.variance * np.exp(-0.5 * r**2) + + def dK_dr(self, X, X2): + dist = self._scaled_dist(X, X2) + return -dist*self.K(X, X2) + + + diff --git a/GPy/kern/parts/symmetric.py b/GPy/kern/_src/symmetric.py similarity index 100% rename from GPy/kern/parts/symmetric.py rename to GPy/kern/_src/symmetric.py diff --git a/GPy/kern/parts/sympy_helpers.cpp b/GPy/kern/_src/sympy_helpers.cpp similarity index 100% rename from GPy/kern/parts/sympy_helpers.cpp rename to GPy/kern/_src/sympy_helpers.cpp diff --git a/GPy/kern/parts/sympy_helpers.h 
b/GPy/kern/_src/sympy_helpers.h similarity index 100% rename from GPy/kern/parts/sympy_helpers.h rename to GPy/kern/_src/sympy_helpers.h diff --git a/GPy/kern/_src/sympykern.py b/GPy/kern/_src/sympykern.py new file mode 100644 index 00000000..3d6517a8 --- /dev/null +++ b/GPy/kern/_src/sympykern.py @@ -0,0 +1,563 @@ +# Check Matthew Rocklin's blog post. +try: + import sympy as sp + sympy_available=True +except ImportError: + sympy_available=False + exit() + +from sympy.core.cache import clear_cache +from sympy.utilities.codegen import codegen + +try: + from scipy import weave + weave_available = True +except ImportError: + weave_available = False + +import os +current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) +import sys +import numpy as np +import re +import tempfile +import pdb +import ast + +from kernpart import Kernpart +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp +# TODO have this set up in a set up file! +user_code_storage = tempfile.gettempdir() + +class spkern(Kernpart): + """ + A kernel object, where all the hard work in done by sympy. + + :param k: the covariance function + :type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2... + + To construct a new sympy kernel, you'll need to define: + - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z). + - that's it! we'll extract the variables from the function k. + + Note: + - to handle multiple inputs, call them x_1, z_1, etc + - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j. + """ + def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None): + + if name is None: + name='sympykern' + if k is None: + raise ValueError, "You must provide an argument for the covariance function." 
+ super(spkern, self).__init__(input_dim, name) + + self._sp_k = k + + # pull the variable names out of the symbolic covariance function. + sp_vars = [e for e in k.atoms() if e.is_Symbol] + self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:])) + self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:])) + + # Check that variable names make sense. + assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)]) + assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)]) + assert len(self._sp_x)==len(self._sp_z) + x_dim=len(self._sp_x) + + # If it is a multi-output covariance, add an input for indexing the outputs. + self._real_input_dim = x_dim + # Check input dim is number of xs + 1 if output_dim is >1 + assert self.input_dim == x_dim + int(output_dim > 1) + self.output_dim = output_dim + + # extract parameter names from the covariance + thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name) + + + # Look for parameters with index (subscripts), they are associated with different outputs. + if self.output_dim>1: + self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name) + self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name) + + # Make sure parameter appears with both indices! 
+ assert len(self._sp_theta_i)==len(self._sp_theta_j) + assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)]) + + # Extract names of shared parameters (those without a subscript) + self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j] + + self.num_split_params = len(self._sp_theta_i) + self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i] + for theta in self._split_theta_names: + setattr(self, theta, Param(theta, np.ones(self.output_dim), None)) + self.add_parameters(getattr(self, theta)) + + #setattr(self, theta, np.ones(self.output_dim)) + + self.num_shared_params = len(self._sp_theta) + #self.num_params = self.num_shared_params+self.num_split_params*self.output_dim + + else: + self.num_split_params = 0 + self._split_theta_names = [] + self._sp_theta = thetas + self.num_shared_params = len(self._sp_theta) + #self.num_params = self.num_shared_params + + # Add parameters to the model. + for theta in self._sp_theta: + val = 1.0 + if param is not None: + if param.has_key(theta): + val = param[theta] + #setattr(self, theta.name, val) + setattr(self, theta.name, Param(theta.name, val, None)) + self.add_parameters(getattr(self, theta.name)) + #deal with param + #self._set_params(self._get_params()) + + # Differentiate with respect to parameters. + self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta] + if self.output_dim > 1: + self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i] + + # differentiate with respect to input variables. + self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x] + + # psi_stats aren't yet implemented. 
+ if False: + self.compute_psi_stats() + + self._code = {} + + # generate the code for the covariance functions + self._gen_code() + + if weave_available: + if False: + extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'] + else: + extra_compile_args = [] + + self.weave_kwargs = { + 'support_code': None, #self._function_code, + 'include_dirs':[user_code_storage, os.path.join(current_dir,'parts/')], + 'headers':['"sympy_helpers.h"', '"'+self.name+'.h"'], + 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp"), os.path.join(user_code_storage, self.name+'.cpp')], + 'extra_compile_args':extra_compile_args, + 'extra_link_args':['-lgomp'], + 'verbose':True} + self.parameters_changed() # initializes caches + + + def __add__(self,other): + return spkern(self._sp_k+other._sp_k) + + def _gen_code(self): + + argument_sequence = self._sp_x+self._sp_z+self._sp_theta + code_list = [('k',self._sp_k)] + # gradients with respect to covariance input + code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)] + # gradient with respect to parameters + code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)] + # gradient with respect to multiple output parameters + if self.output_dim > 1: + argument_sequence += self._sp_theta_i + self._sp_theta_j + code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)] + # generate c functions from sympy objects + if weave_available: + code_type = "C" + else: + code_type = "PYTHON" + # Need to add the sympy_helpers header in here. + (foo_c,self._function_code), (foo_h,self._function_header) = \ + codegen(code_list, + code_type, + self.name, + argument_sequence=argument_sequence) + + + # Use weave to compute the underlying functions. 
+ if weave_available: + # put the header file where we can find it + f = file(os.path.join(user_code_storage, self.name + '.h'),'w') + f.write(self._function_header) + f.close() + + + if weave_available: + # Substitute any known derivatives which sympy doesn't compute + self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) + # put the cpp file in user code storage (defaults to temp file location) + f = file(os.path.join(user_code_storage, self.name + '.cpp'),'w') + else: + # put the python file in user code storage + f = file(os.path.join(user_code_storage, self.name + '.py'),'w') + f.write(self._function_code) + f.close() + + if weave_available: + # arg_list will store the arguments required for the C code. + input_arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x] + + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z]) + + # for multiple outputs reverse argument list is also required + if self.output_dim>1: + reverse_input_arg_list = list(input_arg_list) + reverse_input_arg_list.reverse() + + # This gives the parameters for the arg list. + param_arg_list = [shared_params.name for shared_params in self._sp_theta] + arg_list = input_arg_list + param_arg_list + + precompute_list=[] + if self.output_dim > 1: + reverse_arg_list= reverse_input_arg_list + list(param_arg_list) + # For multiple outputs, also need the split parameters. + split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i] + split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i] + arg_list += split_param_arg_list + reverse_arg_list += split_param_reverse_arg_list + # Extract the right output indices from the inputs. 
+ c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])] + precompute_list += c_define_output_indices + reverse_arg_string = ", ".join(reverse_arg_list) + arg_string = ", ".join(arg_list) + precompute_string = "\n".join(precompute_list) + + # Now we use the arguments in code that computes the separate parts. + + # Any precomputations will be done here eventually. + self._precompute = \ + """ + // Precompute code would go here. It will be called when parameters are updated. + """ + + # Here's the code to do the looping for K + self._code['K'] =\ + """ + // _K_code + // Code for computing the covariance function. + int i; + int j; + int n = target_array->dimensions[0]; + int num_inducing = target_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for + for (i=0;i1: + for i, theta in enumerate(self._sp_theta_i): + grad_func_list = [' '*26 + 'TARGET1(ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, arg_string)] + grad_func_list += [' '*26 + 'TARGET1(jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, reverse_arg_string)] + grad_func_list = c_define_output_indices+grad_func_list + + grad_func_string = '\n'.join(grad_func_list) + self._code['dK_d' + theta.name] =\ + """ + int i; + int j; + int n = partial_array->dimensions[0]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + for (i=0;idimensions[0]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + for (i=0;i1: + gradX_func_list += c_define_output_indices + 
gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)] + gradX_func_string = "\n".join(gradX_func_list) + + self._code['dK_dX'] = \ + """ + // _dK_dX_code + // Code for computing gradient of covariance with respect to inputs. + int i; + int j; + int n = partial_array->dimensions[0]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; + //#pragma omp parallel for private(j) + for (i=0;idimensions[0]; + int input_dim = X_array->dimensions[1]; + for (int i=0;i1: + arg_names += self._split_theta_names + arg_names += ['output_dim'] + return arg_names + + def _generate_inline(self, code, X, target=None, Z=None, partial=None): + output_dim = self.output_dim + # Need to extract parameters to local variables first + for shared_params in self._sp_theta: + locals()[shared_params.name] = getattr(self, shared_params.name) + + for split_params in self._split_theta_names: + locals()[split_params] = np.asarray(getattr(self, split_params)) + arg_names = self._get_arg_names(target, Z, partial) + + if weave_available: + return weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs) + else: + raise RuntimeError('Weave not available and other variants of sympy covariance not yet implemented') + + def K(self,X,Z,target): + if Z is None: + self._generate_inline(self._code['K_X'], X, target) + else: + self._generate_inline(self._code['K'], X, target, Z) + + + def Kdiag(self,X,target): + self._generate_inline(self._code['Kdiag'], X, target) + + def _param_grad_helper(self,partial,X,Z,target): + if Z is None: + self._generate_inline(self._code['dK_dtheta_X'], X, target, Z, partial) + else: + self._generate_inline(self._code['dK_dtheta'], X, target, Z, partial) + + def dKdiag_dtheta(self,partial,X,target): + self._generate_inline(self._code['dKdiag_dtheta'], X, target, Z=None, partial=partial).namelocals()[shared_params.name] = getattr(self, shared_params.name) + 
+ def gradients_X(self,partial,X,Z,target): + if Z is None: + self._generate_inline(self._code['dK_dX_X'], X, target, Z, partial) + else: + self._generate_inline(self._code['dK_dX'], X, target, Z, partial) + + def dKdiag_dX(self,partial,X,target): + self._generate_inline(self._code['dKdiag_dX'], X, target, Z, partial) + + def compute_psi_stats(self): + #define some normal distributions + mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)] + Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)] + normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)] + + #do some integration! + #self._sp_psi0 = ?? + self._sp_psi1 = self._sp_k + for i in range(self.input_dim): + print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim) + sys.stdout.flush() + self._sp_psi1 *= normals[i] + self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo)) + clear_cache() + self._sp_psi1 = self._sp_psi1.simplify() + + #and here's psi2 (eek!) 
+ zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)] + self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime)) + for i in range(self.input_dim): + print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim) + sys.stdout.flush() + self._sp_psi2 *= normals[i] + self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo)) + clear_cache() + self._sp_psi2 = self._sp_psi2.simplify() + + def parameters_changed(self): + # Reset the caches + self._cache, self._cache2 = np.empty(shape=(2, 1)) + self._cache3, self._cache4, self._cache5 = np.empty(shape=(3, 1)) + + def update_gradients_full(self, dL_dK, X): + # Need to extract parameters to local variables first + self._K_computations(X, None) + for shared_params in self._sp_theta: + parameter = getattr(self, shared_params.name) + code = self._code['dK_d' + shared_params.name] + setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK)) + + for split_params in self._split_theta_names: + parameter = getattr(self, split_params.name) + code = self._code['dK_d' + split_params.name] + setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK)) + + + # def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): + # #contributions from Kdiag + # self.variance.gradient = np.sum(dL_dKdiag) + + # #from Knm + # self._K_computations(X, Z) + # self.variance.gradient += np.sum(dL_dKnm * self._K_dvar) + # if self.ARD: + # self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z) + + # else: + # self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm) + + # #from Kmm + # self._K_computations(Z, None) + # self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) + # if self.ARD: + # self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) + # else: + # self.lengthscale.gradient += (self.variance / 
self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) + + + #---------------------------------------# + # Precomputations # + #---------------------------------------# + + def _K_computations(self, X, Z): + if Z is None: + self._generate_inline(self._precompute, X) + else: + self._generate_inline(self._precompute, X, Z=Z) diff --git a/GPy/kern/parts/white.py b/GPy/kern/_src/white.py similarity index 79% rename from GPy/kern/parts/white.py rename to GPy/kern/_src/white.py index c7e4c6dd..d20e2fe1 100644 --- a/GPy/kern/parts/white.py +++ b/GPy/kern/_src/white.py @@ -1,12 +1,12 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import Kernpart +from kern import Kern import numpy as np from ...core.parameterization import Param from ...core.parameterization.transformations import Logexp -class White(Kernpart): +class White(Kern): """ White noise kernel. @@ -20,14 +20,17 @@ class White(Kernpart): self.input_dim = input_dim self.variance = Param('variance', variance, Logexp()) self.add_parameters(self.variance) - self._psi1 = 0 # TODO: more elegance here - def K(self,X,X2,target): + def K(self, X, X2=None): if X2 is None: - target += np.eye(X.shape[0])*self.variance + return np.eye(X.shape[0])*self.variance + else: + return np.zeros((X.shape[0], X2.shape[0])) - def Kdiag(self,X,target): - target += self.variance + def Kdiag(self,X): + ret = np.ones(X.shape[0]) + ret[:] = self.variance + return ret def update_gradients_full(self, dL_dK, X): self.variance.gradient = np.trace(dL_dK) @@ -38,14 +41,8 @@ class White(Kernpart): def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): raise NotImplementedError - def dKdiag_dtheta(self,dL_dKdiag,X,target): - target += np.sum(dL_dKdiag) - - def gradients_X(self,dL_dK,X,X2,target): - pass - - def dKdiag_dX(self,dL_dKdiag,X,target): - pass + def gradients_X(self,dL_dK,X,X2): + return np.zeros_like(X) def 
psi0(self,Z,mu,S,target): pass # target += self.variance diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py deleted file mode 100644 index 53728d0d..00000000 --- a/GPy/kern/kern.py +++ /dev/null @@ -1,680 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -import sys -import numpy as np -import itertools -from parts.prod import Prod as prod -from parts.linear import Linear -from parts.kernpart import Kernpart -from ..core.parameterization import Parameterized -from GPy.core.parameterization.param import Param - -class kern(Parameterized): - def __init__(self, input_dim, parts=[], input_slices=None): - """ - This is the main kernel class for GPy. It handles multiple - (additive) kernel functions, and keeps track of various things - like which parameters live where. - - The technical code for kernels is divided into _parts_ (see - e.g. rbf.py). This object contains a list of parts, which are - computed additively. For multiplication, special _prod_ parts - are used. 
- - :param input_dim: The dimensionality of the kernel's input space - :type input_dim: int - :param parts: the 'parts' (PD functions) of the kernel - :type parts: list of Kernpart objects - :param input_slices: the slices on the inputs which apply to each kernel - :type input_slices: list of slice objects, or list of bools - - """ - super(kern, self).__init__('kern') - self.add_parameters(*parts) - self.input_dim = input_dim - - if input_slices is None: - self.input_slices = [slice(None) for p in self._parameters_] - else: - assert len(input_slices) == len(self._parameters_) - self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices] - - for p in self._parameters_: - assert isinstance(p, Kernpart), "bad kernel part" - - def parameters_changed(self): - [p.parameters_changed() for p in self._parameters_] - - def connect_input(self, Xparam): - [p.connect_input(Xparam) for p in self._parameters_] - - def _getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - """ - return Parameterized._getstate(self) + [#self._parameters_, - #self.num_params, - self.input_dim, - self.input_slices, - self._param_slices_ - ] - - def _setstate(self, state): - self._param_slices_ = state.pop() - self.input_slices = state.pop() - self.input_dim = state.pop() - #self.num_params = state.pop() - #self._parameters_ = state.pop() - Parameterized._setstate(self, state) - - - def plot_ARD(self, *args): - """If an ARD kernel is present, plot a bar representation using matplotlib - - See GPy.plotting.matplot_dep.plot_ARD - """ - assert "matplotlib" in sys.modules, "matplotlib package has not been imported." - from ..plotting.matplot_dep import kernel_plots - return kernel_plots.plot_ARD(self,*args) - -# def _transform_gradients(self, g): -# """ -# Apply the transformations of the kernel so that the returned vector -# represents the gradient in the transformed space (i.e. 
that given by -# get_params_transformed()) -# -# :param g: the gradient vector for the current model, usually created by _param_grad_helper -# """ -# x = self._get_params() -# [np.place(g, index, g[index] * constraint.gradfactor(x[index])) -# for constraint, index in self.constraints.iteritems() if constraint is not __fixed__] -# # for constraint, index in self.constraints.iteritems(): -# # if constraint != __fixed__: -# # g[index] = g[index] * constraint.gradfactor(x[index]) -# #[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]] -# [np.put(g, i, v) for i, v in [[i, t.sum()] for p in self._parameters_ for t,i in p._tied_to_me_.iteritems()]] -# # if len(self.tied_indices) or len(self.fixed_indices): -# # to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices])) -# # return np.delete(g, to_remove) -# # else: -# if self._fixes_ is not None: return g[self._fixes_] -# return g -# x = self._get_params() -# [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)] -# [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]] -# if len(self.tied_indices) or len(self.fixed_indices): -# to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices])) -# return np.delete(g, to_remove) -# else: -# return g - - def __add__(self, other): - """ Overloading of the '+' operator. for more control, see self.add """ - return self.add(other) - - def add(self, other, tensor=False): - """ - Add another kernel to this one. - - If Tensor is False, both kernels are defined on the same _space_. then - the created kernel will have the same number of inputs as self and - other (which must be the same). 
- - If Tensor is True, then the dimensions are stacked 'horizontally', so - that the resulting kernel has self.input_dim + other.input_dim - - :param other: the other kernel to be added - :type other: GPy.kern - - """ - if tensor: - D = self.input_dim + other.input_dim - self_input_slices = [slice(*sl.indices(self.input_dim)) for sl in self.input_slices] - other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices] - other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices] - - newkern = kern(D, self._parameters_ + other._parameters_, self_input_slices + other_input_slices) - - # transfer constraints: -# newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices] -# newkern.constraints = self.constraints + other.constraints -# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices] -# newkern.fixed_values = self.fixed_values + other.fixed_values -# newkern.constraints = self.constraints + other.constraints -# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices] - else: - assert self.input_dim == other.input_dim - newkern = kern(self.input_dim, self._parameters_ + other._parameters_, self.input_slices + other.input_slices) - # transfer constraints: -# newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices] -# newkern.constraints = self.constraints + other.constraints -# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices] -# newkern.fixed_values = self.fixed_values + other.fixed_values -# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices] - - [newkern.constraints.add(transform, ind) for transform, ind in self.constraints.iteritems()] - [newkern.constraints.add(transform, ind+self.size) for transform, ind in 
other.constraints.iteritems()] - newkern._fixes_ = ((self._fixes_ or 0) + (other._fixes_ or 0)) or None - - return newkern - - def __call__(self, X, X2=None): - return self.K(X, X2) - - def __mul__(self, other): - """ Here we overload the '*' operator. See self.prod for more information""" - return self.prod(other) - - def __pow__(self, other, tensor=False): - """ - Shortcut for tensor `prod`. - """ - return self.prod(other, tensor=True) - - def prod(self, other, tensor=False): - """ - Multiply two kernels (either on the same space, or on the tensor product of the input space). - - :param other: the other kernel to be added - :type other: GPy.kern - :param tensor: whether or not to use the tensor space (default is false). - :type tensor: bool - - """ - K1 = self - K2 = other - #K1 = self.copy() - #K2 = other.copy() - - slices = [] - for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices): - s1, s2 = [False] * K1.input_dim, [False] * K2.input_dim - s1[sl1], s2[sl2] = [True], [True] - slices += [s1 + s2] - - newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1._parameters_, K2._parameters_)] - - if tensor: - newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices) - else: - newkern = kern(K1.input_dim, newkernparts, slices) - - #newkern._follow_constrains(K1, K2) - return newkern - -# def _follow_constrains(self, K1, K2): -# -# # Build the array that allows to go from the initial indices of the param to the new ones -# K1_param = [] -# n = 0 -# for k1 in K1.parts: -# K1_param += [range(n, n + k1.num_params)] -# n += k1.num_params -# n = 0 -# K2_param = [] -# for k2 in K2.parts: -# K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)] -# n += k2.num_params -# index_param = [] -# for p1 in K1_param: -# for p2 in K2_param: -# index_param += p1 + p2 -# index_param = np.array(index_param) -# -# # Get the ties and constrains of the kernels before the multiplication -# prev_ties = K1.tied_indices + [arr + 
K1.num_params for arr in K2.tied_indices] -# -# prev_constr_ind = [K1.constrained_indices] + [K1.num_params + i for i in K2.constrained_indices] -# prev_constr = K1.constraints + K2.constraints -# -# # prev_constr_fix = K1.fixed_indices + [arr + K1.num_params for arr in K2.fixed_indices] -# # prev_constr_fix_values = K1.fixed_values + K2.fixed_values -# -# # follow the previous ties -# for arr in prev_ties: -# for j in arr: -# index_param[np.where(index_param == j)[0]] = arr[0] -# -# # ties and constrains -# for i in range(K1.num_params + K2.num_params): -# index = np.where(index_param == i)[0] -# if index.size > 1: -# self.tie_params(index) -# for i, t in zip(prev_constr_ind, prev_constr): -# self.constrain(np.where(index_param == i)[0], t) -# -# def _get_params(self): -# return np.hstack(self._parameters_) -# return np.hstack([p._get_params() for p in self._parameters_]) - -# def _set_params(self, x): -# import ipdb;ipdb.set_trace() -# [p._set_params(x[s]) for p, s in zip(self._parameters_, self._param_slices_)] - -# def _get_param_names(self): -# # this is a bit nasty: we want to distinguish between parts with the same name by appending a count -# part_names = np.array([k.name for k in self._parameters_], dtype=np.str) -# counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)] -# cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)] -# names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)] -# -# return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self._parameters_)], []) - - def K(self, X, X2=None, which_parts='all'): - """ - Compute the kernel function. - - :param X: the first set of inputs to the kernel - :param X2: (optional) the second set of arguments to the kernel. If X2 - is None, this is passed throgh to the 'part' object, which - handles this as X2 == X. 
- :param which_parts: a list of booleans detailing whether to include - each of the part functions. By default, 'all' - indicates all parts - """ - if which_parts == 'all': - which_parts = [True] * self.size - assert X.shape[1] == self.input_dim - if X2 is None: - target = np.zeros((X.shape[0], X.shape[0])) - [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] - else: - target = np.zeros((X.shape[0], X2.shape[0])) - [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] - return target - - def update_gradients_full(self, dL_dK, X): - [p.update_gradients_full(dL_dK, X) for p in self._parameters_] - - def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_] - - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] - - def _param_grad_helper(self, dL_dK, X, X2=None): - """ - Compute the gradient of the covariance function with respect to the parameters. - - :param dL_dK: An array of gradients of the objective function with respect to the covariance function. 
- :type dL_dK: Np.ndarray (num_samples x num_inducing) - :param X: Observed data inputs - :type X: np.ndarray (num_samples x input_dim) - :param X2: Observed data inputs (optional, defaults to X) - :type X2: np.ndarray (num_inducing x input_dim) - - returns: dL_dtheta - """ - assert X.shape[1] == self.input_dim - target = np.zeros(self.size) - if X2 is None: - [p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] - else: - [p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] - - return self._transform_gradients(target) - - def gradients_X(self, dL_dK, X, X2=None): - """Compute the gradient of the objective function with respect to X. - - :param dL_dK: An array of gradients of the objective function with respect to the covariance function. - :type dL_dK: np.ndarray (num_samples x num_inducing) - :param X: Observed data inputs - :type X: np.ndarray (num_samples x input_dim) - :param X2: Observed data inputs (optional, defaults to X) - :type X2: np.ndarray (num_inducing x input_dim)""" - - target = np.zeros_like(X) - if X2 is None: - [p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - else: - [p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def Kdiag(self, X, which_parts='all'): - """Compute the diagonal of the covariance function for inputs X.""" - if which_parts == 'all': - which_parts = [True] * self.size - assert X.shape[1] == self.input_dim - target = np.zeros(X.shape[0]) - [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on] - return target - - def dKdiag_dtheta(self, dL_dKdiag, X): - """Compute the gradient of the diagonal of the covariance function with respect to 
the parameters.""" - assert X.shape[1] == self.input_dim - assert dL_dKdiag.size == X.shape[0] - target = np.zeros(self.size) - [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] - return self._transform_gradients(target) - - def dKdiag_dX(self, dL_dKdiag, X): - assert X.shape[1] == self.input_dim - target = np.zeros_like(X) - [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def psi0(self, Z, mu, S): - target = np.zeros(mu.shape[0]) - [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): - target = np.zeros(self.size) - [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] - return self._transform_gradients(target) - - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S): - target_mu, target_S = np.zeros_like(mu), np.zeros_like(S) - [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target_mu, target_S - - def psi1(self, Z, mu, S): - target = np.zeros((mu.shape[0], Z.shape[0])) - [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): - target = np.zeros((self.size)) - [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] - return self._transform_gradients(target) - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S): - target = np.zeros_like(Z) - [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def 
dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): - """return shapes are num_samples,num_inducing,input_dim""" - target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target_mu, target_S - - def psi2(self, Z, mu, S): - """ - Computer the psi2 statistics for the covariance function. - - :param Z: np.ndarray of inducing inputs (num_inducing x input_dim) - :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim) - :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing) - - """ - target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) - [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - - # compute the "cross" terms - # TODO: input_slices needed - crossterms = 0 - - for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2): - if i_s1 == i_s2: - # TODO psi1 this must be faster/better/precached/more nice - tmp1 = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1) - tmp2 = np.zeros((mu.shape[0], Z.shape[0])) - p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2) - - prod = np.multiply(tmp1, tmp2) - crossterms += prod[:, :, None] + prod[:, None, :] - - target += crossterms - return target - - def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): - """Gradient of the psi2 statistics with respect to the parameters.""" - target = np.zeros(self.size) - [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] - - # compute the "cross" terms - # TODO: better looping, input_slices - for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2): - p1, p2 = self._parameters_[i1], self._parameters_[i2] -# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2] - 
ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2] - - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2]) - - return self._transform_gradients(target) - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S): - target = np.zeros_like(Z) - [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - # target *= 2 - - # compute the "cross" terms - # TODO: we need input_slices here. - for p1, p2 in itertools.permutations(self._parameters_, 2): -# if p1.name == 'linear' and p2.name == 'linear': -# raise NotImplementedError("We don't handle linear/linear cross-terms") - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target) - - return target * 2 - - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S): - target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - - # compute the "cross" terms - # TODO: we need input_slices here. - for p1, p2 in itertools.permutations(self._parameters_, 2): -# if p1.name == 'linear' and p2.name == 'linear': -# raise NotImplementedError("We don't handle linear/linear cross-terms") - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S) - - return target_mu, target_S - - def plot(self, *args, **kwargs): - """ - See GPy.plotting.matplot_dep.plot - """ - assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
- from ..plotting.matplot_dep import kernel_plots - kernel_plots.plot(self,*args) - -from GPy.core.model import Model - -class Kern_check_model(Model): - """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel.""" - def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): - Model.__init__(self, 'kernel_test_model') - num_samples = 20 - num_samples2 = 10 - if kernel==None: - kernel = GPy.kern.rbf(1) - if X==None: - X = np.random.randn(num_samples, kernel.input_dim) - if dL_dK==None: - if X2==None: - dL_dK = np.ones((X.shape[0], X.shape[0])) - else: - dL_dK = np.ones((X.shape[0], X2.shape[0])) - - self.kernel=kernel - self.add_parameter(kernel) - self.X = X - self.X2 = X2 - self.dL_dK = dL_dK - - def is_positive_definite(self): - v = np.linalg.eig(self.kernel.K(self.X))[0] - if any(v<-10*sys.float_info.epsilon): - return False - else: - return True - - def log_likelihood(self): - return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum() - - def _log_likelihood_gradients(self): - raise NotImplementedError, "This needs to be implemented to use the kern_check_model class." - -class Kern_check_dK_dtheta(Kern_check_model): - """This class allows gradient checks for the gradient of a kernel with respect to parameters. 
""" - def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): - Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) - - def _log_likelihood_gradients(self): - return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2) - -class Kern_check_dKdiag_dtheta(Kern_check_model): - """This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters.""" - def __init__(self, kernel=None, dL_dK=None, X=None): - Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) - if dL_dK==None: - self.dL_dK = np.ones((self.X.shape[0])) - def parameters_changed(self): - self.kernel.update_gradients_full(self.dL_dK, self.X) - - def log_likelihood(self): - return (self.dL_dK*self.kernel.Kdiag(self.X)).sum() - - def _log_likelihood_gradients(self): - return self.kernel.dKdiag_dtheta(self.dL_dK, self.X) - -class Kern_check_dK_dX(Kern_check_model): - """This class allows gradient checks for the gradient of a kernel with respect to X. """ - def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): - Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) - self.remove_parameter(kernel) - self.X = Param('X', self.X) - self.add_parameter(self.X) - def _log_likelihood_gradients(self): - return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten() - -class Kern_check_dKdiag_dX(Kern_check_dK_dX): - """This class allows gradient checks for the gradient of a kernel diagonal with respect to X. 
""" - def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): - Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) - if dL_dK==None: - self.dL_dK = np.ones((self.X.shape[0])) - - def log_likelihood(self): - return (self.dL_dK*self.kernel.Kdiag(self.X)).sum() - - def _log_likelihood_gradients(self): - return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten() - -def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False): - """ - This function runs on kernels to check the correctness of their - implementation. It checks that the covariance function is positive definite - for a randomly generated data set. - - :param kern: the kernel to be tested. - :type kern: GPy.kern.Kernpart - :param X: X input values to test the covariance function. - :type X: ndarray - :param X2: X2 input values to test the covariance function. - :type X2: ndarray - - """ - pass_checks = True - if X==None: - X = np.random.randn(10, kern.input_dim) - if output_ind is not None: - X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0]) - if X2==None: - X2 = np.random.randn(20, kern.input_dim) - if output_ind is not None: - X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0]) - - if verbose: - print("Checking covariance function is positive definite.") - result = Kern_check_model(kern, X=X).is_positive_definite() - if result and verbose: - print("Check passed.") - if not result: - print("Positive definite check failed for " + kern.name + " covariance function.") - pass_checks = False - return False - - if verbose: - print("Checking gradients of K(X, X) wrt theta.") - result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. 
Gradient values as follows:") - Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of K(X, X2) wrt theta.") - result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of Kdiag(X) wrt theta.") - result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of K(X, X) wrt X.") - try: - result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose) - except NotImplementedError: - result=True - if verbose: - print("gradients_X not implemented for " + kern.name) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of K(X, X2) wrt X.") - try: - result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose) - except NotImplementedError: - result=True - if verbose: - print("gradients_X not implemented for " + kern.name) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. 
Gradient values as follows:") - Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of Kdiag(X) wrt X.") - try: - result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose) - except NotImplementedError: - result=True - if verbose: - print("gradients_X not implemented for " + kern.name) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True) - pass_checks = False - return False - - return pass_checks diff --git a/GPy/kern/parts/Brownian.py b/GPy/kern/parts/Brownian.py deleted file mode 100644 index 488e9b7a..00000000 --- a/GPy/kern/parts/Brownian.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -from kernpart import Kernpart -import numpy as np - -def theta(x): - """Heavisdie step function""" - return np.where(x>=0.,1.,0.) - -class Brownian(Kernpart): - """ - Brownian Motion kernel. 
- - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: - :type variance: float - """ - def __init__(self,input_dim,variance=1.): - self.input_dim = input_dim - assert self.input_dim==1, "Brownian motion in 1D only" - self.num_params = 1 - self.name = 'Brownian' - self._set_params(np.array([variance]).flatten()) - - def _get_params(self): - return self.variance - - def _set_params(self,x): - assert x.shape==(1,) - self.variance = x - - def _get_param_names(self): - return ['variance'] - - def K(self,X,X2,target): - if X2 is None: - X2 = X - target += self.variance*np.fmin(X,X2.T) - - def Kdiag(self,X,target): - target += self.variance*X.flatten() - - def _param_grad_helper(self,dL_dK,X,X2,target): - if X2 is None: - X2 = X - target += np.sum(np.fmin(X,X2.T)*dL_dK) - - def dKdiag_dtheta(self,dL_dKdiag,X,target): - target += np.dot(X.flatten(), dL_dKdiag) - - def gradients_X(self,dL_dK,X,X2,target): - raise NotImplementedError, "TODO" - #target += self.variance - #target -= self.variance*theta(X-X2.T) - #if X.shape==X2.shape: - #if np.all(X==X2): - #np.add(target[:,:,0],self.variance*np.diag(X2.flatten()-X.flatten()),target[:,:,0]) - - - def dKdiag_dX(self,dL_dKdiag,X,target): - target += self.variance*dL_dKdiag[:,None] - diff --git a/GPy/kern/parts/Matern32.py b/GPy/kern/parts/Matern32.py deleted file mode 100644 index 08fa452c..00000000 --- a/GPy/kern/parts/Matern32.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -from kernpart import Kernpart -import numpy as np -from scipy import integrate - -class Matern32(Kernpart): - """ - Matern 3/2 kernel: - - .. 
math:: - - k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } - - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: the variance :math:`\sigma^2` - :type variance: float - :param lengthscale: the vector of lengthscale :math:`\ell_i` - :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) - :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. - :type ARD: Boolean - :rtype: kernel object - - """ - - def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False): - self.input_dim = input_dim - self.ARD = ARD - if ARD == False: - self.num_params = 2 - self.name = 'Mat32' - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - lengthscale = np.ones(1) - else: - self.num_params = self.input_dim + 1 - self.name = 'Mat32' - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - lengthscale = np.ones(self.input_dim) - self._set_params(np.hstack((variance, lengthscale.flatten()))) - - def _get_params(self): - """return the value of the parameters.""" - return np.hstack((self.variance, self.lengthscale)) - - def _set_params(self, x): - """set the value of the parameters.""" - assert x.size == self.num_params - self.variance = x[0] - self.lengthscale = x[1:] - - def _get_param_names(self): - """return parameter names.""" - if self.num_params == 2: - return ['variance', 'lengthscale'] - else: - return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)] - - def K(self, X, X2, target): - """Compute the covariance matrix 
between X and X2.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) - np.add(self.variance * (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist), target, target) - - def Kdiag(self, X, target): - """Compute the diagonal of the covariance matrix associated to X.""" - np.add(target, self.variance, target) - - def _param_grad_helper(self, dL_dK, X, X2, target): - """derivative of the covariance matrix with respect to the parameters.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) - dvar = (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist) - invdist = 1. / np.where(dist != 0., dist, np.inf) - dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3 - # dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - target[0] += np.sum(dvar * dL_dK) - if self.ARD == True: - dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist))[:, :, np.newaxis] * dist2M * invdist[:, :, np.newaxis] - # dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None] - target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0) - else: - dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) 
* dist)) * dist2M.sum(-1) * invdist - # dl = self.variance*dvar*dist2M.sum(-1)*invdist - target[1] += np.sum(dl * dL_dK) - - def dKdiag_dtheta(self, dL_dKdiag, X, target): - """derivative of the diagonal of the covariance matrix with respect to the parameters.""" - target[0] += np.sum(dL_dKdiag) - - def gradients_X(self, dL_dK, X, X2, target): - """derivative of the covariance matrix with respect to X.""" - if X2 is None: - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None] - ddist_dX = 2*(X[:, None, :] - X[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf) - - else: - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None] - ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf) - gradients_X = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2)) - target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) - - def dKdiag_dX(self, dL_dKdiag, X, target): - pass - - def Gram_matrix(self, F, F1, F2, lower, upper): - """ - Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1. - - :param F: vector of functions - :type F: np.array - :param F1: vector of derivatives of F - :type F1: np.array - :param F2: vector of second derivatives of F - :type F2: np.array - :param lower,upper: boundaries of the input domain - :type lower,upper: floats - """ - assert self.input_dim == 1 - def L(x, i): - return(3. 
/ self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x)) - n = F.shape[0] - G = np.zeros((n, n)) - for i in range(n): - for j in range(i, n): - G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0] - Flower = np.array([f(lower) for f in F])[:, None] - F1lower = np.array([f(lower) for f in F1])[:, None] - # print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n" - # return(G) - return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T)) diff --git a/GPy/kern/parts/Matern52.py b/GPy/kern/parts/Matern52.py deleted file mode 100644 index 7d36254c..00000000 --- a/GPy/kern/parts/Matern52.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -from kernpart import Kernpart -import numpy as np -import hashlib -from scipy import integrate - -class Matern52(Kernpart): - """ - Matern 5/2 kernel: - - .. math:: - - k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } - - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: the variance :math:`\sigma^2` - :type variance: float - :param lengthscale: the vector of lengthscale :math:`\ell_i` - :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) - :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. 
- :type ARD: Boolean - :rtype: kernel object - - """ - def __init__(self,input_dim,variance=1.,lengthscale=None,ARD=False): - self.input_dim = input_dim - self.ARD = ARD - if ARD == False: - self.num_params = 2 - self.name = 'Mat52' - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - lengthscale = np.ones(1) - else: - self.num_params = self.input_dim + 1 - self.name = 'Mat52' - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - lengthscale = np.ones(self.input_dim) - self._set_params(np.hstack((variance,lengthscale.flatten()))) - - def _get_params(self): - """return the value of the parameters.""" - return np.hstack((self.variance,self.lengthscale)) - - def _set_params(self,x): - """set the value of the parameters.""" - assert x.size == self.num_params - self.variance = x[0] - self.lengthscale = x[1:] - - def _get_param_names(self): - """return parameter names.""" - if self.num_params == 2: - return ['variance','lengthscale'] - else: - return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] - - def K(self,X,X2,target): - """Compute the covariance matrix between X and X2.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) - np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target) - - def Kdiag(self,X,target): - """Compute the diagonal of the covariance matrix associated to X.""" - np.add(target,self.variance,target) - - def _param_grad_helper(self,dL_dK,X,X2,target): - """derivative of the covariance matrix with respect to the parameters.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) - invdist = 1./np.where(dist!=0.,dist,np.inf) - dist2M = 
np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3 - dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist) - dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - target[0] += np.sum(dvar*dL_dK) - if self.ARD: - dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0) - else: - dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist - #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist - target[1] += np.sum(dl*dL_dK) - - def dKdiag_dtheta(self,dL_dKdiag,X,target): - """derivative of the diagonal of the covariance matrix with respect to the parameters.""" - target[0] += np.sum(dL_dKdiag) - - def gradients_X(self,dL_dK,X,X2,target): - """derivative of the covariance matrix with respect to X.""" - if X2 is None: - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None] - ddist_dX = 2*(X[:,None,:]-X[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) - else: - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] - ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) - gradients_X = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2)) - target += np.sum(gradients_X*dL_dK.T[:,:,None],0) - - def dKdiag_dX(self,dL_dKdiag,X,target): - pass - - def Gram_matrix(self,F,F1,F2,F3,lower,upper): - """ - Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1. 
- - :param F: vector of functions - :type F: np.array - :param F1: vector of derivatives of F - :type F1: np.array - :param F2: vector of second derivatives of F - :type F2: np.array - :param F3: vector of third derivatives of F - :type F3: np.array - :param lower,upper: boundaries of the input domain - :type lower,upper: floats - """ - assert self.input_dim == 1 - def L(x,i): - return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x)) - n = F.shape[0] - G = np.zeros((n,n)) - for i in range(n): - for j in range(i,n): - G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] - G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5)) - Flower = np.array([f(lower) for f in F])[:,None] - F1lower = np.array([f(lower) for f in F1])[:,None] - F2lower = np.array([f(lower) for f in F2])[:,None] - orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T) - orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T)) - return(1./self.variance* (G_coef*G + orig + orig2)) - - - diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py deleted file mode 100644 index 0a758f1e..00000000 --- a/GPy/kern/parts/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -import bias -import Brownian -import coregionalize -import exponential -import eq_ode1 -import finite_dimensional -import fixed -import gibbs -import hetero -import hierarchical -import independent_outputs -import linear -import Matern32 -import Matern52 -import mlp -import ODE_1 -import periodic_exponential -import periodic_Matern32 -import periodic_Matern52 -import poly -import prod_orthogonal -import prod -import rational_quadratic -import rbfcos -import rbf -import rbf_inv -import spline -import symmetric -import white diff --git a/GPy/kern/parts/bias.py b/GPy/kern/parts/bias.py deleted file mode 100644 index d2301bcd..00000000 --- 
a/GPy/kern/parts/bias.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -from kernpart import Kernpart -from ...core.parameterization import Param - -class Bias(Kernpart): - def __init__(self,input_dim,variance=1.,name=None): - """ - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: the variance of the kernel - :type variance: float - """ - super(Bias, self).__init__(input_dim, name) - from ...core.parameterization.transformations import Logexp - self.variance = Param("variance", variance, Logexp()) - self.add_parameter(self.variance) - - def K(self,X,X2,target): - target += self.variance - - def Kdiag(self,X,target): - target += self.variance - - #def dK_dtheta(self,dL_dKdiag,X,X2,target): - #target += dL_dKdiag.sum() - def update_gradients_full(self, dL_dK, X): - self.variance.gradient = dL_dK.sum() - - def dKdiag_dtheta(self,dL_dKdiag,X,target): - target += dL_dKdiag.sum() - - def gradients_X(self, dL_dK,X, X2, target): - pass - - def dKdiag_dX(self,dL_dKdiag,X,target): - pass - - - #---------------------------------------# - # PSI statistics # - #---------------------------------------# - - def psi0(self, Z, mu, S, target): - target += self.variance - - def psi1(self, Z, mu, S, target): - self._psi1 = self.variance - target += self._psi1 - - def psi2(self, Z, mu, S, target): - target += self.variance**2 - - def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target): - target += dL_dpsi0.sum() - - def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target): - target += dL_dpsi1.sum() - - def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target): - target += 2.*self.variance*dL_dpsi2.sum() - - def dpsi0_dZ(self, dL_dpsi0, Z, mu, S, target): - pass - - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): - pass - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): - pass - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): - 
pass - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): - pass - - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - pass diff --git a/GPy/kern/parts/exponential.py b/GPy/kern/parts/exponential.py deleted file mode 100644 index 372d4d9b..00000000 --- a/GPy/kern/parts/exponential.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -from kernpart import Kernpart -import numpy as np -from scipy import integrate - -class Exponential(Kernpart): - """ - Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2) - - .. math:: - - k(r) = \sigma^2 \exp(- r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } - - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: the variance :math:`\sigma^2` - :type variance: float - :param lengthscale: the vector of lengthscale :math:`\ell_i` - :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) - :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension. 
- :type ARD: Boolean - :param name: the name of the kernel - :rtype: kernel object - - """ - def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='exp'): - self.input_dim = input_dim - self.ARD = ARD - self.variance = variance - self.name = name - if ARD == False: - self.num_params = 2 - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - lengthscale = np.ones(1) - else: - self.num_params = self.input_dim + 1 - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - lengthscale = np.ones(self.input_dim) - #self._set_params(np.hstack((variance, lengthscale.flatten()))) - self.set_as_parameter('variance', 'lengthscale') - -# def _get_params(self): -# """return the value of the parameters.""" -# return np.hstack((self.variance, self.lengthscale)) -# -# def _set_params(self, x): -# """set the value of the parameters.""" -# assert x.size == self.num_params -# self.variance = x[0] -# self.lengthscale = x[1:] -# -# def _get_param_names(self): -# """return parameter names.""" -# if self.num_params == 2: -# return ['variance', 'lengthscale'] -# else: -# return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)] - - def K(self, X, X2, target): - """Compute the covariance matrix between X and X2.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) - np.add(self.variance * np.exp(-dist), target, target) - - def Kdiag(self, X, target): - """Compute the diagonal of the covariance matrix associated to X.""" - np.add(target, self.variance, target) - - def _param_grad_helper(self, dL_dK, X, X2, target): - """derivative of the covariance matrix with respect to the parameters.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / 
self.lengthscale), -1)) - invdist = 1. / np.where(dist != 0., dist, np.inf) - dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3 - dvar = np.exp(-dist) - target[0] += np.sum(dvar * dL_dK) - if self.ARD == True: - dl = self.variance * dvar[:, :, None] * dist2M * invdist[:, :, None] - target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0) - else: - dl = self.variance * dvar * dist2M.sum(-1) * invdist - target[1] += np.sum(dl * dL_dK) - - def dKdiag_dtheta(self, dL_dKdiag, X, target): - """derivative of the diagonal of the covariance matrix with respect to the parameters.""" - # NB: derivative of diagonal elements wrt lengthscale is 0 - target[0] += np.sum(dL_dKdiag) - - def gradients_X(self, dL_dK, X, X2, target): - """derivative of the covariance matrix with respect to X.""" - if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None] - ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf) - gradients_X = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2)) - target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) - - def dKdiag_dX(self, dL_dKdiag, X, target): - pass - - def Gram_matrix(self, F, F1, lower, upper): - """ - Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1. - - :param F: vector of functions - :type F: np.array - :param F1: vector of derivatives of F - :type F1: np.array - :param lower,upper: boundaries of the input domain - :type lower,upper: floats - """ - assert self.input_dim == 1 - def L(x, i): - return(1. / self.lengthscale * F[i](x) + F1[i](x)) - n = F.shape[0] - G = np.zeros((n, n)) - for i in range(n): - for j in range(i, n): - G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0] - Flower = np.array([f(lower) for f in F])[:, None] - return(self.lengthscale / 2. 
/ self.variance * G + 1. / self.variance * np.dot(Flower, Flower.T)) diff --git a/GPy/kern/parts/kernpart.py b/GPy/kern/parts/kernpart.py deleted file mode 100644 index 06f1446b..00000000 --- a/GPy/kern/parts/kernpart.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) -#from ...core.parameterized.Parameterized import set_as_parameter -from ...core.parameterization import Parameterized - -class Kernpart(Parameterized): - def __init__(self,input_dim,name): - """ - The base class for a kernpart: a positive definite function - which forms part of a covariance function (kernel). - - :param input_dim: the number of input dimensions to the function - :type input_dim: int - - Do not instantiate. - """ - super(Kernpart, self).__init__(name) - # the input dimensionality for the covariance - self.input_dim = input_dim - # the number of optimisable parameters - # the name of the covariance function. - # link to parameterized objects - #self._X = None - - def connect_input(self, X): - X.add_observer(self, self.on_input_change) - #self._X = X - - def on_input_change(self, X): - """ - During optimization this function will be called when - the inputs X changed. Use this to update caches dependent - on the inputs X. 
- """ - # overwrite this to update kernel when inputs X change - pass - - -# def set_as_parameter_named(self, name, gradient, index=None, *args, **kwargs): -# """ -# :param names: name of parameter to set as parameter -# :param gradient: gradient method to get the gradient of this parameter -# :param index: index of where to place parameter in printing -# :param args, kwargs: additional arguments to gradient -# -# Convenience method to connect Kernpart parameters: -# parameter with name (attribute of this Kernpart) will be set as parameter with following name: -# -# kernel_name + _ + parameter_name -# -# To add the kernels name to the parameter name use this method to -# add parameters. -# """ -# self.set_as_parameter(name, getattr(self, name), gradient, index, *args, **kwargs) -# def set_as_parameter(self, name, array, gradient, index=None, *args, **kwargs): -# """ -# See :py:func:`GPy.core.parameterized.Parameterized.set_as_parameter` -# -# Note: this method adds the kernels name in front of the parameter. -# """ -# p = Param(self.name+"_"+name, array, gradient, *args, **kwargs) -# if index is None: -# self._parameters_.append(p) -# else: -# self._parameters_.insert(index, p) -# self.__dict__[name] = p - #set_as_parameter.__doc__ += set_as_parameter.__doc__ # @UndefinedVariable -# def _get_params(self): -# raise NotImplementedError -# def _set_params(self,x): -# raise NotImplementedError -# def _get_param_names(self): -# raise NotImplementedError - def K(self,X,X2,target): - raise NotImplementedError - def Kdiag(self,X,target): - raise NotImplementedError - def _param_grad_helper(self,dL_dK,X,X2,target): - raise NotImplementedError - def dKdiag_dtheta(self,dL_dKdiag,X,target): - # In the base case compute this by calling _param_grad_helper. Need to - # override for stationary covariances (for example) to save - # time. 
- for i in range(X.shape[0]): - self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target) - def psi0(self,Z,mu,S,target): - raise NotImplementedError - def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): - raise NotImplementedError - def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S): - raise NotImplementedError - def psi1(self,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dtheta(self,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S): - raise NotImplementedError - def psi2(self,Z,mu,S,target): - raise NotImplementedError - def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target): - raise NotImplementedError - def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): - raise NotImplementedError - def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): - raise NotImplementedError - def gradients_X(self, dL_dK, X, X2, target): - raise NotImplementedError - def dKdiag_dX(self, dL_dK, X, target): - raise NotImplementedError - def update_gradients_full(self, dL_dK, X): - """Set the gradients of all parameters when doing full (N) inference.""" - raise NotImplementedError - def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - """Set the gradients of all parameters when doing sparse (M) inference.""" - raise NotImplementedError - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs.""" - raise NotImplementedError - -class Kernpart_stationary(Kernpart): - def __init__(self, input_dim, lengthscale=None, ARD=False): - self.input_dim = input_dim - self.ARD = ARD - if not ARD: - self.num_params = 2 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - self.lengthscale = np.ones(1) - 
else: - self.num_params = self.input_dim + 1 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - self.lengthscale = np.ones(self.input_dim) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - - def _set_params(self, x): - self.lengthscale = x - self.lengthscale2 = np.square(self.lengthscale) - # reset cached results - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S - - - def dKdiag_dtheta(self, dL_dKdiag, X, target): - # For stationary covariances, derivative of diagonal elements - # wrt lengthscale is 0. - target[0] += np.sum(dL_dKdiag) - - def dKdiag_dX(self, dL_dK, X, target): - pass # true for all stationary kernels - - -class Kernpart_inner(Kernpart): - def __init__(self,input_dim): - """ - The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs. - - :param input_dim: the number of input dimensions to the function - :type input_dim: int - - Do not instantiate. - """ - Kernpart.__init__(self, input_dim) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) diff --git a/GPy/kern/parts/linear.py b/GPy/kern/parts/linear.py deleted file mode 100644 index 828ece11..00000000 --- a/GPy/kern/parts/linear.py +++ /dev/null @@ -1,306 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
-# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from scipy import weave -from kernpart import Kernpart -from ...util.linalg import tdot -from ...util.misc import fast_array_equal, param_to_array -from ...core.parameterization import Param -from ...core.parameterization.transformations import Logexp - -class Linear(Kernpart): - """ - Linear kernel - - .. math:: - - k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i - - :param input_dim: the number of input dimensions - :type input_dim: int - :param variances: the vector of variances :math:`\sigma^2_i` - :type variances: array or list of the appropriate size (or float if there is only one variance parameter) - :param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension. - :type ARD: Boolean - :rtype: kernel object - """ - - def __init__(self, input_dim, variances=None, ARD=False, name='linear'): - super(Linear, self).__init__(input_dim, name) - self.ARD = ARD - if ARD == False: - if variances is not None: - variances = np.asarray(variances) - assert variances.size == 1, "Only one variance needed for non-ARD kernel" - else: - variances = np.ones(1) - self._Xcache, self._X2cache = np.empty(shape=(2,)) - else: - if variances is not None: - variances = np.asarray(variances) - assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim" - else: - variances = np.ones(self.input_dim) - - self.variances = Param('variances', variances, Logexp()) - self.variances.gradient = np.zeros(self.variances.shape) - self.add_parameter(self.variances) - self.variances.add_observer(self, self.update_variance) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2 = np.empty(shape=(2, 1)) - - def update_variance(self, v): - self.variances2 = np.square(self.variances) - - def on_input_change(self, X): - 
self._K_computations(X, None) - - def update_gradients_full(self, dL_dK, X): - self.variances.gradient[:] = 0 - self._param_grad_helper(dL_dK, X, None, self.variances.gradient) - - def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - tmp = dL_dKdiag[:, None] * X ** 2 - if self.ARD: - self.variances.gradient = tmp.sum(0) - else: - self.variances.gradient = tmp.sum() - self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) - self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient) - - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - self._psi_computations(Z, mu, S) - # psi0: - tmp = dL_dpsi0[:, None] * self.mu2_S - if self.ARD: self.variances.gradient[:] = tmp.sum(0) - else: self.variances.gradient[:] = tmp.sum() - #psi1 - self._param_grad_helper(dL_dpsi1, mu, Z, self.variances.gradient) - #psi2 - tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :]) - if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0) - else: self.variances.gradient += tmp.sum() - #from Kmm - self._K_computations(Z, None) - self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) - - def K(self, X, X2, target): - if self.ARD: - XX = X * np.sqrt(self.variances) - if X2 is None: - target += tdot(XX) - else: - XX2 = X2 * np.sqrt(self.variances) - target += np.dot(XX, XX2.T) - else: - if X is not self._X or X2 is not None: - self._K_computations(X, X2) - target += self.variances * self._dot_product - - def Kdiag(self, X, target): - np.add(target, np.sum(self.variances * np.square(X), -1), target) - - def _param_grad_helper(self, dL_dK, X, X2, target): - if self.ARD: - if X2 is None: - [np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)] - else: - product = X[:, None, :] * X2[None, :, :] - target += (dL_dK[:, :, None] * product).sum(0).sum(0) - else: - if X is not self._X or X2 is not None: - 
self._K_computations(X, X2) - target += np.sum(self._dot_product * dL_dK) - - def gradients_X(self, dL_dK, X, X2, target): - if X2 is None: - target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) - else: - target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) - - def dKdiag_dX(self,dL_dKdiag,X,target): - target += 2.*self.variances*dL_dKdiag[:,None]*X - - #---------------------------------------# - # PSI statistics # - #---------------------------------------# - - def psi0(self, Z, mu, S, target): - self._psi_computations(Z, mu, S) - target += np.sum(self.variances * self.mu2_S, 1) - - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): - target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances) - target_S += dL_dpsi0[:, None] * self.variances - - def psi1(self, Z, mu, S, target): - """the variance, it does nothing""" - self._psi1 = self.K(mu, Z, target) - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): - """Do nothing for S, it does not affect psi1""" - self._psi_computations(Z, mu, S) - target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1) - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): - self.gradients_X(dL_dpsi1.T, Z, mu, target) - - def psi2(self, Z, mu, S, target): - self._psi_computations(Z, mu, S) - target += self._psi2 - - def psi2_new(self,Z,mu,S,target): - tmp = np.zeros((mu.shape[0], Z.shape[0])) - self.K(mu,Z,tmp) - target += tmp[:,:,None]*tmp[:,None,:] + np.sum(S[:,None,None,:]*self.variances**2*Z[None,:,None,:]*Z[None,None,:,:],-1) - - def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target): - tmp = np.zeros((mu.shape[0], Z.shape[0])) - self.K(mu,Z,tmp) - self._param_grad_helper(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target) - result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0) - if self.ARD: - target += result.sum(0).sum(0).sum(0) - else: - target += result.sum() - - def dpsi2_dmuS_new(self, 
dL_dpsi2, Z, mu, S, target_mu, target_S): - tmp = np.zeros((mu.shape[0], Z.shape[0])) - self.K(mu,Z,tmp) - self.gradients_X(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu) - - Zs = Z*self.variances - Zs_sq = Zs[:,None,:]*Zs[None,:,:] - target_S += (dL_dpsi2[:,:,:,None]*Zs_sq[None,:,:,:]).sum(1).sum(1) - - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - """Think N,num_inducing,num_inducing,input_dim """ - self._psi_computations(Z, mu, S) - AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :] - AZZA = AZZA + AZZA.swapaxes(1, 2) - AZZA_2 = AZZA/2. - #muAZZA = np.tensordot(mu,AZZA,(-1,0)) - #target_mu_dummy, target_S_dummy = np.zeros_like(target_mu), np.zeros_like(target_S) - #target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1) - #target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1) - - #Using weave, we can exploiut the symmetry of this problem: - code = """ - int n, m, mm,q,qq; - double factor,tmp; - #pragma omp parallel for private(m,mm,q,qq,factor,tmp) - for(n=0;n - #include - """ - weave_options = {'headers' : [''], - 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], - 'extra_link_args' : ['-lgomp']} - - N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu) - weave.inline(code, support_code=support_code, libraries=['gomp'], - arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'], - type_converters=weave.converters.blitz,**weave_options) - - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): - self._psi_computations(Z, mu, S) - #psi2_dZ = dL_dpsi2[:, :, :, None] * self.variances * self.ZAinner[:, :, None, :] - #dummy_target = np.zeros_like(target) - #dummy_target += psi2_dZ.sum(0).sum(0) - - AZA = self.variances*self.ZAinner - code=""" - int n,m,mm,q; - #pragma omp parallel for private(n,mm,q) - for(m=0;m - #include - """ - weave_options = {'headers' : [''], - 
'extra_compile_args': ['-fopenmp -O3'], #-march=native'], - 'extra_link_args' : ['-lgomp']} - - N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1] - mu, AZA, target, dL_dpsi2 = param_to_array(mu, AZA, target, dL_dpsi2) - weave.inline(code, support_code=support_code, libraries=['gomp'], - arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'], - type_converters=weave.converters.blitz,**weave_options) - - - - - - #---------------------------------------# - # Precomputations # - #---------------------------------------# - - def _K_computations(self, X, X2): - if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)): - self._X = X.copy() - if X2 is None: - self._dot_product = tdot(param_to_array(X)) - self._X2 = None - else: - self._X2 = X2.copy() - self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T)) - - def _psi_computations(self, Z, mu, S): - # here are the "statistics" for psi1 and psi2 - Zv_changed = not (fast_array_equal(Z, self._Z) and fast_array_equal(self.variances, self._variances)) - muS_changed = not (fast_array_equal(mu, self._mu) and fast_array_equal(S, self._S)) - if Zv_changed: - # Z has changed, compute Z specific stuff - # self.ZZ = Z[:,None,:]*Z[None,:,:] # num_inducing,num_inducing,input_dim -# self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F') -# [tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])] - self.ZA = Z * self.variances - self._Z = Z.copy() - self._variances = self.variances.copy() - if muS_changed: - self.mu2_S = np.square(mu) + S - self.inner = (mu[:, None, :] * mu[:, :, None]) - diag_indices = np.diag_indices(mu.shape[1], 2) - self.inner[:, diag_indices[0], diag_indices[1]] += S - self._mu, self._S = mu.copy(), S.copy() - if Zv_changed or muS_changed: - self.ZAinner = np.dot(self.ZA, self.inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]! 
- self._psi2 = np.dot(self.ZAinner, self.ZA.T) diff --git a/GPy/kern/parts/prod.py b/GPy/kern/parts/prod.py deleted file mode 100644 index 364c91b3..00000000 --- a/GPy/kern/parts/prod.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -from kernpart import Kernpart -from coregionalize import Coregionalize -import numpy as np -import hashlib - -class Prod(Kernpart): - """ - Computes the product of 2 kernels - - :param k1, k2: the kernels to multiply - :type k1, k2: Kernpart - :param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces - :type tensor: Boolean - :rtype: kernel object - - """ - def __init__(self,k1,k2,tensor=False): - if tensor: - super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name) - self.slice1 = slice(0,k1.input_dim) - self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim) - else: - assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension." 
- super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name) - self.slice1 = slice(0,self.input_dim) - self.slice2 = slice(0,self.input_dim) - self.k1 = k1 - self.k2 = k2 - self.add_parameters(self.k1, self.k2) - - #initialize cache - self._X, self._X2 = np.empty(shape=(2,1)) - self._params = None - - def K(self,X,X2,target): - self._K_computations(X,X2) - target += self._K1 * self._K2 - - def K1(self,X, X2): - """Compute the part of the kernel associated with k1.""" - self._K_computations(X, X2) - return self._K1 - - def K2(self, X, X2): - """Compute the part of the kernel associated with k2.""" - self._K_computations(X, X2) - return self._K2 - - def update_gradients_full(self, dL_dK, X): - self._K_computations(X, None) - self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1]) - self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2]) - - def _param_grad_helper(self,dL_dK,X,X2,target): - """Derivative of the covariance matrix with respect to the parameters.""" - self._K_computations(X,X2) - if X2 is None: - self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params]) - self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:]) - else: - self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params]) - self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:]) - - def Kdiag(self,X,target): - """Compute the diagonal of the covariance matrix associated to X.""" - target1 = np.zeros(X.shape[0]) - target2 = np.zeros(X.shape[0]) - self.k1.Kdiag(X[:,self.slice1],target1) - self.k2.Kdiag(X[:,self.slice2],target2) - target += target1 * target2 - - - def dKdiag_dtheta(self,dL_dKdiag,X,target): - K1 = np.zeros(X.shape[0]) - K2 = np.zeros(X.shape[0]) - self.k1.Kdiag(X[:,self.slice1],K1) - self.k2.Kdiag(X[:,self.slice2],K2) - 
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params]) - self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:]) - - def gradients_X(self,dL_dK,X,X2,target): - """derivative of the covariance matrix with respect to X.""" - self._K_computations(X,X2) - if X2 is None: - if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize): - self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1]) - self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2]) - else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize): - #NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei] - X2 = X - self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) - self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) - else: - self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) - self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) - - def dKdiag_dX(self, dL_dKdiag, X, target): - K1 = np.zeros(X.shape[0]) - K2 = np.zeros(X.shape[0]) - self.k1.Kdiag(X[:,self.slice1],K1) - self.k2.Kdiag(X[:,self.slice2],K2) - - self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1]) - self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2]) - - def _K_computations(self,X,X2): - if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): - self._X = X.copy() - self._params == self._get_params().copy() - if X2 is None: - self._X2 = None - self._K1 = np.zeros((X.shape[0],X.shape[0])) - self._K2 = np.zeros((X.shape[0],X.shape[0])) - self.k1.K(X[:,self.slice1],None,self._K1) - self.k2.K(X[:,self.slice2],None,self._K2) - else: - self._X2 = X2.copy() - 
self._K1 = np.zeros((X.shape[0],X2.shape[0])) - self._K2 = np.zeros((X.shape[0],X2.shape[0])) - self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1) - self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2) - diff --git a/GPy/kern/parts/ss_rbf.py b/GPy/kern/parts/ss_rbf.py deleted file mode 100644 index cab8fd11..00000000 --- a/GPy/kern/parts/ss_rbf.py +++ /dev/null @@ -1,352 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -from kernpart import Kernpart -from ...util.linalg import tdot -from ...util.misc import fast_array_equal, param_to_array -from ...core.parameterization import Param - -class SS_RBF(Kernpart): - """ - The RBF kernel for Spike-and-Slab GPLVM - Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel: - - .. math:: - - k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2} - - where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input. 
- - :param input_dim: the number of input dimensions - :type input_dim: int - :param variance: the variance of the kernel - :type variance: float - :param lengthscale: the vector of lengthscale of the kernel - :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter) - :rtype: kernel object - """ - - def __init__(self, input_dim, variance=1., lengthscale=None, name='rbf'): - super(RBF, self).__init__(input_dim, name) - self.input_dim = input_dim - - if lengthscale is not None: - lengthscale = np.asarray(lengthscale) - assert lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - lengthscale = np.ones(self.input_dim) - - self.variance = Param('variance', variance) - self.lengthscale = Param('lengthscale', lengthscale) - self.lengthscale.add_observer(self, self.update_lengthscale) - self.add_parameters(self.variance, self.lengthscale) - self.parameters_changed() # initializes cache - - def on_input_change(self, X): - #self._K_computations(X, None) - pass - - def update_lengthscale(self, l): - self.lengthscale2 = np.square(self.lengthscale) - - def parameters_changed(self): - # reset cached results - self._X, self._X2 = np.empty(shape=(2, 1)) - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S - - def K(self, X, X2, target): - self._K_computations(X, X2) - target += self.variance * self._K_dvar - - def Kdiag(self, X, target): - np.add(target, self.variance, target) - - def psi0(self, Z, mu, S, target): - target += self.variance - - def psi1(self, Z, mu, S, target): - self._psi_computations(Z, mu, S) - target += self._psi1 - - def psi2(self, Z, mu, S, target): - self._psi_computations(Z, mu, S) - target += self._psi2 - - def update_gradients_full(self, dL_dK, X): - self._K_computations(X, None) - self.variance.gradient = np.sum(self._K_dvar * dL_dK) - if self.ARD: - self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dK, X, None) - else: - 
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) - - def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - #contributions from Kdiag - self.variance.gradient = np.sum(dL_dKdiag) - - #from Knm - self._K_computations(X, Z) - self.variance.gradient += np.sum(dL_dKnm * self._K_dvar) - if self.ARD: - self.lengthscales.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z) - - else: - self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - - #from Kmm - self._K_computations(Z, None) - self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) - if self.ARD: - self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) - else: - self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - self._psi_computations(Z, mu, S) - - #contributions from psi0: - self.variance.gradient = np.sum(dL_dpsi0) - - #from psi1 - self.variance.gradient += np.sum(dL_dpsi1 * self._psi1 / self.variance) - d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale) - dpsi1_dlength = d_length * dL_dpsi1[:, :, None] - if not self.ARD: - self.lengthscale.gradeint = dpsi1_dlength.sum() - else: - self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0) - - #from psi2 - d_var = 2.*self._psi2 / self.variance - d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom) - - self.variance.gradient += np.sum(dL_dpsi2 * d_var) - dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None] - if not self.ARD: - self.lengthscale.gradient += dpsi2_dlength.sum() - else: - self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0) - - #from 
Kmm - self._K_computations(Z, None) - self.variance.gradient += np.sum(dL_dKmm * self._K_dvar) - if self.ARD: - self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None) - else: - self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) - - def gradients_X(self, dL_dK, X, X2, target): - #if self._X is None or X.base is not self._X.base or X2 is not None: - self._K_computations(X, X2) - if X2 is None: - _K_dist = 2*(X[:, None, :] - X[None, :, :]) - else: - _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. - gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) - target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) - - def dKdiag_dX(self, dL_dKdiag, X, target): - pass - - #---------------------------------------# - # PSI statistics # - #---------------------------------------# - - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): - pass - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): - self._psi_computations(Z, mu, S) - denominator = (self.lengthscale2 * (self._psi1_denom)) - dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) - target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0) - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): - self._psi_computations(Z, mu, S) - tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom - target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) - target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): - self._psi_computations(Z, mu, S) - term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim - term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, 
num_inducing, input_dim - dZ = self._psi2[:, :, :, None] * (term1[None] + term2) - target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) - - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - """Think N,num_inducing,num_inducing,input_dim """ - self._psi_computations(Z, mu, S) - tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom - target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) - target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) - - #---------------------------------------# - # Precomputations # - #---------------------------------------# - - def _K_computations(self, X, X2): - #params = self._get_params() - if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):# and fast_array_equal(self._params_save , params)): - #self._X = X.copy() - #self._params_save = params.copy() - if X2 is None: - self._X2 = None - X = X / self.lengthscale - Xsquare = np.sum(np.square(X), 1) - self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :]) - else: - self._X2 = X2.copy() - X = X / self.lengthscale - X2 = X2 / self.lengthscale - self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :]) - self._K_dvar = np.exp(-0.5 * self._K_dist2) - - def _dL_dlengthscales_via_K(self, dL_dK, X, X2): - """ - A helper function for update_gradients_* methods - - Computes the derivative of the objective L wrt the lengthscales via - - dL_dl = sum_{i,j}(dL_dK_{ij} dK_dl) - - assumes self._K_computations has just been called. 
- - This is only valid if self.ARD=True - """ - target = np.zeros(self.input_dim) - dvardLdK = self._K_dvar * dL_dK - var_len3 = self.variance / np.power(self.lengthscale, 3) - if X2 is None: - # save computation for the symmetrical case - dvardLdK = dvardLdK + dvardLdK.T - code = """ - int q,i,j; - double tmp; - for(q=0; q - #include - """ - weave.inline(code, support_code=support_code, libraries=['gomp'], - arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'], - type_converters=weave.converters.blitz, **self.weave_options) - - return mudist, mudist_sq, psi2_exponent, psi2 diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/parts/sympykern.py deleted file mode 100644 index a09d4bfc..00000000 --- a/GPy/kern/parts/sympykern.py +++ /dev/null @@ -1,423 +0,0 @@ -import numpy as np -import sympy as sp -from sympy.utilities.codegen import codegen -from sympy.core.cache import clear_cache -from scipy import weave -import re -import os -import sys -current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) -import tempfile -import pdb -import ast -from kernpart import Kernpart - -class spkern(Kernpart): - """ - A kernel object, where all the hard work in done by sympy. - - :param k: the covariance function - :type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2... - - To construct a new sympy kernel, you'll need to define: - - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z). - - that's it! we'll extract the variables from the function k. - - Note: - - to handle multiple inputs, call them x_1, z_1, etc - - to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j. 
- """ - def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None): - if name is None: - self.name='sympykern' - else: - self.name = name - if k is None: - raise ValueError, "You must provide an argument for the covariance function." - self._sp_k = k - sp_vars = [e for e in k.atoms() if e.is_Symbol] - self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:])) - self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:])) - # Check that variable names make sense. - assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)]) - assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)]) - assert len(self._sp_x)==len(self._sp_z) - self.input_dim = len(self._sp_x) - self._real_input_dim = self.input_dim - if output_dim > 1: - self.input_dim += 1 - assert self.input_dim == input_dim - self.output_dim = output_dim - # extract parameter names - thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name) - - - # Look for parameters with index. - if self.output_dim>1: - self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name) - self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name) - # Make sure parameter appears with both indices! 
- assert len(self._sp_theta_i)==len(self._sp_theta_j) - assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)]) - - # Extract names of shared parameters - self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j] - - self.num_split_params = len(self._sp_theta_i) - self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i] - for theta in self._split_theta_names: - setattr(self, theta, np.ones(self.output_dim)) - - self.num_shared_params = len(self._sp_theta) - self.num_params = self.num_shared_params+self.num_split_params*self.output_dim - - else: - self.num_split_params = 0 - self._split_theta_names = [] - self._sp_theta = thetas - self.num_shared_params = len(self._sp_theta) - self.num_params = self.num_shared_params - - for theta in self._sp_theta: - val = 1.0 - if param is not None: - if param.has_key(theta): - val = param[theta] - setattr(self, theta.name, val) - #deal with param - self._set_params(self._get_params()) - - #Differentiate! 
- self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta] - if self.output_dim > 1: - self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i] - - self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x] - - if False: - self.compute_psi_stats() - - self._gen_code() - - if False: - extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'] - else: - extra_compile_args = [] - - self.weave_kwargs = { - 'support_code':self._function_code, - 'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')], - 'headers':['"sympy_helpers.h"'], - 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")], - 'extra_compile_args':extra_compile_args, - 'extra_link_args':['-lgomp'], - 'verbose':True} - - def __add__(self,other): - return spkern(self._sp_k+other._sp_k) - - def _gen_code(self): - #generate c functions from sympy objects - argument_sequence = self._sp_x+self._sp_z+self._sp_theta - code_list = [('k',self._sp_k)] - # gradients with respect to covariance input - code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)] - # gradient with respect to parameters - code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)] - # gradient with respect to multiple output parameters - if self.output_dim > 1: - argument_sequence += self._sp_theta_i + self._sp_theta_j - code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)] - (foo_c,self._function_code), (foo_h,self._function_header) = \ - codegen(code_list, "C",'foobar',argument_sequence=argument_sequence) - #put the header file where we can find it - f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w') - f.write(self._function_header) - f.close() - - # Substitute any known derivatives which sympy doesn't compute - self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) - - # This is 
the basic argument construction for the C code. - #arg_list = (["X[i*input_dim+%s]"%x.name[2:] for x in self._sp_x] - # + ["Z[j*input_dim+%s]"%z.name[2:] for z in self._sp_z]) - arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x] - + ["Z2(j, %s)"%z.name[2:] for z in self._sp_z]) - if self.output_dim>1: - reverse_arg_list = list(arg_list) - reverse_arg_list.reverse() - - param_arg_list = [shared_params.name for shared_params in self._sp_theta] - arg_list += param_arg_list - - precompute_list=[] - if self.output_dim > 1: - reverse_arg_list+=list(param_arg_list) - split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i] - split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i] - arg_list += split_param_arg_list - reverse_arg_list += split_param_reverse_arg_list - # Extract the right output indices from the inputs. - c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])] - precompute_list += c_define_output_indices - reverse_arg_string = ", ".join(reverse_arg_list) - arg_string = ", ".join(arg_list) - precompute_string = "\n".join(precompute_list) - # Here's the code to do the looping for K - self._K_code =\ - """ - // _K_code - // Code for computing the covariance function. 
- int i; - int j; - int N = target_array->dimensions[0]; - int num_inducing = target_array->dimensions[1]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for - for (i=0;i1: - grad_func_list += c_define_output_indices - grad_func_list += [' '*16 + 'TARGET1(%i+ii) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)] - grad_func_list += [' '*16 + 'TARGET1(%i+jj) += partial[i*num_inducing+j]*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)] - grad_func_list += ([' '*16 + 'TARGET1(%i) += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)]) - grad_func_string = '\n'.join(grad_func_list) - - self._dK_dtheta_code =\ - """ - // _dK_dtheta_code - // Code for computing gradient of covariance with respect to parameters. - int i; - int j; - int N = partial_array->dimensions[0]; - int num_inducing = partial_array->dimensions[1]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; - int input_dim = X_array->dimensions[1]; - for (i=0;i1: - gradX_func_list += c_define_output_indices - gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)] - gradX_func_string = "\n".join(gradX_func_list) - - self._dK_dX_code = \ - """ - // _dK_dX_code - // Code for computing gradient of covariance with respect to inputs. 
- int i; - int j; - int N = partial_array->dimensions[0]; - int num_inducing = partial_array->dimensions[1]; - int input_dim = X_array->dimensions[1]; - //#pragma omp parallel for private(j) - for (i=0;idimensions[0]; - int input_dim = X_array->dimensions[1]; - for (int i=0;i1: - arg_names += self._split_theta_names - arg_names += ['output_dim'] - return arg_names - - def _weave_inline(self, code, X, target, Z=None, partial=None): - output_dim = self.output_dim - for shared_params in self._sp_theta: - locals()[shared_params.name] = getattr(self, shared_params.name) - - # Need to extract parameters first - for split_params in self._split_theta_names: - locals()[split_params] = getattr(self, split_params) - arg_names = self._get_arg_names(Z, partial) - weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs) - - def K(self,X,Z,target): - if Z is None: - self._weave_inline(self._K_code_X, X, target) - else: - self._weave_inline(self._K_code, X, target, Z) - - - def Kdiag(self,X,target): - self._weave_inline(self._Kdiag_code, X, target) - - def _param_grad_helper(self,partial,X,Z,target): - if Z is None: - self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial) - else: - self._weave_inline(self._dK_dtheta_code, X, target, Z, partial) - - def dKdiag_dtheta(self,partial,X,target): - self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial) - - def gradients_X(self,partial,X,Z,target): - if Z is None: - self._weave_inline(self._dK_dX_code_X, X, target, Z, partial) - else: - self._weave_inline(self._dK_dX_code, X, target, Z, partial) - - def dKdiag_dX(self,partial,X,target): - self._weave.inline(self._dKdiag_dX_code, X, target, Z, partial) - - def compute_psi_stats(self): - #define some normal distributions - mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)] - Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)] - normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in 
zip(self._sp_x, mus, Ss)] - - #do some integration! - #self._sp_psi0 = ?? - self._sp_psi1 = self._sp_k - for i in range(self.input_dim): - print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim) - sys.stdout.flush() - self._sp_psi1 *= normals[i] - self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo)) - clear_cache() - self._sp_psi1 = self._sp_psi1.simplify() - - #and here's psi2 (eek!) - zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)] - self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime)) - for i in range(self.input_dim): - print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim) - sys.stdout.flush() - self._sp_psi2 *= normals[i] - self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo)) - clear_cache() - self._sp_psi2 = self._sp_psi2.simplify() - - - def _set_params(self,param): - assert param.size == (self.num_params) - for i, shared_params in enumerate(self._sp_theta): - setattr(self, shared_params.name, param[i]) - - if self.output_dim>1: - for i, split_params in enumerate(self._split_theta_names): - start = self.num_shared_params + i*self.output_dim - end = self.num_shared_params + (i+1)*self.output_dim - setattr(self, split_params, param[start:end]) - - - def _get_params(self): - params = np.zeros(0) - for shared_params in self._sp_theta: - params = np.hstack((params, getattr(self, shared_params.name))) - if self.output_dim>1: - for split_params in self._split_theta_names: - params = np.hstack((params, getattr(self, split_params).flatten())) - return params - - def _get_param_names(self): - if self.output_dim>1: - return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i) for x in self._sp_theta_i for i in range(self.output_dim)] - else: - return [x.name for x in self._sp_theta] diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 914ca4ae..a8d643b9 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ 
-8,7 +8,7 @@ from ..core import SparseGP from ..likelihoods import Gaussian from ..inference.optimization import SCG from ..util import linalg -from ..core.parameterization.variational import Normal +from ..core.parameterization.variational import NormalPosterior, NormalPrior class BayesianGPLVM(SparseGP, GPLVM): """ @@ -29,18 +29,20 @@ class BayesianGPLVM(SparseGP, GPLVM): self.init = init if X_variance is None: - X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1) + X_variance = np.random.uniform(0,.1,X.shape) if Z is None: Z = np.random.permutation(X.copy())[:num_inducing] assert Z.shape[1] == X.shape[1] if kernel is None: - kernel = kern.rbf(input_dim) # + kern.white(input_dim) + kernel = kern.RBF(input_dim) # + kern.white(input_dim) if likelihood is None: likelihood = Gaussian() - self.q = Normal(X, X_variance) + self.q = NormalPosterior(X, X_variance) + self.variational_prior = NormalPrior() + SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs) self.add_parameter(self.q, index=0) #self.ensure_default_constraints() @@ -57,34 +59,15 @@ class BayesianGPLVM(SparseGP, GPLVM): self.init = state.pop() SparseGP._setstate(self, state) - def dL_dmuS(self): - dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance) - dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance) - dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance) - dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2 - dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2 - - return dL_dmu, dL_dS - - def KL_divergence(self): - var_mean = np.square(self.X).sum() - var_S = np.sum(self.X_variance - np.log(self.X_variance)) - return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data - def parameters_changed(self): - self.posterior, self._log_marginal_likelihood, 
self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) - self._update_gradients_Z(add=False) - - self._log_marginal_likelihood -= self.KL_divergence() - dL_dmu, dL_dS = self.dL_dmuS() - - # dL: - self.q.mean.gradient = dL_dmu - self.q.variance.gradient = dL_dS - - # dKL: - self.q.mean.gradient -= self.X - self.q.variance.gradient -= (1. - (1. / (self.X_variance))) * 0.5 + super(BayesianGPLVM, self).parameters_changed() + self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.q) + + self.kern.update_gradients_q_variational(posterior_variational=self.q, Z=self.Z, **self.grad_dict) + + # update for the KL divergence + self.variational_prior.update_gradients_KL(self.q) + def plot_latent(self, plot_inducing=True, *args, **kwargs): """ @@ -157,6 +140,7 @@ class BayesianGPLVM(SparseGP, GPLVM): """ See GPy.plotting.matplot_dep.dim_reduction_plots.plot_steepest_gradient_map """ + import sys assert "matplotlib" in sys.modules, "matplotlib package has not been imported." from ..plotting.matplot_dep import dim_reduction_plots diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py index a72acc1a..f8957906 100644 --- a/GPy/models/gp_regression.py +++ b/GPy/models/gp_regression.py @@ -23,7 +23,7 @@ class GPRegression(GP): def __init__(self, X, Y, kernel=None): if kernel is None: - kernel = kern.rbf(X.shape[1]) + kernel = kern.RBF(X.shape[1]) likelihood = likelihoods.Gaussian() diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 3e105785..0423aecd 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -7,9 +7,25 @@ from GPy.util.linalg import PCA import numpy import itertools import pylab -from GPy.kern.kern import kern +from GPy.kern import Kern from GPy.models.bayesian_gplvm import BayesianGPLVM +class MRD2(Model): + """ + Apply MRD to all given datasets Y in Ylist. 
+ + Y_i in [n x p_i] + + The samples n in the datasets need + to match up, whereas the dimensionality p_d can differ. + + :param [array-like] Ylist: List of datasets to apply MRD on + :param array-like q_mean: mean of starting latent space q in [n x q] + :param array-like q_variance: variance of starting latent space q in [n x q] + :param :class:`~GPy.inference.latent_function_inference + """ + + class MRD(Model): """ Do MRD on given Datasets in Ylist. @@ -48,11 +64,11 @@ class MRD(Model): # sort out the kernels if kernels is None: kernels = [None] * len(likelihood_or_Y_list) - elif isinstance(kernels, kern): + elif isinstance(kernels, Kern): kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))] else: assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output" - assert all([isinstance(k, kern) for k in kernels]), "invalid kernel object detected!" + assert all([isinstance(k, Kern) for k in kernels]), "invalid kernel object detected!" assert not ('kernel' in kw), "pass kernels through `kernels` argument" self.input_dim = input_dim diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py index 74292c05..3f4ea9b0 100644 --- a/GPy/plotting/matplot_dep/dim_reduction_plots.py +++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py @@ -1,8 +1,8 @@ import pylab as pb import numpy as np -from ... 
import util from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController -from GPy.util.misc import param_to_array +from ...util.misc import param_to_array +from .base_plots import x_frame2D import itertools import Tango from matplotlib.cm import get_cmap @@ -37,7 +37,7 @@ def plot_latent(model, labels=None, which_indices=None, if ax is None: fig = pb.figure(num=fignum) ax = fig.add_subplot(111) - util.plot.Tango.reset() + Tango.reset() if labels is None: labels = np.ones(model.num_data) @@ -46,7 +46,7 @@ def plot_latent(model, labels=None, which_indices=None, X = param_to_array(model.X) # first, plot the output variance as a function of the latent space - Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(X[:, [input_1, input_2]], resolution=resolution) + Xtest, xx, yy, xmin, xmax = x_frame2D(X[:, [input_1, input_2]], resolution=resolution) Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) def plot_function(x): @@ -87,7 +87,7 @@ def plot_latent(model, labels=None, which_indices=None, else: x = X[index, input_1] y = X[index, input_2] - ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label) + ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label) ax.set_xlabel('latent dimension %i' % input_1) ax.set_ylabel('latent dimension %i' % input_2) @@ -120,7 +120,7 @@ def plot_magnification(model, labels=None, which_indices=None, if ax is None: fig = pb.figure(num=fignum) ax = fig.add_subplot(111) - util.plot.Tango.reset() + Tango.reset() if labels is None: labels = np.ones(model.num_data) @@ -128,7 +128,7 @@ def plot_magnification(model, labels=None, which_indices=None, input_1, input_2 = most_significant_input_dimensions(model, which_indices) # first, plot the output variance as a function of the latent space - Xtest, xx, yy, xmin, xmax = util.plot.x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution) + Xtest, xx, yy, xmin, xmax = x_frame2D(model.X[:, [input_1, 
input_2]], resolution=resolution) Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) def plot_function(x): @@ -165,7 +165,7 @@ def plot_magnification(model, labels=None, which_indices=None, else: x = model.X[index, input_1] y = model.X[index, input_2] - ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label) + ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label) ax.set_xlabel('latent dimension %i' % input_1) ax.set_ylabel('latent dimension %i' % input_2) @@ -205,7 +205,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None, return dmu_dX[indices, argmax], np.array(labels)[argmax] if ax is None: - fig = pyplot.figure(num=fignum) + fig = pb.figure(num=fignum) ax = fig.add_subplot(111) if data_labels is None: @@ -241,7 +241,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None, ax.legend() ax.figure.tight_layout() if updates: - pyplot.show() + pb.show() clear = raw_input('Enter to continue') if clear.lower() in 'yes' or clear == '': controller.deactivate() diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py index 19c96bc0..3436c4ff 100644 --- a/GPy/plotting/matplot_dep/kernel_plots.py +++ b/GPy/plotting/matplot_dep/kernel_plots.py @@ -1,13 +1,12 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) -import sys import numpy as np import pylab as pb import Tango from matplotlib.textpath import TextPath from matplotlib.transforms import offset_copy -from ...kern.parts.linear import Linear +from ...kern import Linear def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False): @@ -29,22 +28,23 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False): xticklabels = [] bars = [] x0 = 0 - for p in kernel._parameters_: - c = Tango.nextMedium() - if hasattr(p, 'ARD') and p.ARD: - if title is None: - ax.set_title('ARD parameters, %s kernel' % p.name) - else: - ax.set_title(title) - if isinstance(p, Linear): - ard_params = p.variances - else: - ard_params = 1. / p.lengthscale - - x = np.arange(x0, x0 + len(ard_params)) - bars.append(ax.bar(x, ard_params, align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name.replace("_"," "))) - xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))]) - x0 += len(ard_params) + #for p in kernel._parameters_: + p = kernel + c = Tango.nextMedium() + if hasattr(p, 'ARD') and p.ARD: + if title is None: + ax.set_title('ARD parameters, %s kernel' % p.name) + else: + ax.set_title(title) + if isinstance(p, Linear): + ard_params = p.variances + else: + ard_params = 1. 
/ p.lengthscale + x = np.arange(x0, x0 + len(ard_params)) + from ...util.misc import param_to_array + bars.append(ax.bar(x, param_to_array(ard_params), align='center', color=c, edgecolor='k', linewidth=1.2, label=p.name.replace("_"," "))) + xticklabels.extend([r"$\mathrm{{{name}}}\ {x}$".format(name=p.name, x=i) for i in np.arange(len(ard_params))]) + x0 += len(ard_params) x = np.arange(x0) transOffset = offset_copy(ax.transData, fig=fig, x=0., y= -2., units='points') diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py index c9896116..3d019bfd 100644 --- a/GPy/plotting/matplot_dep/models_plots.py +++ b/GPy/plotting/matplot_dep/models_plots.py @@ -9,7 +9,7 @@ from ...util.misc import param_to_array def plot_fit(model, plot_limits=None, which_data_rows='all', - which_data_ycols='all', which_parts='all', fixed_inputs=[], + which_data_ycols='all', fixed_inputs=[], levels=20, samples=0, fignum=None, ax=None, resolution=None, plot_raw=False, linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']): @@ -20,7 +20,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', - In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed. Can plot only part of the data and part of the posterior functions - using which_data_rowsm which_data_ycols and which_parts + using which_data_rowsm which_data_ycols. :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. 
Defaluts to data limits :type plot_limits: np.array @@ -28,8 +28,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', :type which_data_rows: 'all' or a slice object to slice model.X, model.Y :param which_data_ycols: when the data has several columns (independant outputs), only plot these :type which_data_rows: 'all' or a list of integers - :param which_parts: which of the kernel functions to plot (additively) - :type which_parts: 'all', or list of bools :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. :type fixed_inputs: a list of tuples :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D @@ -58,7 +56,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', if ax is None: fig = pb.figure(num=fignum) ax = fig.add_subplot(111) - + + X, Y, Z = param_to_array(model.X, model.Y, model.Z) + if model.has_uncertain_inputs(): X_variance = param_to_array(model.q.variance) + #work out what the inputs are for plotting (1D or 2D) fixed_dims = np.array([i for i,v in fixed_inputs]) free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims) @@ -68,7 +69,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #define the frame on which to plot resolution = resolution or 200 - Xnew, xmin, xmax = x_frame1D(model.X[:,free_dims], plot_limits=plot_limits) + Xnew, xmin, xmax = x_frame1D(X[:,free_dims], plot_limits=plot_limits) Xgrid = np.empty((Xnew.shape[0],model.input_dim)) Xgrid[:,free_dims] = Xnew for i,v in fixed_inputs: @@ -76,30 +77,30 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #make a prediction on the frame and plot it if plot_raw: - m, v = model._raw_predict(Xgrid, which_parts=which_parts) + m, v = model._raw_predict(Xgrid) lower = m - 2*np.sqrt(v) upper = m + 2*np.sqrt(v) - Y = model.Y + Y = Y else: - m, v, lower, upper = model.predict(Xgrid, which_parts=which_parts) - Y = model.Y + m, v, lower, 
upper = model.predict(Xgrid) + Y = Y for d in which_data_ycols: gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol) - ax.plot(model.X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5) + ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5) #optionally plot some samples if samples: #NOTE not tested with fixed_inputs - Ysim = model.posterior_samples(Xgrid, samples, which_parts=which_parts) + Ysim = model.posterior_samples(Xgrid, samples) for yi in Ysim.T: ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs. #add error bars for uncertain (if input uncertainty is being modelled) - if hasattr(model,"has_uncertain_inputs"): - ax.errorbar(model.X[which_data, free_dims], model.likelihood.data[which_data, 0], - xerr=2 * np.sqrt(model.X_variance[which_data, free_dims]), - ecolor='k', fmt=None, elinewidth=.5, alpha=.5) + #if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs(): + # ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(), + # xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()), + # ecolor='k', fmt=None, elinewidth=.5, alpha=.5) #set the limits of the plot to some sensible values @@ -111,7 +112,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #add inducing inputs (if a sparse model is used) if hasattr(model,"Z"): #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims] - Zu = param_to_array(model.Z[:,free_dims]) + Zu = Z[:,free_dims] z_height = ax.get_ylim()[0] ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12) @@ -122,7 +123,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #define the frame for plotting on resolution = resolution or 50 - Xnew, _, _, xmin, xmax = 
x_frame2D(model.X[:,free_dims], plot_limits, resolution) + Xnew, _, _, xmin, xmax = x_frame2D(X[:,free_dims], plot_limits, resolution) Xgrid = np.empty((Xnew.shape[0],model.input_dim)) Xgrid[:,free_dims] = Xnew for i,v in fixed_inputs: @@ -131,15 +132,15 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #predict on the frame and plot if plot_raw: - m, _ = model._raw_predict(Xgrid, which_parts=which_parts) - Y = model.Y + m, _ = model._raw_predict(Xgrid) + Y = Y else: - m, _, _, _ = model.predict(Xgrid, which_parts=which_parts) - Y = model.data + m, _, _, _ = model.predict(Xgrid) + Y = Y for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) - ax.scatter(model.X[which_data_rows, free_dims[0]], model.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) + ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) 
#set the limits of the plot to some sensible values ax.set_xlim(xmin[0], xmax[0]) @@ -151,7 +152,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #add inducing inputs (if a sparse model is used) if hasattr(model,"Z"): #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims] - Zu = model.Z[:,free_dims] + Zu = Z[:,free_dims] ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo') else: diff --git a/GPy/testing/index_operations_tests.py b/GPy/testing/index_operations_tests.py index d5ef7007..64b0c908 100644 --- a/GPy/testing/index_operations_tests.py +++ b/GPy/testing/index_operations_tests.py @@ -24,6 +24,18 @@ class Test(unittest.TestCase): self.param_index.remove(one, [1]) self.assertListEqual(self.param_index[one].tolist(), [3]) + def test_shift_left(self): + self.param_index.shift_left(1, 2) + self.assertListEqual(self.param_index[three].tolist(), [2,5]) + self.assertListEqual(self.param_index[two].tolist(), [0,3]) + self.assertListEqual(self.param_index[one].tolist(), [1]) + + def test_shift_right(self): + self.param_index.shift_right(5, 2) + self.assertListEqual(self.param_index[three].tolist(), [2,4,9]) + self.assertListEqual(self.param_index[two].tolist(), [0,7]) + self.assertListEqual(self.param_index[one].tolist(), [3]) + def test_index_view(self): #======================================================================= # 0 1 2 3 4 5 6 7 8 9 diff --git a/GPy/testing/parameterized_tests.py b/GPy/testing/parameterized_tests.py index ff57606a..6f13d294 100644 --- a/GPy/testing/parameterized_tests.py +++ b/GPy/testing/parameterized_tests.py @@ -10,8 +10,8 @@ import numpy as np class Test(unittest.TestCase): def setUp(self): - self.rbf = GPy.kern.rbf(1) - self.white = GPy.kern.white(1) + self.rbf = GPy.kern.RBF(1) + self.white = GPy.kern.White(1) from GPy.core.parameterization import Param from GPy.core.parameterization.transformations import Logistic self.param = Param('param', np.random.rand(25,2), Logistic(0, 1)) @@ 
-39,14 +39,13 @@ class Test(unittest.TestCase): def test_remove_parameter(self): - from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__ + from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp self.white.fix() self.test1.remove_parameter(self.white) self.assertIs(self.test1._fixes_,None) self.assertListEqual(self.white._fixes_.tolist(), [FIXED]) - self.assertIs(self.white.constraints,self.white.white.constraints._param_index_ops) - self.assertEquals(self.white.white.constraints._offset, 0) + self.assertEquals(self.white.constraints._offset, 0) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops) @@ -57,18 +56,19 @@ class Test(unittest.TestCase): self.assertListEqual(self.test1.constraints[__fixed__].tolist(), [0]) self.assertIs(self.white._fixes_,None) self.assertListEqual(self.test1._fixes_.tolist(),[FIXED] + [UNFIXED] * 52) + self.test1.remove_parameter(self.white) self.assertIs(self.test1._fixes_,None) self.assertListEqual(self.white._fixes_.tolist(), [FIXED]) - self.assertIs(self.white.constraints,self.white.white.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) - self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops) + self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops) + self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1]) def test_add_parameter_already_in_hirarchy(self): self.test1.add_parameter(self.white._parameters_[0]) def test_default_constraints(self): - self.assertIs(self.rbf.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops) + self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops) self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops) 
self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2)) from GPy.core.parameterization.transformations import Logexp diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index c10fea4c..f93bb0ec 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -12,6 +12,7 @@ import decorators import classification import subarray_and_sorting import caching +import diag try: import sympy diff --git a/GPy/util/caching.py b/GPy/util/caching.py index 51ba56f3..55e546df 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -1,46 +1,88 @@ -from ..core.parameterization.array_core import ObservableArray, ParamList +from ..core.parameterization.parameter_core import Observable + class Cacher(object): - def __init__(self, operation, limit=5): + def __init__(self, operation, limit=5, reset_on_first=False): self.limit = int(limit) + self._reset_on_first = reset_on_first self.operation=operation - self.cached_inputs = ParamList([]) + self.cached_inputs = [] self.cached_outputs = [] self.inputs_changed = [] - def __call__(self, X): - assert isinstance(X, ObservableArray) - if X in self.cached_inputs: - i = self.cached_inputs.index(X) + def __call__(self, *args): + if self._reset_on_first: + assert isinstance(args[0], Observable) + args[0].add_observer(self, self.reset) + cached_args = args + else: + cached_args = args[1:] + + + if not all([isinstance(arg, Observable) for arg in cached_args]): + return self.operation(*args) + if cached_args in self.cached_inputs: + i = self.cached_inputs.index(cached_args) if self.inputs_changed[i]: - self.cached_outputs[i] = self.operation(X) + self.cached_outputs[i] = self.operation(*args) self.inputs_changed[i] = False return self.cached_outputs[i] else: if len(self.cached_inputs) == self.limit: - X_ = self.cached_inputs.pop(0) - X_.remove_observer(self) + args_ = self.cached_inputs.pop(0) + [a.remove_observer(self, self.on_cache_changed) for a in args_] self.inputs_changed.pop(0) self.cached_outputs.pop(0) - 
self.cached_inputs.append(X) - self.cached_outputs.append(self.operation(X)) + self.cached_inputs.append(cached_args) + self.cached_outputs.append(self.operation(*args)) self.inputs_changed.append(False) - X.add_observer(self, self.on_cache_changed) + [a.add_observer(self, self.on_cache_changed) for a in args] return self.cached_outputs[-1] - def on_cache_changed(self, X): - #print id(X) - Xbase = X - while Xbase is not None: - try: - i = self.cached_inputs.index(X) - break - except ValueError: - Xbase = X.base - continue - self.inputs_changed[i] = True + def on_cache_changed(self, arg): + self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)] + + def reset(self, obj): + [[a.remove_observer(self, self.reset) for a in args] for args in self.cached_inputs] + self.cached_inputs = [] + self.cached_outputs = [] + self.inputs_changed = [] + + + + +def cache_this(limit=5, reset_on_self=False): + def limited_cache(f): + c = Cacher(f, limit, reset_on_first=reset_on_self) + def f_wrap(*args): + return c(*args) + f_wrap._cacher = c + return f_wrap + return limited_cache + + + + + + + + + + + + + #Xbase = X + #while Xbase is not None: + #try: + #i = self.cached_inputs.index(X) + #break + #except ValueError: + #Xbase = X.base + #continue + #self.inputs_changed[i] = True + + - diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 059a39c3..23f5d0c8 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -513,8 +513,8 @@ def toy_rbf_1d(seed=default_seed, num_samples=500): num_in = 1 X = np.random.uniform(low= -1.0, high=1.0, size=(num_samples, num_in)) X.sort(axis=0) - rbf = GPy.kern.rbf(num_in, variance=1., lengthscale=np.array((0.25,))) - white = GPy.kern.white(num_in, variance=1e-2) + rbf = GPy.kern.RBF(num_in, variance=1., lengthscale=np.array((0.25,))) + white = GPy.kern.White(num_in, variance=1e-2) kernel = rbf + white K = kernel.K(X) y = 
np.reshape(np.random.multivariate_normal(np.zeros(num_samples), K), (num_samples, 1)) diff --git a/GPy/util/diag.py b/GPy/util/diag.py index 3d6b4dc9..3044ed54 100644 --- a/GPy/util/diag.py +++ b/GPy/util/diag.py @@ -11,14 +11,14 @@ import numpy as np def view(A, offset=0): """ Get a view on the diagonal elements of a 2D array. - - This is actually a view (!) on the diagonal of the array, so you can + + This is actually a view (!) on the diagonal of the array, so you can in-place adjust the view. - + :param :class:`ndarray` A: 2 dimensional numpy array :param int offset: view offset to give back (negative entries allowed) :rtype: :class:`ndarray` view of diag(A) - + >>> import numpy as np >>> X = np.arange(9).reshape(3,3) >>> view(X) @@ -36,7 +36,7 @@ def view(A, offset=0): """ from numpy.lib.stride_tricks import as_strided assert A.ndim == 2, "only implemented for 2 dimensions" - assert A.shape[0] == A.shape[1], "attempting to get the view of non-square matrix?!" + assert A.shape[0] == A.shape[1], "attempting to get the view of non-square matrix?!" if offset > 0: return as_strided(A[0, offset:], shape=(A.shape[0] - offset, ), strides=((A.shape[0]+1)*A.itemsize, )) elif offset < 0: @@ -44,6 +44,12 @@ def view(A, offset=0): else: return as_strided(A, shape=(A.shape[0], ), strides=((A.shape[0]+1)*A.itemsize, )) +def offdiag_view(A, offset=0): + from numpy.lib.stride_tricks import as_strided + assert A.ndim == 2, "only implemented for 2 dimensions" + Af = as_strided(A, shape=(A.size,), strides=(A.itemsize,)) + return as_strided(Af[(1+offset):], shape=(A.shape[0]-1, A.shape[1]), strides=(A.strides[0] + A.itemsize, A.strides[1])) + def _diag_ufunc(A,b,offset,func): dA = view(A, offset); func(dA,b,dA) return A @@ -51,11 +57,11 @@ def _diag_ufunc(A,b,offset,func): def times(A, b, offset=0): """ Times the view of A with b in place (!). - Returns modified A + Returns modified A Broadcasting is allowed, thus b can be scalar. 
- + if offset is not zero, make sure b is of right shape! - + :param ndarray A: 2 dimensional array :param ndarray-like b: either one dimensional or scalar :param int offset: same as in view. @@ -67,11 +73,11 @@ multiply = times def divide(A, b, offset=0): """ Divide the view of A by b in place (!). - Returns modified A + Returns modified A Broadcasting is allowed, thus b can be scalar. - + if offset is not zero, make sure b is of right shape! - + :param ndarray A: 2 dimensional array :param ndarray-like b: either one dimensional or scalar :param int offset: same as in view. @@ -84,9 +90,9 @@ def add(A, b, offset=0): Add b to the view of A in place (!). Returns modified A. Broadcasting is allowed, thus b can be scalar. - + if offset is not zero, make sure b is of right shape! - + :param ndarray A: 2 dimensional array :param ndarray-like b: either one dimensional or scalar :param int offset: same as in view. @@ -99,16 +105,16 @@ def subtract(A, b, offset=0): Subtract b from the view of A in place (!). Returns modified A. Broadcasting is allowed, thus b can be scalar. - + if offset is not zero, make sure b is of right shape! - + :param ndarray A: 2 dimensional array :param ndarray-like b: either one dimensional or scalar :param int offset: same as in view. :rtype: view of A, which is adjusted inplace """ return _diag_ufunc(A, b, offset, np.subtract) - + if __name__ == '__main__': import doctest - doctest.testmod() \ No newline at end of file + doctest.testmod() diff --git a/GPy/util/warping_functions.py b/GPy/util/warping_functions.py index 35ad3b80..a0a385e0 100644 --- a/GPy/util/warping_functions.py +++ b/GPy/util/warping_functions.py @@ -3,8 +3,6 @@ import numpy as np -import scipy as sp -import pylab as plt class WarpingFunction(object): """ @@ -39,6 +37,7 @@ class WarpingFunction(object): def plot(self, psi, xmin, xmax): y = np.arange(xmin, xmax, 0.01) f_y = self.f(y, psi) + from matplotlib import pyplot as plt plt.figure() plt.plot(y, f_y) plt.xlabel('y')