From 89e216b6a67cf7c8dd0c2e274299239e94d90ebe Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 19 Feb 2014 13:38:36 +0000 Subject: [PATCH 1/6] moved stuff. much breakage. Ow. --- GPy/kern/{parts => }/Brownian.py | 0 GPy/kern/{parts => }/Matern32.py | 0 GPy/kern/{parts => }/Matern52.py | 0 GPy/kern/{parts => }/ODE_1.py | 0 GPy/kern/__init__.py | 38 +++++++++++++++----- GPy/kern/__init__old.py | 9 +++++ GPy/kern/{parts => }/bias.py | 0 GPy/kern/{parts => }/coregionalize.py | 0 GPy/kern/{parts => }/eq_ode1.py | 0 GPy/kern/{parts => }/exponential.py | 0 GPy/kern/{parts => }/finite_dimensional.py | 0 GPy/kern/{parts => }/fixed.py | 0 GPy/kern/{parts => }/gibbs.py | 0 GPy/kern/{parts => }/hetero.py | 0 GPy/kern/{parts => }/hierarchical.py | 0 GPy/kern/{parts => }/independent_outputs.py | 0 GPy/kern/{parts => }/kernpart.py | 0 GPy/kern/{parts => }/linear.py | 0 GPy/kern/{parts => }/mlp.py | 0 GPy/kern/{parts => }/odekern1.c | 0 GPy/kern/parts/__init__.py | 29 --------------- GPy/kern/{parts => }/periodic_Matern32.py | 0 GPy/kern/{parts => }/periodic_Matern52.py | 0 GPy/kern/{parts => }/periodic_exponential.py | 0 GPy/kern/{parts => }/poly.py | 0 GPy/kern/{parts => }/prod.py | 0 GPy/kern/{parts => }/prod_orthogonal.py | 0 GPy/kern/{parts => }/rational_quadratic.py | 0 GPy/kern/{parts => }/rbf.py | 0 GPy/kern/{parts => }/rbf_inv.py | 0 GPy/kern/{parts => }/rbfcos.py | 0 GPy/kern/{parts => }/spline.py | 0 GPy/kern/{parts => }/ss_rbf.py | 0 GPy/kern/{parts => }/symmetric.py | 0 GPy/kern/{parts => }/sympy_helpers.cpp | 0 GPy/kern/{parts => }/sympy_helpers.h | 0 GPy/kern/{parts => }/sympykern.py | 0 GPy/kern/{parts => }/white.py | 0 38 files changed, 38 insertions(+), 38 deletions(-) rename GPy/kern/{parts => }/Brownian.py (100%) rename GPy/kern/{parts => }/Matern32.py (100%) rename GPy/kern/{parts => }/Matern52.py (100%) rename GPy/kern/{parts => }/ODE_1.py (100%) create mode 100644 GPy/kern/__init__old.py rename GPy/kern/{parts => }/bias.py (100%) rename GPy/kern/{parts => }/coregionalize.py (100%) rename GPy/kern/{parts => }/eq_ode1.py (100%) rename GPy/kern/{parts => }/exponential.py (100%) rename GPy/kern/{parts => }/finite_dimensional.py (100%) rename GPy/kern/{parts => }/fixed.py (100%) rename GPy/kern/{parts => }/gibbs.py (100%) rename GPy/kern/{parts => }/hetero.py (100%) rename GPy/kern/{parts => }/hierarchical.py (100%) rename GPy/kern/{parts => }/independent_outputs.py (100%) rename GPy/kern/{parts => }/kernpart.py (100%) rename GPy/kern/{parts => }/linear.py (100%) rename GPy/kern/{parts => }/mlp.py (100%) rename GPy/kern/{parts => }/odekern1.c (100%) delete mode 100644 GPy/kern/parts/__init__.py rename GPy/kern/{parts => }/periodic_Matern32.py (100%) rename GPy/kern/{parts => }/periodic_Matern52.py (100%) rename GPy/kern/{parts => }/periodic_exponential.py (100%) rename GPy/kern/{parts => }/poly.py (100%) rename GPy/kern/{parts => }/prod.py (100%) rename GPy/kern/{parts => }/prod_orthogonal.py (100%) rename GPy/kern/{parts => }/rational_quadratic.py (100%) rename GPy/kern/{parts => }/rbf.py (100%) rename GPy/kern/{parts => }/rbf_inv.py (100%) rename GPy/kern/{parts => }/rbfcos.py (100%) rename GPy/kern/{parts => }/spline.py (100%) rename GPy/kern/{parts => }/ss_rbf.py (100%) rename GPy/kern/{parts => }/symmetric.py (100%) rename GPy/kern/{parts => }/sympy_helpers.cpp (100%) rename GPy/kern/{parts => }/sympy_helpers.h (100%) rename GPy/kern/{parts => }/sympykern.py (100%) rename GPy/kern/{parts => }/white.py (100%) diff --git a/GPy/kern/parts/Brownian.py b/GPy/kern/Brownian.py 
similarity index 100% rename from GPy/kern/parts/Brownian.py rename to GPy/kern/Brownian.py diff --git a/GPy/kern/parts/Matern32.py b/GPy/kern/Matern32.py similarity index 100% rename from GPy/kern/parts/Matern32.py rename to GPy/kern/Matern32.py diff --git a/GPy/kern/parts/Matern52.py b/GPy/kern/Matern52.py similarity index 100% rename from GPy/kern/parts/Matern52.py rename to GPy/kern/Matern52.py diff --git a/GPy/kern/parts/ODE_1.py b/GPy/kern/ODE_1.py similarity index 100% rename from GPy/kern/parts/ODE_1.py rename to GPy/kern/ODE_1.py diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index eb4076c3..0a758f1e 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,9 +1,29 @@ -# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -from constructors import * -try: - from constructors import rbf_sympy, sympykern # these depend on sympy -except: - pass -from kern import * +import bias +import Brownian +import coregionalize +import exponential +import eq_ode1 +import finite_dimensional +import fixed +import gibbs +import hetero +import hierarchical +import independent_outputs +import linear +import Matern32 +import Matern52 +import mlp +import ODE_1 +import periodic_exponential +import periodic_Matern32 +import periodic_Matern52 +import poly +import prod_orthogonal +import prod +import rational_quadratic +import rbfcos +import rbf +import rbf_inv +import spline +import symmetric +import white diff --git a/GPy/kern/__init__old.py b/GPy/kern/__init__old.py new file mode 100644 index 00000000..eb4076c3 --- /dev/null +++ b/GPy/kern/__init__old.py @@ -0,0 +1,9 @@ +# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +from constructors import * +try: + from constructors import rbf_sympy, sympykern # these depend on sympy +except: + pass +from kern import * diff --git a/GPy/kern/parts/bias.py b/GPy/kern/bias.py similarity index 100% rename from GPy/kern/parts/bias.py rename to GPy/kern/bias.py diff --git a/GPy/kern/parts/coregionalize.py b/GPy/kern/coregionalize.py similarity index 100% rename from GPy/kern/parts/coregionalize.py rename to GPy/kern/coregionalize.py diff --git a/GPy/kern/parts/eq_ode1.py b/GPy/kern/eq_ode1.py similarity index 100% rename from GPy/kern/parts/eq_ode1.py rename to GPy/kern/eq_ode1.py diff --git a/GPy/kern/parts/exponential.py b/GPy/kern/exponential.py similarity index 100% rename from GPy/kern/parts/exponential.py rename to GPy/kern/exponential.py diff --git a/GPy/kern/parts/finite_dimensional.py b/GPy/kern/finite_dimensional.py similarity index 100% rename from GPy/kern/parts/finite_dimensional.py rename to GPy/kern/finite_dimensional.py diff --git a/GPy/kern/parts/fixed.py b/GPy/kern/fixed.py similarity index 100% rename from GPy/kern/parts/fixed.py rename to GPy/kern/fixed.py diff --git a/GPy/kern/parts/gibbs.py b/GPy/kern/gibbs.py similarity index 100% rename from GPy/kern/parts/gibbs.py rename to GPy/kern/gibbs.py diff --git a/GPy/kern/parts/hetero.py b/GPy/kern/hetero.py similarity index 100% rename from GPy/kern/parts/hetero.py rename to GPy/kern/hetero.py diff --git a/GPy/kern/parts/hierarchical.py b/GPy/kern/hierarchical.py similarity index 100% rename from GPy/kern/parts/hierarchical.py rename to GPy/kern/hierarchical.py diff --git a/GPy/kern/parts/independent_outputs.py b/GPy/kern/independent_outputs.py similarity index 100% rename from GPy/kern/parts/independent_outputs.py rename to 
GPy/kern/independent_outputs.py diff --git a/GPy/kern/parts/kernpart.py b/GPy/kern/kernpart.py similarity index 100% rename from GPy/kern/parts/kernpart.py rename to GPy/kern/kernpart.py diff --git a/GPy/kern/parts/linear.py b/GPy/kern/linear.py similarity index 100% rename from GPy/kern/parts/linear.py rename to GPy/kern/linear.py diff --git a/GPy/kern/parts/mlp.py b/GPy/kern/mlp.py similarity index 100% rename from GPy/kern/parts/mlp.py rename to GPy/kern/mlp.py diff --git a/GPy/kern/parts/odekern1.c b/GPy/kern/odekern1.c similarity index 100% rename from GPy/kern/parts/odekern1.c rename to GPy/kern/odekern1.c diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py deleted file mode 100644 index 0a758f1e..00000000 --- a/GPy/kern/parts/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -import bias -import Brownian -import coregionalize -import exponential -import eq_ode1 -import finite_dimensional -import fixed -import gibbs -import hetero -import hierarchical -import independent_outputs -import linear -import Matern32 -import Matern52 -import mlp -import ODE_1 -import periodic_exponential -import periodic_Matern32 -import periodic_Matern52 -import poly -import prod_orthogonal -import prod -import rational_quadratic -import rbfcos -import rbf -import rbf_inv -import spline -import symmetric -import white diff --git a/GPy/kern/parts/periodic_Matern32.py b/GPy/kern/periodic_Matern32.py similarity index 100% rename from GPy/kern/parts/periodic_Matern32.py rename to GPy/kern/periodic_Matern32.py diff --git a/GPy/kern/parts/periodic_Matern52.py b/GPy/kern/periodic_Matern52.py similarity index 100% rename from GPy/kern/parts/periodic_Matern52.py rename to GPy/kern/periodic_Matern52.py diff --git a/GPy/kern/parts/periodic_exponential.py b/GPy/kern/periodic_exponential.py similarity index 100% rename from GPy/kern/parts/periodic_exponential.py rename to GPy/kern/periodic_exponential.py diff --git a/GPy/kern/parts/poly.py b/GPy/kern/poly.py similarity index 100% rename from GPy/kern/parts/poly.py rename to GPy/kern/poly.py diff --git a/GPy/kern/parts/prod.py b/GPy/kern/prod.py similarity index 100% rename from GPy/kern/parts/prod.py rename to GPy/kern/prod.py diff --git a/GPy/kern/parts/prod_orthogonal.py b/GPy/kern/prod_orthogonal.py similarity index 100% rename from GPy/kern/parts/prod_orthogonal.py rename to GPy/kern/prod_orthogonal.py diff --git a/GPy/kern/parts/rational_quadratic.py b/GPy/kern/rational_quadratic.py similarity index 100% rename from GPy/kern/parts/rational_quadratic.py rename to GPy/kern/rational_quadratic.py diff --git a/GPy/kern/parts/rbf.py b/GPy/kern/rbf.py similarity index 100% rename from GPy/kern/parts/rbf.py rename to GPy/kern/rbf.py diff --git a/GPy/kern/parts/rbf_inv.py b/GPy/kern/rbf_inv.py similarity index 100% rename from GPy/kern/parts/rbf_inv.py rename to GPy/kern/rbf_inv.py diff --git a/GPy/kern/parts/rbfcos.py b/GPy/kern/rbfcos.py similarity index 100% rename from GPy/kern/parts/rbfcos.py rename to GPy/kern/rbfcos.py diff --git a/GPy/kern/parts/spline.py b/GPy/kern/spline.py similarity index 100% rename from GPy/kern/parts/spline.py rename to GPy/kern/spline.py diff --git a/GPy/kern/parts/ss_rbf.py b/GPy/kern/ss_rbf.py similarity index 100% rename from GPy/kern/parts/ss_rbf.py rename to GPy/kern/ss_rbf.py diff --git a/GPy/kern/parts/symmetric.py b/GPy/kern/symmetric.py similarity index 100% rename from GPy/kern/parts/symmetric.py rename to GPy/kern/symmetric.py diff --git a/GPy/kern/parts/sympy_helpers.cpp b/GPy/kern/sympy_helpers.cpp similarity index 
100% rename from GPy/kern/parts/sympy_helpers.cpp rename to GPy/kern/sympy_helpers.cpp diff --git a/GPy/kern/parts/sympy_helpers.h b/GPy/kern/sympy_helpers.h similarity index 100% rename from GPy/kern/parts/sympy_helpers.h rename to GPy/kern/sympy_helpers.h diff --git a/GPy/kern/parts/sympykern.py b/GPy/kern/sympykern.py similarity index 100% rename from GPy/kern/parts/sympykern.py rename to GPy/kern/sympykern.py diff --git a/GPy/kern/parts/white.py b/GPy/kern/white.py similarity index 100% rename from GPy/kern/parts/white.py rename to GPy/kern/white.py From 20f02a80b420696c131222e0fbf44046dcd2c3ab Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 19 Feb 2014 15:00:48 +0000 Subject: [PATCH 2/6] rbf and white seem to work --- GPy/core/gp.py | 2 +- GPy/kern/__init__.py | 61 +- GPy/kern/__init__old.py | 9 - GPy/kern/{ => _src}/Brownian.py | 0 GPy/kern/{ => _src}/Matern32.py | 0 GPy/kern/{ => _src}/Matern52.py | 0 GPy/kern/{ => _src}/ODE_1.py | 0 GPy/kern/_src/add.py | 264 ++++++++ GPy/kern/{ => _src}/bias.py | 0 GPy/kern/{ => _src}/constructors.py | 0 GPy/kern/{ => _src}/coregionalize.py | 0 GPy/kern/{ => _src}/eq_ode1.py | 0 GPy/kern/{ => _src}/exponential.py | 0 GPy/kern/{ => _src}/finite_dimensional.py | 0 GPy/kern/{ => _src}/fixed.py | 0 GPy/kern/{ => _src}/gibbs.py | 0 GPy/kern/{ => _src}/hetero.py | 0 GPy/kern/{ => _src}/hierarchical.py | 0 GPy/kern/{ => _src}/independent_outputs.py | 0 GPy/kern/_src/kern.py | 336 ++++++++++ GPy/kern/_src/kernpart.py | 60 ++ GPy/kern/{ => _src}/linear.py | 12 +- GPy/kern/{ => _src}/mlp.py | 0 GPy/kern/{ => _src}/odekern1.c | 0 GPy/kern/{ => _src}/periodic_Matern32.py | 0 GPy/kern/{ => _src}/periodic_Matern52.py | 0 GPy/kern/{ => _src}/periodic_exponential.py | 0 GPy/kern/{ => _src}/poly.py | 0 GPy/kern/{ => _src}/prod.py | 6 +- GPy/kern/{ => _src}/prod_orthogonal.py | 0 GPy/kern/{ => _src}/rational_quadratic.py | 0 GPy/kern/{ => _src}/rbf.py | 49 +- GPy/kern/{ => _src}/rbf_inv.py | 0 GPy/kern/{ => _src}/rbfcos.py | 0 GPy/kern/{ => _src}/spline.py | 0 GPy/kern/{ => _src}/ss_rbf.py | 0 GPy/kern/{ => _src}/symmetric.py | 0 GPy/kern/{ => _src}/sympy_helpers.cpp | 0 GPy/kern/{ => _src}/sympy_helpers.h | 0 GPy/kern/{ => _src}/sympykern.py | 0 GPy/kern/{ => _src}/white.py | 28 +- GPy/kern/kern.py | 680 -------------------- GPy/kern/kernpart.py | 176 ----- GPy/models/mrd.py | 6 +- GPy/plotting/matplot_dep/kernel_plots.py | 2 +- 45 files changed, 737 insertions(+), 954 deletions(-) delete mode 100644 GPy/kern/__init__old.py rename GPy/kern/{ => _src}/Brownian.py (100%) rename GPy/kern/{ => _src}/Matern32.py (100%) rename GPy/kern/{ => _src}/Matern52.py (100%) rename GPy/kern/{ => _src}/ODE_1.py (100%) create mode 100644 GPy/kern/_src/add.py rename GPy/kern/{ => _src}/bias.py (100%) rename GPy/kern/{ => _src}/constructors.py (100%) rename GPy/kern/{ => _src}/coregionalize.py (100%) rename GPy/kern/{ => _src}/eq_ode1.py (100%) rename GPy/kern/{ => _src}/exponential.py (100%) rename GPy/kern/{ => _src}/finite_dimensional.py (100%) rename GPy/kern/{ => _src}/fixed.py (100%) rename GPy/kern/{ => _src}/gibbs.py (100%) rename GPy/kern/{ => _src}/hetero.py (100%) rename GPy/kern/{ => _src}/hierarchical.py (100%) rename GPy/kern/{ => _src}/independent_outputs.py (100%) create mode 100644 GPy/kern/_src/kern.py create mode 100644 GPy/kern/_src/kernpart.py rename GPy/kern/{ => _src}/linear.py (98%) rename GPy/kern/{ => _src}/mlp.py (100%) rename GPy/kern/{ => _src}/odekern1.c (100%) rename GPy/kern/{ => _src}/periodic_Matern32.py (100%) rename GPy/kern/{ => 
_src}/periodic_Matern52.py (100%) rename GPy/kern/{ => _src}/periodic_exponential.py (100%) rename GPy/kern/{ => _src}/poly.py (100%) rename GPy/kern/{ => _src}/prod.py (98%) rename GPy/kern/{ => _src}/prod_orthogonal.py (100%) rename GPy/kern/{ => _src}/rational_quadratic.py (100%) rename GPy/kern/{ => _src}/rbf.py (92%) rename GPy/kern/{ => _src}/rbf_inv.py (100%) rename GPy/kern/{ => _src}/rbfcos.py (100%) rename GPy/kern/{ => _src}/spline.py (100%) rename GPy/kern/{ => _src}/ss_rbf.py (100%) rename GPy/kern/{ => _src}/symmetric.py (100%) rename GPy/kern/{ => _src}/sympy_helpers.cpp (100%) rename GPy/kern/{ => _src}/sympy_helpers.h (100%) rename GPy/kern/{ => _src}/sympykern.py (100%) rename GPy/kern/{ => _src}/white.py (77%) delete mode 100644 GPy/kern/kern.py delete mode 100644 GPy/kern/kernpart.py diff --git a/GPy/core/gp.py b/GPy/core/gp.py index d769678e..10ba8e6b 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -43,7 +43,7 @@ class GP(Model): else: self.Y_metadata = None - assert isinstance(kernel, kern.kern) + assert isinstance(kernel, kern.Kern) self.kern = kernel assert isinstance(likelihood, likelihoods.Likelihood) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 0a758f1e..2098bd76 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,29 +1,32 @@ -import bias -import Brownian -import coregionalize -import exponential -import eq_ode1 -import finite_dimensional -import fixed -import gibbs -import hetero -import hierarchical -import independent_outputs -import linear -import Matern32 -import Matern52 -import mlp -import ODE_1 -import periodic_exponential -import periodic_Matern32 -import periodic_Matern52 -import poly -import prod_orthogonal -import prod -import rational_quadratic -import rbfcos -import rbf -import rbf_inv -import spline -import symmetric -import white +from rbf import RBF +from white import White +from kern import Kern +#import bias +#import Brownian +#import coregionalize +#import exponential +#import eq_ode1 +#import finite_dimensional +#import fixed +#import gibbs +#import hetero +#import hierarchical +#import independent_outputs +#import linear +#import Matern32 +#import Matern52 +#import mlp +#import ODE_1 +#import periodic_exponential +#import periodic_Matern32 +#import periodic_Matern52 +#import poly +#import prod_orthogonal +#import prod +#import rational_quadratic +#import rbfcos +#import rbf +#import rbf_inv +#import spline +#import symmetric +#import white diff --git a/GPy/kern/__init__old.py b/GPy/kern/__init__old.py deleted file mode 100644 index eb4076c3..00000000 --- a/GPy/kern/__init__old.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt). 
-# Licensed under the BSD 3-clause license (see LICENSE.txt) - -from constructors import * -try: - from constructors import rbf_sympy, sympykern # these depend on sympy -except: - pass -from kern import * diff --git a/GPy/kern/Brownian.py b/GPy/kern/_src/Brownian.py similarity index 100% rename from GPy/kern/Brownian.py rename to GPy/kern/_src/Brownian.py diff --git a/GPy/kern/Matern32.py b/GPy/kern/_src/Matern32.py similarity index 100% rename from GPy/kern/Matern32.py rename to GPy/kern/_src/Matern32.py diff --git a/GPy/kern/Matern52.py b/GPy/kern/_src/Matern52.py similarity index 100% rename from GPy/kern/Matern52.py rename to GPy/kern/_src/Matern52.py diff --git a/GPy/kern/ODE_1.py b/GPy/kern/_src/ODE_1.py similarity index 100% rename from GPy/kern/ODE_1.py rename to GPy/kern/_src/ODE_1.py diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py new file mode 100644 index 00000000..8d916941 --- /dev/null +++ b/GPy/kern/_src/add.py @@ -0,0 +1,264 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import sys +import numpy as np +import itertools +from linear import Linear +from ..core.parameterization import Parameterized +from GPy.core.parameterization.param import Param +from kern import Kern + +class Add(Kern): + def __init__(self, subkerns, tensor): + assert all([isinstance(k, Kern) for k in subkerns]) + if tensor: + input_dim = sum([k.input_dim for k in subkerns]) + self.input_slices = [] + n = 0 + for k in subkerns: + self.input_slices.append(slice(n, n+k.input_dim)) + n += k.input_dim + else: + assert all([k.input_dim == subkerns[0].input_dim for k in subkerns]) + input_dim = subkerns[0].input_dim + self.input_slices = [slice(None) for k in subkerns] + super(Add, self).__init__(input_dim, 'add') + self.add_parameters(*subkerns) + + + def K(self, X, X2=None, which_parts='all'): + """ + Compute the kernel function. + + :param X: the first set of inputs to the kernel + :param X2: (optional) the second set of arguments to the kernel. If X2 + is None, this is passed through to the 'part' object, which + handles this as X2 == X. + :param which_parts: a list of booleans detailing whether to include + each of the part functions. By default, 'all' + indicates all parts + """ + if which_parts == 'all': + which_parts = [True] * self.size + assert X.shape[1] == self.input_dim + if X2 is None: + target = np.zeros((X.shape[0], X.shape[0])) + [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] + else: + target = np.zeros((X.shape[0], X2.shape[0])) + [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] + return target + + def update_gradients_full(self, dL_dK, X): + [p.update_gradients_full(dL_dK, X) for p in self._parameters_] + + def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): + [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_] + + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] + + def _param_grad_helper(self, dL_dK, X, X2=None): + """ + Compute the gradient of the covariance function with respect to the parameters. + + :param dL_dK: An array of gradients of the objective function with respect to the covariance function.
+ :type dL_dK: np.ndarray (num_samples x num_inducing) + :param X: Observed data inputs + :type X: np.ndarray (num_samples x input_dim) + :param X2: Observed data inputs (optional, defaults to X) + :type X2: np.ndarray (num_inducing x input_dim) + + returns: dL_dtheta + """ + assert X.shape[1] == self.input_dim + target = np.zeros(self.size) + if X2 is None: + [p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] + else: + [p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] + + return self._transform_gradients(target) + + def gradients_X(self, dL_dK, X, X2=None): + """Compute the gradient of the objective function with respect to X. + + :param dL_dK: An array of gradients of the objective function with respect to the covariance function. + :type dL_dK: np.ndarray (num_samples x num_inducing) + :param X: Observed data inputs + :type X: np.ndarray (num_samples x input_dim) + :param X2: Observed data inputs (optional, defaults to X) + :type X2: np.ndarray (num_inducing x input_dim)""" + + target = np.zeros_like(X) + if X2 is None: + [p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + else: + [p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + return target + + def Kdiag(self, X, which_parts='all'): + """Compute the diagonal of the covariance function for inputs X.""" + if which_parts == 'all': + which_parts = [True] * self.size + assert X.shape[1] == self.input_dim + target = np.zeros(X.shape[0]) + [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on] + return target + + def dKdiag_dtheta(self, dL_dKdiag, X): + """Compute the gradient of the diagonal of the covariance function with respect to the parameters.""" + assert X.shape[1] == self.input_dim + assert dL_dKdiag.size == X.shape[0] + target = np.zeros(self.size) + [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] + return self._transform_gradients(target) + + def dKdiag_dX(self, dL_dKdiag, X): + assert X.shape[1] == self.input_dim + target = np.zeros_like(X) + [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + return target + + def psi0(self, Z, mu, S): + target = np.zeros(mu.shape[0]) + [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] + return target + + def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): + target = np.zeros(self.size) + [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] + return self._transform_gradients(target) + + def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S): + target_mu, target_S = np.zeros_like(mu), np.zeros_like(S) + [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + return target_mu, target_S + + def psi1(self, Z, mu, S): + target = np.zeros((mu.shape[0], Z.shape[0])) + [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] + return target + + def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): + target =
np.zeros(self.size) + [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] + return self._transform_gradients(target) + + def dpsi1_dZ(self, dL_dpsi1, Z, mu, S): + target = np.zeros_like(Z) + [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + return target + + def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): + """return shapes are num_samples,num_inducing,input_dim""" + target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) + [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + return target_mu, target_S + + def psi2(self, Z, mu, S): + """ + Compute the psi2 statistics for the covariance function. + + :param Z: np.ndarray of inducing inputs (num_inducing x input_dim) + :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim) + :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing) + + """ + target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) + [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] + + # compute the "cross" terms + # TODO: input_slices needed + crossterms = 0 + + for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2): + if i_s1 == i_s2: + # TODO psi1 this must be faster/better/precached/more nice + tmp1 = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1) + tmp2 = np.zeros((mu.shape[0], Z.shape[0])) + p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2) + + prod = np.multiply(tmp1, tmp2) + crossterms += prod[:, :, None] + prod[:, None, :] + + target += crossterms + return target + + def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): + """Gradient of the psi2 statistics with respect to the parameters.""" + target = np.zeros(self.size) + [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] + + # compute the "cross" terms + # TODO: better looping, input_slices + for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2): + p1, p2 = self._parameters_[i1], self._parameters_[i2] +# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2] + ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2] + + tmp = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z, mu, S, tmp) + p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2]) + + return self._transform_gradients(target) + + def dpsi2_dZ(self, dL_dpsi2, Z, mu, S): + target = np.zeros_like(Z) + [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + # target *= 2 + + # compute the "cross" terms + # TODO: we need input_slices here.
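+ # For a sum of kernels the psi2 statistic contains, besides each part's own + # psi2, cross terms between every pair of parts; as in psi2 above, each cross + # term is built from the product of the two parts' psi1 statistics, which is + # why the gradients below loop over ordered pairs via itertools.permutations.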
+ for p1, p2 in itertools.permutations(self._parameters_, 2): +# if p1.name == 'linear' and p2.name == 'linear': +# raise NotImplementedError("We don't handle linear/linear cross-terms") + tmp = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z, mu, S, tmp) + p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target) + + return target * 2 + + def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S): + target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) + [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + + # compute the "cross" terms + # TODO: we need input_slices here. + for p1, p2 in itertools.permutations(self._parameters_, 2): +# if p1.name == 'linear' and p2.name == 'linear': +# raise NotImplementedError("We don't handle linear/linear cross-terms") + tmp = np.zeros((mu.shape[0], Z.shape[0])) + p1.psi1(Z, mu, S, tmp) + p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S) + + return target_mu, target_S + + def plot(self, *args, **kwargs): + """ + See GPy.plotting.matplot_dep.plot + """ + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import kernel_plots + kernel_plots.plot(self,*args,**kwargs) + + def _getstate(self): + """ + Get the current state of the class, + here just all the indices, rest can get recomputed + """ + return Parameterized._getstate(self) + [#self._parameters_, + self.input_dim, + self.input_slices, + self._param_slices_ + ] + + def _setstate(self, state): + self._param_slices_ = state.pop() + self.input_slices = state.pop() + self.input_dim = state.pop() + Parameterized._setstate(self, state) + + diff --git a/GPy/kern/bias.py b/GPy/kern/_src/bias.py similarity index 100% rename from GPy/kern/bias.py rename to GPy/kern/_src/bias.py diff --git a/GPy/kern/constructors.py b/GPy/kern/_src/constructors.py similarity index 100% rename from GPy/kern/constructors.py rename to GPy/kern/_src/constructors.py diff --git a/GPy/kern/coregionalize.py b/GPy/kern/_src/coregionalize.py similarity index 100% rename from GPy/kern/coregionalize.py rename to GPy/kern/_src/coregionalize.py diff --git a/GPy/kern/eq_ode1.py b/GPy/kern/_src/eq_ode1.py similarity index 100% rename from GPy/kern/eq_ode1.py rename to GPy/kern/_src/eq_ode1.py diff --git a/GPy/kern/exponential.py b/GPy/kern/_src/exponential.py similarity index 100% rename from GPy/kern/exponential.py rename to GPy/kern/_src/exponential.py diff --git a/GPy/kern/finite_dimensional.py b/GPy/kern/_src/finite_dimensional.py similarity index 100% rename from GPy/kern/finite_dimensional.py rename to GPy/kern/_src/finite_dimensional.py diff --git a/GPy/kern/fixed.py b/GPy/kern/_src/fixed.py similarity index 100% rename from GPy/kern/fixed.py rename to GPy/kern/_src/fixed.py diff --git a/GPy/kern/gibbs.py b/GPy/kern/_src/gibbs.py similarity index 100% rename from GPy/kern/gibbs.py rename to GPy/kern/_src/gibbs.py diff --git a/GPy/kern/hetero.py b/GPy/kern/_src/hetero.py similarity index 100% rename from GPy/kern/hetero.py rename to GPy/kern/_src/hetero.py diff --git a/GPy/kern/hierarchical.py b/GPy/kern/_src/hierarchical.py similarity index 100% rename from GPy/kern/hierarchical.py rename to GPy/kern/_src/hierarchical.py diff --git a/GPy/kern/independent_outputs.py b/GPy/kern/_src/independent_outputs.py similarity index 100% rename from GPy/kern/independent_outputs.py rename to GPy/kern/_src/independent_outputs.py diff --git a/GPy/kern/_src/kern.py
b/GPy/kern/_src/kern.py new file mode 100644 index 00000000..af362498 --- /dev/null +++ b/GPy/kern/_src/kern.py @@ -0,0 +1,336 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import sys +import numpy as np +import itertools +from ..core.parameterization import Parameterized +from GPy.core.parameterization.param import Param + + +class Kern(Parameterized): + def __init__(self,input_dim,name): + """ + The base class for a kernel: a positive definite function + which forms a covariance function (kernel). + + :param input_dim: the number of input dimensions to the function + :type input_dim: int + + Do not instantiate. + """ + super(Kern, self).__init__(name) + self.input_dim = input_dim + + def K(self,X,X2,target): + raise NotImplementedError + def Kdiag(self,X,target): + raise NotImplementedError + def _param_grad_helper(self,dL_dK,X,X2,target): + raise NotImplementedError + def dKdiag_dtheta(self,dL_dKdiag,X,target): # TODO: Max?? + # In the base case compute this by calling _param_grad_helper. Need to + # override for stationary covariances (for example) to save + # time. + for i in range(X.shape[0]): + self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target) + def psi0(self,Z,mu,S,target): + raise NotImplementedError + def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): + raise NotImplementedError + def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S): + raise NotImplementedError + def psi1(self,Z,mu,S,target): + raise NotImplementedError + def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target): + raise NotImplementedError + def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target): + raise NotImplementedError + def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S): + raise NotImplementedError + def psi2(self,Z,mu,S,target): + raise NotImplementedError + def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target): + raise NotImplementedError + def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): + raise NotImplementedError + def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): + raise NotImplementedError + def gradients_X(self, dL_dK, X, X2, target): + raise NotImplementedError + def dKdiag_dX(self, dL_dK, X, target): + raise NotImplementedError + def update_gradients_full(self, dL_dK, X): + """Set the gradients of all parameters when doing full (N) inference.""" + raise NotImplementedError + def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): + """Set the gradients of all parameters when doing sparse (M) inference.""" + raise NotImplementedError + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs.""" + raise NotImplementedError + + def plot_ARD(self, *args): + """If an ARD kernel is present, plot a bar representation using matplotlib + + See GPy.plotting.matplot_dep.plot_ARD + """ + assert "matplotlib" in sys.modules, "matplotlib package has not been imported." + from ..plotting.matplot_dep import kernel_plots + return kernel_plots.plot_ARD(self,*args) + + + def __add__(self, other): + """ Overloading of the '+' operator. For more control, see self.add """ + return self.add(other) + + def add(self, other, tensor=False): + """ + Add another kernel to this one. + + If Tensor is False, both kernels are defined on the same _space_; then + the created kernel will have the same number of inputs as self and + other (which must be the same).
+ + If Tensor is True, then the dimensions are stacked 'horizontally', so + that the resulting kernel has self.input_dim + other.input_dim + + :param other: the other kernel to be added + :type other: GPy.kern + + """ + assert isinstance(other, Kern), "only kernels can be added to kernels..." + from add import Add + return Add([self, other], tensor) + + def __call__(self, X, X2=None): + return self.K(X, X2) + + def __mul__(self, other): + """ Here we overload the '*' operator. See self.prod for more information""" + return self.prod(other) + + def __pow__(self, other, tensor=False): + """ + Shortcut for tensor `prod`. + """ + return self.prod(other, tensor=True) + + def prod(self, other, tensor=False): + """ + Multiply two kernels (either on the same space, or on the tensor product of the input space). + + :param other: the other kernel to be multiplied + :type other: GPy.kern + :param tensor: whether or not to use the tensor space (default is false). + :type tensor: bool + + """ + assert isinstance(other, Kern), "only kernels can be multiplied by kernels..." + from prod import Prod + return Prod(self, other, tensor) + + +from GPy.core.model import Model + +class Kern_check_model(Model): + """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel.""" + def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): + Model.__init__(self, 'kernel_test_model') + num_samples = 20 + num_samples2 = 10 + if kernel==None: + kernel = GPy.kern.rbf(1) + if X==None: + X = np.random.randn(num_samples, kernel.input_dim) + if dL_dK==None: + if X2==None: + dL_dK = np.ones((X.shape[0], X.shape[0])) + else: + dL_dK = np.ones((X.shape[0], X2.shape[0])) + + self.kernel=kernel + self.add_parameter(kernel) + self.X = X + self.X2 = X2 + self.dL_dK = dL_dK + + def is_positive_definite(self): + v = np.linalg.eig(self.kernel.K(self.X))[0] + if any(v<-10*sys.float_info.epsilon): + return False + else: + return True + + def log_likelihood(self): + return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum() + + def _log_likelihood_gradients(self): + raise NotImplementedError, "This needs to be implemented to use the kern_check_model class." + +class Kern_check_dK_dtheta(Kern_check_model): + """This class allows gradient checks for the gradient of a kernel with respect to parameters. """ + def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): + Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) + + def _log_likelihood_gradients(self): + return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2) + + + + + +class Kern_check_dKdiag_dtheta(Kern_check_model): + """This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters.""" + def __init__(self, kernel=None, dL_dK=None, X=None): + Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) + if dL_dK==None: + self.dL_dK = np.ones((self.X.shape[0])) + def parameters_changed(self): + self.kernel.update_gradients_full(self.dL_dK, self.X) + + def log_likelihood(self): + return (self.dL_dK*self.kernel.Kdiag(self.X)).sum() + + def _log_likelihood_gradients(self): + return self.kernel.dKdiag_dtheta(self.dL_dK, self.X) + +class Kern_check_dK_dX(Kern_check_model): + """This class allows gradient checks for the gradient of a kernel with respect to X.
""" + def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): + Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) + self.remove_parameter(kernel) + self.X = Param('X', self.X) + self.add_parameter(self.X) + def _log_likelihood_gradients(self): + return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten() + +class Kern_check_dKdiag_dX(Kern_check_dK_dX): + """This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """ + def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): + Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) + if dL_dK==None: + self.dL_dK = np.ones((self.X.shape[0])) + + def log_likelihood(self): + return (self.dL_dK*self.kernel.Kdiag(self.X)).sum() + + def _log_likelihood_gradients(self): + return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten() + +def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False): + """ + This function runs on kernels to check the correctness of their + implementation. It checks that the covariance function is positive definite + for a randomly generated data set. + + :param kern: the kernel to be tested. + :type kern: GPy.kern.Kernpart + :param X: X input values to test the covariance function. + :type X: ndarray + :param X2: X2 input values to test the covariance function. + :type X2: ndarray + + """ + pass_checks = True + if X==None: + X = np.random.randn(10, kern.input_dim) + if output_ind is not None: + X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0]) + if X2==None: + X2 = np.random.randn(20, kern.input_dim) + if output_ind is not None: + X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0]) + + if verbose: + print("Checking covariance function is positive definite.") + result = Kern_check_model(kern, X=X).is_positive_definite() + if result and verbose: + print("Check passed.") + if not result: + print("Positive definite check failed for " + kern.name + " covariance function.") + pass_checks = False + return False + + if verbose: + print("Checking gradients of K(X, X) wrt theta.") + result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose) + if result and verbose: + print("Check passed.") + if not result: + print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") + Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True) + pass_checks = False + return False + + if verbose: + print("Checking gradients of K(X, X2) wrt theta.") + result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose) + if result and verbose: + print("Check passed.") + if not result: + print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") + Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True) + pass_checks = False + return False + + if verbose: + print("Checking gradients of Kdiag(X) wrt theta.") + result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose) + if result and verbose: + print("Check passed.") + if not result: + print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. 
Gradient values as follows:") + Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True) + pass_checks = False + return False + + if verbose: + print("Checking gradients of K(X, X) wrt X.") + try: + result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose) + except NotImplementedError: + result=True + if verbose: + print("gradients_X not implemented for " + kern.name) + if result and verbose: + print("Check passed.") + if not result: + print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") + Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True) + pass_checks = False + return False + + if verbose: + print("Checking gradients of K(X, X2) wrt X.") + try: + result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose) + except NotImplementedError: + result=True + if verbose: + print("gradients_X not implemented for " + kern.name) + if result and verbose: + print("Check passed.") + if not result: + print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") + Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True) + pass_checks = False + return False + + if verbose: + print("Checking gradients of Kdiag(X) wrt X.") + try: + result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose) + except NotImplementedError: + result=True + if verbose: + print("gradients_X not implemented for " + kern.name) + if result and verbose: + print("Check passed.") + if not result: + print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") + Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True) + pass_checks = False + return False + + return pass_checks diff --git a/GPy/kern/_src/kernpart.py b/GPy/kern/_src/kernpart.py new file mode 100644 index 00000000..097ed741 --- /dev/null +++ b/GPy/kern/_src/kernpart.py @@ -0,0 +1,60 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) +#from ...core.parameterized.Parameterized import set_as_parameter +from ...core.parameterization import Parameterized +import numpy as np + +class Kernpart_stationary(Kernpart): + def __init__(self, input_dim, lengthscale=None, ARD=False): + self.input_dim = input_dim + self.ARD = ARD + if not ARD: + self.num_params = 2 + if lengthscale is not None: + self.lengthscale = np.asarray(lengthscale) + assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" + else: + self.lengthscale = np.ones(1) + else: + self.num_params = self.input_dim + 1 + if lengthscale is not None: + self.lengthscale = np.asarray(lengthscale) + assert self.lengthscale.size == self.input_dim, "bad number of lengthscales" + else: + self.lengthscale = np.ones(self.input_dim) + + # initialize cache + self._Z, self._mu, self._S = np.empty(shape=(3, 1)) + self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) + + def _set_params(self, x): + self.lengthscale = x + self.lengthscale2 = np.square(self.lengthscale) + # reset cached results + self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) + self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S + + + def dKdiag_dtheta(self, dL_dKdiag, X, target): + # For stationary covariances, derivative of diagonal elements + # wrt lengthscale is 0.
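+ # The diagonal of a stationary kernel is constant (equal to the variance), + # so, assuming parameter slot 0 holds the variance, only that entry of the + # gradient vector accumulates; the lengthscale entries stay zero.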
+ target[0] += np.sum(dL_dKdiag) + + def dKdiag_dX(self, dL_dK, X, target): + pass # true for all stationary kernels + + +class Kernpart_inner(Kernpart): + def __init__(self,input_dim): + """ + The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs. + + :param input_dim: the number of input dimensions to the function + :type input_dim: int + + Do not instantiate. + """ + Kernpart.__init__(self, input_dim) + + # initialize cache + self._Z, self._mu, self._S = np.empty(shape=(3, 1)) + self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) diff --git a/GPy/kern/linear.py b/GPy/kern/_src/linear.py similarity index 98% rename from GPy/kern/linear.py rename to GPy/kern/_src/linear.py index 828ece11..ab77d4e6 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/_src/linear.py @@ -4,13 +4,13 @@ import numpy as np from scipy import weave -from kernpart import Kernpart -from ...util.linalg import tdot -from ...util.misc import fast_array_equal, param_to_array -from ...core.parameterization import Param -from ...core.parameterization.transformations import Logexp +from kern import Kern +from ..util.linalg import tdot +from ..util.misc import fast_array_equal, param_to_array +from ..core.parameterization import Param +from ..core.parameterization.transformations import Logexp -class Linear(Kernpart): +class Linear(Kern): """ Linear kernel diff --git a/GPy/kern/mlp.py b/GPy/kern/_src/mlp.py similarity index 100% rename from GPy/kern/mlp.py rename to GPy/kern/_src/mlp.py diff --git a/GPy/kern/odekern1.c b/GPy/kern/_src/odekern1.c similarity index 100% rename from GPy/kern/odekern1.c rename to GPy/kern/_src/odekern1.c diff --git a/GPy/kern/periodic_Matern32.py b/GPy/kern/_src/periodic_Matern32.py similarity index 100% rename from GPy/kern/periodic_Matern32.py rename to GPy/kern/_src/periodic_Matern32.py diff --git a/GPy/kern/periodic_Matern52.py b/GPy/kern/_src/periodic_Matern52.py similarity index 100% rename from GPy/kern/periodic_Matern52.py rename to GPy/kern/_src/periodic_Matern52.py diff --git a/GPy/kern/periodic_exponential.py b/GPy/kern/_src/periodic_exponential.py similarity index 100% rename from GPy/kern/periodic_exponential.py rename to GPy/kern/_src/periodic_exponential.py diff --git a/GPy/kern/poly.py b/GPy/kern/_src/poly.py similarity index 100% rename from GPy/kern/poly.py rename to GPy/kern/_src/poly.py diff --git a/GPy/kern/prod.py b/GPy/kern/_src/prod.py similarity index 98% rename from GPy/kern/prod.py rename to GPy/kern/_src/prod.py index 364c91b3..08221de7 100644 --- a/GPy/kern/prod.py +++ b/GPy/kern/_src/prod.py @@ -1,17 +1,17 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import Kernpart +from kern import Kern from coregionalize import Coregionalize import numpy as np import hashlib -class Prod(Kernpart): +class Prod(Kern): """ Computes the product of 2 kernels :param k1, k2: the kernels to multiply - :type k1, k2: Kernpart + :type k1, k2: Kern :param tensor: The kernels are either multiplied as functions defined on the same input space (default) or on the product of the input spaces :type tensor: Boolean :rtype: kernel object diff --git a/GPy/kern/prod_orthogonal.py b/GPy/kern/_src/prod_orthogonal.py similarity index 100% rename from GPy/kern/prod_orthogonal.py rename to GPy/kern/_src/prod_orthogonal.py diff --git a/GPy/kern/rational_quadratic.py b/GPy/kern/_src/rational_quadratic.py similarity index 100% rename from GPy/kern/rational_quadratic.py rename to GPy/kern/_src/rational_quadratic.py diff --git a/GPy/kern/rbf.py b/GPy/kern/_src/rbf.py similarity index 92% rename from GPy/kern/rbf.py rename to GPy/kern/_src/rbf.py index 027aa382..36e454e3 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -4,13 +4,13 @@ import numpy as np from scipy import weave -from kernpart import Kernpart -from ...util.linalg import tdot -from ...util.misc import fast_array_equal, param_to_array -from ...core.parameterization import Param -from ...core.parameterization.transformations import Logexp +from kern import Kern +from ..util.linalg import tdot +from ..util.misc import fast_array_equal, param_to_array +from ..core.parameterization import Param +from ..core.parameterization.transformations import Logexp -class RBF(Kernpart): +class RBF(Kern): """ Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel: @@ -52,30 +52,16 @@ class RBF(Kernpart): lengthscale = np.ones(self.input_dim) self.variance = Param('variance', variance, Logexp()) - + self.lengthscale = Param('lengthscale', lengthscale, Logexp()) self.lengthscale.add_observer(self, self.update_lengthscale) self.update_lengthscale(self.lengthscale) - + self.add_parameters(self.variance, self.lengthscale) self.parameters_changed() # initializes cache - #self.update_inv_lengthscale(self.lengthscale) - #self.parameters_changed() - # initialize cache - #self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - #self._X, self._X2, self._params_save = np.empty(shape=(3, 1)) - - # a set of optional args to pass to weave - # self.weave_options = {'headers' : ['<omp.h>'], - # 'extra_compile_args': ['-fopenmp -O3'], # -march=native'], - # 'extra_link_args' : ['-lgomp']} self.weave_options = {} - def on_input_change(self, X): - #self._K_computations(X, None) - pass - def update_lengthscale(self, l): self.lengthscale2 = np.square(self.lengthscale) @@ -84,13 +70,16 @@ class RBF(Kernpart): self._X, self._X2 = np.empty(shape=(2, 1)) self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S - def K(self, X, X2, target): + def K(self, X, X2=None): self._K_computations(X, X2) - target += self.variance * self._K_dvar + return self.variance * self._K_dvar - def Kdiag(self, X, target): - np.add(target, self.variance, target) + def Kdiag(self, X): + ret = np.ones(X.shape[0]) + ret[:] = self.variance + return ret + #TODO: remove TARGET!
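+ # The psi statistics below still use the old convention of accumulating into + # a preallocated target array; K and Kdiag above have been moved to the new + # return-an-array interface, and this TODO marks the rest for the same change.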
def psi0(self, Z, mu, S, target): target += self.variance @@ -165,7 +154,7 @@ class RBF(Kernpart): else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - def gradients_X(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2): #if self._X is None or X.base is not self._X.base or X2 is not None: self._K_computations(X, X2) if X2 is None: @@ -173,10 +162,10 @@ else: _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) - target += np.sum(gradients_X * dL_dK.T[:, :, None], 0) + return np.sum(gradients_X * dL_dK.T[:, :, None], 0) - def dKdiag_dX(self, dL_dKdiag, X, target): - pass + def dKdiag_dX(self, dL_dKdiag, X): + return np.zeros_like(X) #---------------------------------------# # PSI statistics # diff --git a/GPy/kern/rbf_inv.py b/GPy/kern/_src/rbf_inv.py similarity index 100% rename from GPy/kern/rbf_inv.py rename to GPy/kern/_src/rbf_inv.py diff --git a/GPy/kern/rbfcos.py b/GPy/kern/_src/rbfcos.py similarity index 100% rename from GPy/kern/rbfcos.py rename to GPy/kern/_src/rbfcos.py diff --git a/GPy/kern/spline.py b/GPy/kern/_src/spline.py similarity index 100% rename from GPy/kern/spline.py rename to GPy/kern/_src/spline.py diff --git a/GPy/kern/ss_rbf.py b/GPy/kern/_src/ss_rbf.py similarity index 100% rename from GPy/kern/ss_rbf.py rename to GPy/kern/_src/ss_rbf.py diff --git a/GPy/kern/symmetric.py b/GPy/kern/_src/symmetric.py similarity index 100% rename from GPy/kern/symmetric.py rename to GPy/kern/_src/symmetric.py diff --git a/GPy/kern/sympy_helpers.cpp b/GPy/kern/_src/sympy_helpers.cpp similarity index 100% rename from GPy/kern/sympy_helpers.cpp rename to GPy/kern/_src/sympy_helpers.cpp diff --git a/GPy/kern/sympy_helpers.h b/GPy/kern/_src/sympy_helpers.h similarity index 100% rename from GPy/kern/sympy_helpers.h rename to GPy/kern/_src/sympy_helpers.h diff --git a/GPy/kern/sympykern.py b/GPy/kern/_src/sympykern.py similarity index 100% rename from GPy/kern/sympykern.py rename to GPy/kern/_src/sympykern.py diff --git a/GPy/kern/white.py b/GPy/kern/_src/white.py similarity index 77% rename from GPy/kern/white.py rename to GPy/kern/_src/white.py index c7e4c6dd..7750267f 100644 --- a/GPy/kern/white.py +++ b/GPy/kern/_src/white.py @@ -1,12 +1,12 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import Kernpart +from kern import Kern import numpy as np -from ...core.parameterization import Param -from ...core.parameterization.transformations import Logexp +from ..core.parameterization import Param +from ..core.parameterization.transformations import Logexp -class White(Kernpart): +class White(Kern): """ White noise kernel.
@@ -22,12 +22,14 @@ class White(Kernpart): self.add_parameters(self.variance) self._psi1 = 0 # TODO: more elegance here - def K(self,X,X2,target): + def K(self,X,X2): if X2 is None: - target += np.eye(X.shape[0])*self.variance + return np.eye(X.shape[0])*self.variance + return np.zeros((X.shape[0], X2.shape[0])) - def Kdiag(self,X,target): - target += self.variance + def Kdiag(self,X): + ret = np.ones(X.shape[0]) + ret[:] = self.variance + return ret def update_gradients_full(self, dL_dK, X): self.variance.gradient = np.trace(dL_dK) @@ -38,14 +40,8 @@ def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): raise NotImplementedError - def dKdiag_dtheta(self,dL_dKdiag,X,target): - target += np.sum(dL_dKdiag) - - def gradients_X(self,dL_dK,X,X2,target): - pass - - def dKdiag_dX(self,dL_dKdiag,X,target): - pass + def gradients_X(self,dL_dK,X,X2): + return np.zeros_like(X) def psi0(self,Z,mu,S,target): pass # target += self.variance diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py deleted file mode 100644 index 53728d0d..00000000 --- a/GPy/kern/kern.py +++ /dev/null @@ -1,680 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - -import sys -import numpy as np -import itertools -from parts.prod import Prod as prod -from parts.linear import Linear -from parts.kernpart import Kernpart -from ..core.parameterization import Parameterized -from GPy.core.parameterization.param import Param - -class kern(Parameterized): - def __init__(self, input_dim, parts=[], input_slices=None): - """ - This is the main kernel class for GPy. It handles multiple - (additive) kernel functions, and keeps track of various things - like which parameters live where. - - The technical code for kernels is divided into _parts_ (see - e.g. rbf.py). This object contains a list of parts, which are - computed additively. For multiplication, special _prod_ parts - are used.
- - :param input_dim: The dimensionality of the kernel's input space - :type input_dim: int - :param parts: the 'parts' (PD functions) of the kernel - :type parts: list of Kernpart objects - :param input_slices: the slices on the inputs which apply to each kernel - :type input_slices: list of slice objects, or list of bools - - """ - super(kern, self).__init__('kern') - self.add_parameters(*parts) - self.input_dim = input_dim - - if input_slices is None: - self.input_slices = [slice(None) for p in self._parameters_] - else: - assert len(input_slices) == len(self._parameters_) - self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices] - - for p in self._parameters_: - assert isinstance(p, Kernpart), "bad kernel part" - - def parameters_changed(self): - [p.parameters_changed() for p in self._parameters_] - - def connect_input(self, Xparam): - [p.connect_input(Xparam) for p in self._parameters_] - - def _getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - """ - return Parameterized._getstate(self) + [#self._parameters_, - #self.num_params, - self.input_dim, - self.input_slices, - self._param_slices_ - ] - - def _setstate(self, state): - self._param_slices_ = state.pop() - self.input_slices = state.pop() - self.input_dim = state.pop() - #self.num_params = state.pop() - #self._parameters_ = state.pop() - Parameterized._setstate(self, state) - - - def plot_ARD(self, *args): - """If an ARD kernel is present, plot a bar representation using matplotlib - - See GPy.plotting.matplot_dep.plot_ARD - """ - assert "matplotlib" in sys.modules, "matplotlib package has not been imported." - from ..plotting.matplot_dep import kernel_plots - return kernel_plots.plot_ARD(self,*args) - -# def _transform_gradients(self, g): -# """ -# Apply the transformations of the kernel so that the returned vector -# represents the gradient in the transformed space (i.e. that given by -# get_params_transformed()) -# -# :param g: the gradient vector for the current model, usually created by _param_grad_helper -# """ -# x = self._get_params() -# [np.place(g, index, g[index] * constraint.gradfactor(x[index])) -# for constraint, index in self.constraints.iteritems() if constraint is not __fixed__] -# # for constraint, index in self.constraints.iteritems(): -# # if constraint != __fixed__: -# # g[index] = g[index] * constraint.gradfactor(x[index]) -# #[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]] -# [np.put(g, i, v) for i, v in [[i, t.sum()] for p in self._parameters_ for t,i in p._tied_to_me_.iteritems()]] -# # if len(self.tied_indices) or len(self.fixed_indices): -# # to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices])) -# # return np.delete(g, to_remove) -# # else: -# if self._fixes_ is not None: return g[self._fixes_] -# return g -# x = self._get_params() -# [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)] -# [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]] -# if len(self.tied_indices) or len(self.fixed_indices): -# to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices])) -# return np.delete(g, to_remove) -# else: -# return g - - def __add__(self, other): - """ Overloading of the '+' operator. for more control, see self.add """ - return self.add(other) - - def add(self, other, tensor=False): - """ - Add another kernel to this one. 
- - If Tensor is False, both kernels are defined on the same _space_. then - the created kernel will have the same number of inputs as self and - other (which must be the same). - - If Tensor is True, then the dimensions are stacked 'horizontally', so - that the resulting kernel has self.input_dim + other.input_dim - - :param other: the other kernel to be added - :type other: GPy.kern - - """ - if tensor: - D = self.input_dim + other.input_dim - self_input_slices = [slice(*sl.indices(self.input_dim)) for sl in self.input_slices] - other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices] - other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices] - - newkern = kern(D, self._parameters_ + other._parameters_, self_input_slices + other_input_slices) - - # transfer constraints: -# newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices] -# newkern.constraints = self.constraints + other.constraints -# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices] -# newkern.fixed_values = self.fixed_values + other.fixed_values -# newkern.constraints = self.constraints + other.constraints -# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices] - else: - assert self.input_dim == other.input_dim - newkern = kern(self.input_dim, self._parameters_ + other._parameters_, self.input_slices + other.input_slices) - # transfer constraints: -# newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices] -# newkern.constraints = self.constraints + other.constraints -# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices] -# newkern.fixed_values = self.fixed_values + other.fixed_values -# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices] - - [newkern.constraints.add(transform, ind) for transform, ind in self.constraints.iteritems()] - [newkern.constraints.add(transform, ind+self.size) for transform, ind in other.constraints.iteritems()] - newkern._fixes_ = ((self._fixes_ or 0) + (other._fixes_ or 0)) or None - - return newkern - - def __call__(self, X, X2=None): - return self.K(X, X2) - - def __mul__(self, other): - """ Here we overload the '*' operator. See self.prod for more information""" - return self.prod(other) - - def __pow__(self, other, tensor=False): - """ - Shortcut for tensor `prod`. - """ - return self.prod(other, tensor=True) - - def prod(self, other, tensor=False): - """ - Multiply two kernels (either on the same space, or on the tensor product of the input space). - - :param other: the other kernel to be added - :type other: GPy.kern - :param tensor: whether or not to use the tensor space (default is false). 
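The slice bookkeeping in the tensor branch above, as a tiny sketch with hypothetical names: kernel B's input slices are resolved against B's own input_dim and then shifted past kernel A's columns, which is what the `other_input_slices` line computes.

def shift_slices(slices_b, dim_a, dim_b):
    # resolve each slice against B's dims, then shift it past A's dims
    out = []
    for sl in slices_b:
        start, stop, step = sl.indices(dim_b)
        out.append(slice(start + dim_a, stop + dim_a, step))
    return out

# A acts on 2 columns, B on 1: B's slice(None) becomes slice(2, 3, 1)
print(shift_slices([slice(None)], dim_a=2, dim_b=1))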
- :type tensor: bool - - """ - K1 = self - K2 = other - #K1 = self.copy() - #K2 = other.copy() - - slices = [] - for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices): - s1, s2 = [False] * K1.input_dim, [False] * K2.input_dim - s1[sl1], s2[sl2] = [True], [True] - slices += [s1 + s2] - - newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1._parameters_, K2._parameters_)] - - if tensor: - newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices) - else: - newkern = kern(K1.input_dim, newkernparts, slices) - - #newkern._follow_constrains(K1, K2) - return newkern - -# def _follow_constrains(self, K1, K2): -# -# # Build the array that allows to go from the initial indices of the param to the new ones -# K1_param = [] -# n = 0 -# for k1 in K1.parts: -# K1_param += [range(n, n + k1.num_params)] -# n += k1.num_params -# n = 0 -# K2_param = [] -# for k2 in K2.parts: -# K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)] -# n += k2.num_params -# index_param = [] -# for p1 in K1_param: -# for p2 in K2_param: -# index_param += p1 + p2 -# index_param = np.array(index_param) -# -# # Get the ties and constrains of the kernels before the multiplication -# prev_ties = K1.tied_indices + [arr + K1.num_params for arr in K2.tied_indices] -# -# prev_constr_ind = [K1.constrained_indices] + [K1.num_params + i for i in K2.constrained_indices] -# prev_constr = K1.constraints + K2.constraints -# -# # prev_constr_fix = K1.fixed_indices + [arr + K1.num_params for arr in K2.fixed_indices] -# # prev_constr_fix_values = K1.fixed_values + K2.fixed_values -# -# # follow the previous ties -# for arr in prev_ties: -# for j in arr: -# index_param[np.where(index_param == j)[0]] = arr[0] -# -# # ties and constrains -# for i in range(K1.num_params + K2.num_params): -# index = np.where(index_param == i)[0] -# if index.size > 1: -# self.tie_params(index) -# for i, t in zip(prev_constr_ind, prev_constr): -# self.constrain(np.where(index_param == i)[0], t) -# -# def _get_params(self): -# return np.hstack(self._parameters_) -# return np.hstack([p._get_params() for p in self._parameters_]) - -# def _set_params(self, x): -# import ipdb;ipdb.set_trace() -# [p._set_params(x[s]) for p, s in zip(self._parameters_, self._param_slices_)] - -# def _get_param_names(self): -# # this is a bit nasty: we want to distinguish between parts with the same name by appending a count -# part_names = np.array([k.name for k in self._parameters_], dtype=np.str) -# counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)] -# cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)] -# names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)] -# -# return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self._parameters_)], []) - - def K(self, X, X2=None, which_parts='all'): - """ - Compute the kernel function. - - :param X: the first set of inputs to the kernel - :param X2: (optional) the second set of arguments to the kernel. If X2 - is None, this is passed throgh to the 'part' object, which - handles this as X2 == X. - :param which_parts: a list of booleans detailing whether to include - each of the part functions. 
By default, 'all' - indicates all parts - """ - if which_parts == 'all': - which_parts = [True] * self.size - assert X.shape[1] == self.input_dim - if X2 is None: - target = np.zeros((X.shape[0], X.shape[0])) - [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] - else: - target = np.zeros((X.shape[0], X2.shape[0])) - [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] - return target - - def update_gradients_full(self, dL_dK, X): - [p.update_gradients_full(dL_dK, X) for p in self._parameters_] - - def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_] - - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] - - def _param_grad_helper(self, dL_dK, X, X2=None): - """ - Compute the gradient of the covariance function with respect to the parameters. - - :param dL_dK: An array of gradients of the objective function with respect to the covariance function. - :type dL_dK: Np.ndarray (num_samples x num_inducing) - :param X: Observed data inputs - :type X: np.ndarray (num_samples x input_dim) - :param X2: Observed data inputs (optional, defaults to X) - :type X2: np.ndarray (num_inducing x input_dim) - - returns: dL_dtheta - """ - assert X.shape[1] == self.input_dim - target = np.zeros(self.size) - if X2 is None: - [p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] - else: - [p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] - - return self._transform_gradients(target) - - def gradients_X(self, dL_dK, X, X2=None): - """Compute the gradient of the objective function with respect to X. - - :param dL_dK: An array of gradients of the objective function with respect to the covariance function. 
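The slicing dispatch in kern.K above, reduced to a self-contained sketch with toy parts (plain functions, not GPy objects): each part only ever sees its own input columns, and the contributions accumulate additively.

import numpy as np

def K_additive(parts, slices, X, X2=None):
    X2 = X if X2 is None else X2
    target = np.zeros((X.shape[0], X2.shape[0]))
    for part, sl in zip(parts, slices):
        target += part(X[:, sl], X2[:, sl])
    return target

linear = lambda A, B: A @ B.T
bias = lambda A, B: np.ones((A.shape[0], B.shape[0]))
X = np.random.randn(6, 3)
K = K_additive([linear, bias], [slice(0, 2), slice(2, 3)], X)
assert K.shape == (6, 6)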
- :type dL_dK: np.ndarray (num_samples x num_inducing) - :param X: Observed data inputs - :type X: np.ndarray (num_samples x input_dim) - :param X2: Observed data inputs (optional, defaults to X) - :type X2: np.ndarray (num_inducing x input_dim)""" - - target = np.zeros_like(X) - if X2 is None: - [p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - else: - [p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def Kdiag(self, X, which_parts='all'): - """Compute the diagonal of the covariance function for inputs X.""" - if which_parts == 'all': - which_parts = [True] * self.size - assert X.shape[1] == self.input_dim - target = np.zeros(X.shape[0]) - [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on] - return target - - def dKdiag_dtheta(self, dL_dKdiag, X): - """Compute the gradient of the diagonal of the covariance function with respect to the parameters.""" - assert X.shape[1] == self.input_dim - assert dL_dKdiag.size == X.shape[0] - target = np.zeros(self.size) - [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] - return self._transform_gradients(target) - - def dKdiag_dX(self, dL_dKdiag, X): - assert X.shape[1] == self.input_dim - target = np.zeros_like(X) - [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def psi0(self, Z, mu, S): - target = np.zeros(mu.shape[0]) - [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): - target = np.zeros(self.size) - [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] - return self._transform_gradients(target) - - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S): - target_mu, target_S = np.zeros_like(mu), np.zeros_like(S) - [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target_mu, target_S - - def psi1(self, Z, mu, S): - target = np.zeros((mu.shape[0], Z.shape[0])) - [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): - target = np.zeros((self.size)) - [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] - return self._transform_gradients(target) - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S): - target = np.zeros_like(Z) - [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): - """return shapes are num_samples,num_inducing,input_dim""" - target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target_mu, target_S - - def psi2(self, Z, mu, S): - """ - Computer the psi2 statistics for the covariance function. 
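For orientation on the psi statistics: psi1[n, m] is the Gaussian expectation E_{x_n ~ N(mu_n, S_n)}[k(x_n, z_m)]. A quick Monte Carlo check of the standard closed form for a 1-D RBF kernel (illustrative only, not the GPy implementation):

import numpy as np

rng = np.random.default_rng(0)
var, ell2 = 1.5, 0.7           # kernel variance, squared lengthscale
mu, S, z = 0.3, 0.2, -0.5      # one q(x) = N(mu, S), one inducing point z

closed = var * np.sqrt(ell2 / (ell2 + S)) * np.exp(-0.5 * (mu - z) ** 2 / (ell2 + S))
xs = rng.normal(mu, np.sqrt(S), 200_000)
mc = np.mean(var * np.exp(-0.5 * (xs - z) ** 2 / ell2))
assert abs(closed - mc) < 1e-2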
- - :param Z: np.ndarray of inducing inputs (num_inducing x input_dim) - :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim) - :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing) - - """ - target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) - [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - - # compute the "cross" terms - # TODO: input_slices needed - crossterms = 0 - - for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2): - if i_s1 == i_s2: - # TODO psi1 this must be faster/better/precached/more nice - tmp1 = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1) - tmp2 = np.zeros((mu.shape[0], Z.shape[0])) - p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2) - - prod = np.multiply(tmp1, tmp2) - crossterms += prod[:, :, None] + prod[:, None, :] - - target += crossterms - return target - - def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): - """Gradient of the psi2 statistics with respect to the parameters.""" - target = np.zeros(self.size) - [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] - - # compute the "cross" terms - # TODO: better looping, input_slices - for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2): - p1, p2 = self._parameters_[i1], self._parameters_[i2] -# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2] - ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2] - - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2]) - - return self._transform_gradients(target) - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S): - target = np.zeros_like(Z) - [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - # target *= 2 - - # compute the "cross" terms - # TODO: we need input_slices here. - for p1, p2 in itertools.permutations(self._parameters_, 2): -# if p1.name == 'linear' and p2.name == 'linear': -# raise NotImplementedError("We don't handle linear/linear cross-terms") - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target) - - return target * 2 - - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S): - target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - - # compute the "cross" terms - # TODO: we need input_slices here. - for p1, p2 in itertools.permutations(self._parameters_, 2): -# if p1.name == 'linear' and p2.name == 'linear': -# raise NotImplementedError("We don't handle linear/linear cross-terms") - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S) - - return target_mu, target_S - - def plot(self, *args, **kwargs): - """ - See GPy.plotting.matplot_dep.plot - """ - assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
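The cross-term loop above accounts for the mixed products that appear when psi2 of a sum k1 + k2 is expanded. Under the simplifying assumption that the two parts are independent under q(x) (for instance when they act on disjoint input columns), the mixed expectations factor into psi1 products; a sketch of that algebra, not a transcription of the loop above:

import numpy as np

n, m = 4, 3
psi1_a = np.random.rand(n, m)    # stand-ins for E[k1(x_n, z_m)]
psi1_b = np.random.rand(n, m)    # stand-ins for E[k2(x_n, z_m)]
cross = psi1_a[:, :, None] * psi1_b[:, None, :]
cross = cross + cross.transpose(0, 2, 1)     # symmetrise over (m, m')
print(cross.shape)   # (4, 3, 3): added on top of each part's own psi2 block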
- from ..plotting.matplot_dep import kernel_plots - kernel_plots.plot(self,*args) - -from GPy.core.model import Model - -class Kern_check_model(Model): - """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel.""" - def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): - Model.__init__(self, 'kernel_test_model') - num_samples = 20 - num_samples2 = 10 - if kernel==None: - kernel = GPy.kern.rbf(1) - if X==None: - X = np.random.randn(num_samples, kernel.input_dim) - if dL_dK==None: - if X2==None: - dL_dK = np.ones((X.shape[0], X.shape[0])) - else: - dL_dK = np.ones((X.shape[0], X2.shape[0])) - - self.kernel=kernel - self.add_parameter(kernel) - self.X = X - self.X2 = X2 - self.dL_dK = dL_dK - - def is_positive_definite(self): - v = np.linalg.eig(self.kernel.K(self.X))[0] - if any(v<-10*sys.float_info.epsilon): - return False - else: - return True - - def log_likelihood(self): - return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum() - - def _log_likelihood_gradients(self): - raise NotImplementedError, "This needs to be implemented to use the kern_check_model class." - -class Kern_check_dK_dtheta(Kern_check_model): - """This class allows gradient checks for the gradient of a kernel with respect to parameters. """ - def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): - Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) - - def _log_likelihood_gradients(self): - return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2) - -class Kern_check_dKdiag_dtheta(Kern_check_model): - """This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters.""" - def __init__(self, kernel=None, dL_dK=None, X=None): - Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) - if dL_dK==None: - self.dL_dK = np.ones((self.X.shape[0])) - def parameters_changed(self): - self.kernel.update_gradients_full(self.dL_dK, self.X) - - def log_likelihood(self): - return (self.dL_dK*self.kernel.Kdiag(self.X)).sum() - - def _log_likelihood_gradients(self): - return self.kernel.dKdiag_dtheta(self.dL_dK, self.X) - -class Kern_check_dK_dX(Kern_check_model): - """This class allows gradient checks for the gradient of a kernel with respect to X. """ - def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): - Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2) - self.remove_parameter(kernel) - self.X = Param('X', self.X) - self.add_parameter(self.X) - def _log_likelihood_gradients(self): - return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten() - -class Kern_check_dKdiag_dX(Kern_check_dK_dX): - """This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """ - def __init__(self, kernel=None, dL_dK=None, X=None, X2=None): - Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None) - if dL_dK==None: - self.dL_dK = np.ones((self.X.shape[0])) - - def log_likelihood(self): - return (self.dL_dK*self.kernel.Kdiag(self.X)).sum() - - def _log_likelihood_gradients(self): - return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten() - -def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False): - """ - This function runs on kernels to check the correctness of their - implementation. It checks that the covariance function is positive definite - for a randomly generated data set. 
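The positive-definiteness test in Kern_check_model above, restated as a standalone sketch (eigvalsh is used here because a Gram matrix is symmetric; the original calls np.linalg.eig):

import sys
import numpy as np

def is_positive_definite(K, tol=10 * sys.float_info.epsilon):
    v = np.linalg.eigvalsh(K)          # eigenvalues of a symmetric matrix
    return bool(np.all(v >= -tol))     # tolerate round-off below zero

X = np.linspace(-2, 2, 6)[:, None]
K = np.exp(-0.5 * (X - X.T) ** 2)      # an RBF Gram matrix, PSD in exact arithmetic
assert is_positive_definite(K)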
- - :param kern: the kernel to be tested. - :type kern: GPy.kern.Kernpart - :param X: X input values to test the covariance function. - :type X: ndarray - :param X2: X2 input values to test the covariance function. - :type X2: ndarray - - """ - pass_checks = True - if X==None: - X = np.random.randn(10, kern.input_dim) - if output_ind is not None: - X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0]) - if X2==None: - X2 = np.random.randn(20, kern.input_dim) - if output_ind is not None: - X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0]) - - if verbose: - print("Checking covariance function is positive definite.") - result = Kern_check_model(kern, X=X).is_positive_definite() - if result and verbose: - print("Check passed.") - if not result: - print("Positive definite check failed for " + kern.name + " covariance function.") - pass_checks = False - return False - - if verbose: - print("Checking gradients of K(X, X) wrt theta.") - result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of K(X, X2) wrt theta.") - result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of Kdiag(X) wrt theta.") - result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of K(X, X) wrt X.") - try: - result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose) - except NotImplementedError: - result=True - if verbose: - print("gradients_X not implemented for " + kern.name) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of K(X, X2) wrt X.") - try: - result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose) - except NotImplementedError: - result=True - if verbose: - print("gradients_X not implemented for " + kern.name) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. 
Gradient values as follows:") - Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True) - pass_checks = False - return False - - if verbose: - print("Checking gradients of Kdiag(X) wrt X.") - try: - result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose) - except NotImplementedError: - result=True - if verbose: - print("gradients_X not implemented for " + kern.name) - if result and verbose: - print("Check passed.") - if not result: - print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") - Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True) - pass_checks = False - return False - - return pass_checks diff --git a/GPy/kern/kernpart.py b/GPy/kern/kernpart.py deleted file mode 100644 index 06f1446b..00000000 --- a/GPy/kern/kernpart.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) -#from ...core.parameterized.Parameterized import set_as_parameter -from ...core.parameterization import Parameterized - -class Kernpart(Parameterized): - def __init__(self,input_dim,name): - """ - The base class for a kernpart: a positive definite function - which forms part of a covariance function (kernel). - - :param input_dim: the number of input dimensions to the function - :type input_dim: int - - Do not instantiate. - """ - super(Kernpart, self).__init__(name) - # the input dimensionality for the covariance - self.input_dim = input_dim - # the number of optimisable parameters - # the name of the covariance function. - # link to parameterized objects - #self._X = None - - def connect_input(self, X): - X.add_observer(self, self.on_input_change) - #self._X = X - - def on_input_change(self, X): - """ - During optimization this function will be called when - the inputs X changed. Use this to update caches dependent - on the inputs X. - """ - # overwrite this to update kernel when inputs X change - pass - - -# def set_as_parameter_named(self, name, gradient, index=None, *args, **kwargs): -# """ -# :param names: name of parameter to set as parameter -# :param gradient: gradient method to get the gradient of this parameter -# :param index: index of where to place parameter in printing -# :param args, kwargs: additional arguments to gradient -# -# Convenience method to connect Kernpart parameters: -# parameter with name (attribute of this Kernpart) will be set as parameter with following name: -# -# kernel_name + _ + parameter_name -# -# To add the kernels name to the parameter name use this method to -# add parameters. -# """ -# self.set_as_parameter(name, getattr(self, name), gradient, index, *args, **kwargs) -# def set_as_parameter(self, name, array, gradient, index=None, *args, **kwargs): -# """ -# See :py:func:`GPy.core.parameterized.Parameterized.set_as_parameter` -# -# Note: this method adds the kernels name in front of the parameter. 
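kern_test above leans on checkgrad throughout; the underlying idea is a central-difference comparison like this standalone sketch (names hypothetical, not the GPy method):

import numpy as np

def check_grad(f, grad, x0, eps=1e-6, tol=1e-4):
    analytic = grad(x0)
    numeric = np.empty_like(x0)
    for i in range(x0.size):
        d = np.zeros_like(x0)
        d[i] = eps
        numeric[i] = (f(x0 + d) - f(x0 - d)) / (2 * eps)
    return np.allclose(analytic, numeric, rtol=tol, atol=tol)

f = lambda x: np.sum(np.sin(x) ** 2)
grad = lambda x: 2 * np.sin(x) * np.cos(x)
assert check_grad(f, grad, np.random.randn(5))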
-# """ -# p = Param(self.name+"_"+name, array, gradient, *args, **kwargs) -# if index is None: -# self._parameters_.append(p) -# else: -# self._parameters_.insert(index, p) -# self.__dict__[name] = p - #set_as_parameter.__doc__ += set_as_parameter.__doc__ # @UndefinedVariable -# def _get_params(self): -# raise NotImplementedError -# def _set_params(self,x): -# raise NotImplementedError -# def _get_param_names(self): -# raise NotImplementedError - def K(self,X,X2,target): - raise NotImplementedError - def Kdiag(self,X,target): - raise NotImplementedError - def _param_grad_helper(self,dL_dK,X,X2,target): - raise NotImplementedError - def dKdiag_dtheta(self,dL_dKdiag,X,target): - # In the base case compute this by calling _param_grad_helper. Need to - # override for stationary covariances (for example) to save - # time. - for i in range(X.shape[0]): - self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target) - def psi0(self,Z,mu,S,target): - raise NotImplementedError - def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): - raise NotImplementedError - def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S): - raise NotImplementedError - def psi1(self,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dtheta(self,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S): - raise NotImplementedError - def psi2(self,Z,mu,S,target): - raise NotImplementedError - def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target): - raise NotImplementedError - def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): - raise NotImplementedError - def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): - raise NotImplementedError - def gradients_X(self, dL_dK, X, X2, target): - raise NotImplementedError - def dKdiag_dX(self, dL_dK, X, target): - raise NotImplementedError - def update_gradients_full(self, dL_dK, X): - """Set the gradients of all parameters when doing full (N) inference.""" - raise NotImplementedError - def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - """Set the gradients of all parameters when doing sparse (M) inference.""" - raise NotImplementedError - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs.""" - raise NotImplementedError - -class Kernpart_stationary(Kernpart): - def __init__(self, input_dim, lengthscale=None, ARD=False): - self.input_dim = input_dim - self.ARD = ARD - if not ARD: - self.num_params = 2 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - self.lengthscale = np.ones(1) - else: - self.num_params = self.input_dim + 1 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - self.lengthscale = np.ones(self.input_dim) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - - def _set_params(self, x): - self.lengthscale = x - self.lengthscale2 = np.square(self.lengthscale) - # reset cached results - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S - - - def dKdiag_dtheta(self, dL_dKdiag, X, target): - # 
For stationary covariances, derivative of diagonal elements - # wrt lengthscale is 0. - target[0] += np.sum(dL_dKdiag) - - def dKdiag_dX(self, dL_dK, X, target): - pass # true for all stationary kernels - - -class Kernpart_inner(Kernpart): - def __init__(self,input_dim): - """ - The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs. - - :param input_dim: the number of input dimensions to the function - :type input_dim: int - - Do not instantiate. - """ - Kernpart.__init__(self, input_dim) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 3e105785..b4f987ea 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -7,7 +7,7 @@ from GPy.util.linalg import PCA import numpy import itertools import pylab -from GPy.kern.kern import kern +from GPy.kern.kern import Kern from GPy.models.bayesian_gplvm import BayesianGPLVM class MRD(Model): @@ -48,11 +48,11 @@ class MRD(Model): # sort out the kernels if kernels is None: kernels = [None] * len(likelihood_or_Y_list) - elif isinstance(kernels, kern): + elif isinstance(kernels, Kern): kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))] else: assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output" - assert all([isinstance(k, kern) for k in kernels]), "invalid kernel object detected!" + assert all([isinstance(k, Kern) for k in kernels]), "invalid kernel object detected!" assert not ('kernel' in kw), "pass kernels through `kernels` argument" self.input_dim = input_dim diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py index 19c96bc0..80350475 100644 --- a/GPy/plotting/matplot_dep/kernel_plots.py +++ b/GPy/plotting/matplot_dep/kernel_plots.py @@ -7,7 +7,7 @@ import pylab as pb import Tango from matplotlib.textpath import TextPath from matplotlib.transforms import offset_copy -from ...kern.parts.linear import Linear +from ...kern.linear import Linear def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False): From 493506408ca09dc62e9871b8d3c06019a046fa75 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 19 Feb 2014 15:01:35 +0000 Subject: [PATCH 3/6] =?UTF-8?q?init=20for=20src=20dir=C2=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GPy/kern/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 2098bd76..7760f48f 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,6 +1,6 @@ -from rbf import RBF -from white import White -from kern import Kern +from _src.rbf import RBF +from _src.white import White +from _src.kern import Kern #import bias #import Brownian #import coregionalize From 92d71384b77aca1a5a2190ce2062624670fea9a8 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 19 Feb 2014 17:37:18 +0000 Subject: [PATCH 4/6] deleted kernpart, prod and add seem to work okay. 
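A note on the Kernpart_stationary code deleted above: for a stationary kernel k(x, x') = var * r(x - x') with r(0) = 1, the diagonal is the constant var, so the lengthscale receives no diagonal gradient and the variance gradient is just the sum, which is what dKdiag_dtheta accumulated into target[0]. A quick numeric check:

import numpy as np

var, ell = 2.0, 0.5
X = np.random.randn(6, 1)
K = var * np.exp(-0.5 * (X - X.T) ** 2 / ell ** 2)
assert np.allclose(np.diag(K), var)    # Kdiag == variance, independent of ell
dL_dKdiag = np.random.randn(6)
# dL/dvar = sum(dL_dKdiag * dKdiag/dvar) = sum(dL_dKdiag); dL/dell = 0
assert np.isclose(np.sum(dL_dKdiag * np.diag(K) / var), dL_dKdiag.sum())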
--- GPy/core/gp.py | 26 ++++----- GPy/examples/regression.py | 38 ++++++------ GPy/kern/__init__.py | 1 + GPy/kern/_src/add.py | 70 ++++------------------ GPy/kern/_src/coregionalize.py | 12 ++-- GPy/kern/_src/kern.py | 4 +- GPy/kern/_src/kernpart.py | 60 ------------------- GPy/kern/_src/linear.py | 8 +-- GPy/kern/_src/prod.py | 74 +++++++----------------- GPy/kern/_src/rbf.py | 8 +-- GPy/kern/_src/white.py | 6 +- GPy/models/gp_regression.py | 2 +- GPy/models/mrd.py | 2 +- GPy/plotting/matplot_dep/kernel_plots.py | 2 +- GPy/plotting/matplot_dep/models_plots.py | 16 +++-- GPy/util/datasets.py | 4 +- 16 files changed, 95 insertions(+), 238 deletions(-) delete mode 100644 GPy/kern/_src/kernpart.py diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 10ba8e6b..2dcf0e14 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -70,7 +70,7 @@ class GP(Model): def log_likelihood(self): return self._log_marginal_likelihood - def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False): + def _raw_predict(self, _Xnew, full_cov=False): """ Internal helper function for making predictions, does not account for normalization or likelihood @@ -80,29 +80,27 @@ class GP(Model): diagonal of the covariance is returned. """ - Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T + Kx = self.kern.K(_Xnew, self.X).T #LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1) WiKx = np.dot(self.posterior.woodbury_inv, Kx) mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: - Kxx = self.kern.K(_Xnew, which_parts=which_parts) + Kxx = self.kern.K(_Xnew) #var = Kxx - tdot(LiKx.T) var = np.dot(Kx.T, WiKx) else: - Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts) + Kxx = self.kern.Kdiag(_Xnew) #var = Kxx - np.sum(LiKx*LiKx, 0) var = Kxx - np.sum(WiKx*Kx, 0) var = var.reshape(-1, 1) return mu, var - def predict(self, Xnew, which_parts='all', full_cov=False, **likelihood_args): + def predict(self, Xnew, full_cov=False, **likelihood_args): """ Predict the function(s) at the new point(s) Xnew. :param Xnew: The points at which to make a prediction :type Xnew: np.ndarray, Nnew x self.input_dim - :param which_parts: specifies which outputs kernel(s) to use in prediction - :type which_parts: ('all', list of bools) :param full_cov: whether to return the full covariance matrix, or just the diagonal :type full_cov: bool @@ -118,13 +116,13 @@ class GP(Model): """ #predict the latent function values - mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts) + mu, var = self._raw_predict(Xnew, full_cov=full_cov) # now push through likelihood mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args) return mean, var, _025pm, _975pm - def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True): + def posterior_samples_f(self,X,size=10, full_cov=True): """ Samples the posterior GP at the points X. @@ -132,13 +130,11 @@ class GP(Model): :type X: np.ndarray, Nnew x self.input_dim. :param size: the number of a posteriori samples. :type size: int. - :param which_parts: which of the kernel functions to use (additively). - :type which_parts: 'all', or list of bools. :param full_cov: whether to return the full covariance matrix, or just the diagonal. :type full_cov: bool. :returns: Ysim: set of simulations, a Numpy array (N x samples). 
""" - m, v = self._raw_predict(X, which_parts=which_parts, full_cov=full_cov) + m, v = self._raw_predict(X, full_cov=full_cov) v = v.reshape(m.size,-1) if len(v.shape)==3 else v if not full_cov: Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T @@ -147,7 +143,7 @@ class GP(Model): return Ysim - def posterior_samples(self,X,size=10,which_parts='all',full_cov=True,noise_model=None): + def posterior_samples(self,X,size=10, full_cov=True,noise_model=None): """ Samples the posterior GP at the points X. @@ -155,15 +151,13 @@ class GP(Model): :type X: np.ndarray, Nnew x self.input_dim. :param size: the number of a posteriori samples. :type size: int. - :param which_parts: which of the kernel functions to use (additively). - :type which_parts: 'all', or list of bools. :param full_cov: whether to return the full covariance matrix, or just the diagonal. :type full_cov: bool. :param noise_model: for mixed noise likelihood, the noise model to use in the samples. :type noise_model: integer. :returns: Ysim: set of simulations, a Numpy array (N x samples). """ - Ysim = self.posterior_samples_f(X, size, which_parts=which_parts, full_cov=full_cov) + Ysim = self.posterior_samples_f(X, size, full_cov=full_cov) if isinstance(self.likelihood, Gaussian): noise_std = np.sqrt(self.likelihood._get_params()) Ysim += np.random.normal(0,noise_std,Ysim.shape) diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 55567051..5cac1857 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -41,7 +41,7 @@ def coregionalization_toy2(optimize=True, plot=True): Y = np.vstack((Y1, Y2)) #build the kernel - k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) + k1 = GPy.kern.RBF(1) + GPy.kern.bias(1) k2 = GPy.kern.coregionalize(2,1) k = k1**k2 m = GPy.models.GPRegression(X, Y, kernel=k) @@ -68,7 +68,7 @@ def coregionalization_toy2(optimize=True, plot=True): # Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 # Y = np.vstack((Y1, Y2)) # -# k1 = GPy.kern.rbf(1) +# k1 = GPy.kern.RBF(1) # m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1]) # m.constrain_fixed('.*rbf_var', 1.) 
# m.optimize(max_iters=100) @@ -127,7 +127,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True): Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None], np.random.randint(0, 4, num_inducing)[:, None])) - k1 = GPy.kern.rbf(1) + k1 = GPy.kern.RBF(1) k2 = GPy.kern.coregionalize(output_dim=5, rank=5) k = k1**k2 @@ -156,7 +156,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000 data['Y'] = data['Y'] - np.mean(data['Y']) - lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf) + lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.RBF) if plot: pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet) ax = pb.gca() @@ -172,8 +172,8 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000 optim_point_y = np.empty(2) np.random.seed(seed=seed) for i in range(0, model_restarts): - # kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) - kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50)) + # kern = GPy.kern.RBF(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + kern = GPy.kern.RBF(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50)) m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern) m['noise_variance'] = np.random.uniform(1e-3, 1) @@ -196,7 +196,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000 ax.set_ylim(ylim) return m # (models, lls) -def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf): +def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF): """ Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales. 
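The _contour_data helper above maps the model objective over a grid of lengthscales and signal-to-noise ratios. The quantity being mapped is the GP log marginal likelihood; a standalone sketch for a Gaussian likelihood (illustrative, not the GPy code path):

import numpy as np

def log_marginal(X, y, variance, lengthscale, noise):
    n = X.shape[0]
    K = variance * np.exp(-0.5 * (X - X.T) ** 2 / lengthscale ** 2) + noise * np.eye(n)
    L = np.linalg.cholesky(K)
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))   # K^{-1} y
    quad = float((y * alpha).sum())                       # y^T K^{-1} y
    logdet = 2.0 * np.log(np.diag(L)).sum()
    return -0.5 * quad - 0.5 * logdet - 0.5 * n * np.log(2 * np.pi)

X = np.sort(np.random.randn(15, 1), axis=0)
y = np.sin(3 * X) + 0.1 * np.random.randn(15, 1)
grid = [[log_marginal(X, y, 1.0, ell, sn) for ell in (0.1, 0.5, 2.0)] for sn in (0.01, 0.1)]
print(np.round(grid, 2))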
@@ -278,10 +278,10 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True): optimizer='scg' x_len = 30 X = np.linspace(0, 10, x_len)[:, None] - f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X)) + f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.RBF(1).K(X)) Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None] - kern = GPy.kern.rbf(1) + kern = GPy.kern.RBF(1) poisson_lik = GPy.likelihoods.Poisson() laplace_inf = GPy.inference.latent_function_inference.LaplaceInference() @@ -319,10 +319,10 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize if kernel_type == 'linear': kernel = GPy.kern.linear(X.shape[1], ARD=1) elif kernel_type == 'rbf_inv': - kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1) + kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1) else: - kernel = GPy.kern.rbf(X.shape[1], ARD=1) - kernel += GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) + kernel = GPy.kern.RBF(X.shape[1], ARD=1) + kernel += GPy.kern.White(X.shape[1]) + GPy.kern.bias(X.shape[1]) m = GPy.models.GPRegression(X, Y, kernel) # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25 # m.set_prior('.*lengthscale',len_prior) @@ -358,9 +358,9 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o if kernel_type == 'linear': kernel = GPy.kern.linear(X.shape[1], ARD=1) elif kernel_type == 'rbf_inv': - kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1) + kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1) else: - kernel = GPy.kern.rbf(X.shape[1], ARD=1) + kernel = GPy.kern.RBF(X.shape[1], ARD=1) #kernel += GPy.kern.bias(X.shape[1]) X_variance = np.ones(X.shape) * 0.5 m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance) @@ -421,7 +421,7 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti X = np.random.uniform(-3., 3., (num_samples, 1)) Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05 # construct kernel - rbf = GPy.kern.rbf(1) + rbf = GPy.kern.RBF(1) # create simple GP Model m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing) m.checkgrad(verbose=1) @@ -444,7 +444,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt Y[inan] = np.nan # construct kernel - rbf = GPy.kern.rbf(2) + rbf = GPy.kern.RBF(2) # create simple GP Model m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing) @@ -476,9 +476,9 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): # likelihood = GPy.likelihoods.Gaussian(Y) Z = np.random.uniform(-3., 3., (7, 1)) - k = GPy.kern.rbf(1) + k = GPy.kern.RBF(1) # create simple GP Model - no input uncertainty on this one - m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z) + m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z) if optimize: m.optimize('scg', messages=1, max_iters=max_iters) @@ -489,7 +489,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): print m # the same Model with uncertainty - m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z, X_variance=S) + m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S) if optimize: m.optimize('scg', messages=1, max_iters=max_iters) if plot: diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 7760f48f..214e230f 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -1,6 +1,7 @@ from _src.rbf import RBF from _src.white import White from _src.kern import Kern +Linear = 
'foo' #import bias #import Brownian #import coregionalize diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 8d916941..8d81674b 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -5,8 +5,8 @@ import sys import numpy as np import itertools from linear import Linear -from ..core.parameterization import Parameterized -from GPy.core.parameterization.param import Param +from ...core.parameterization import Parameterized +from ...core.parameterization.param import Param from kern import Kern class Add(Kern): @@ -27,7 +27,7 @@ class Add(Kern): self.add_parameters(*subkerns) - def K(self, X, X2=None, which_parts='all'): + def K(self, X, X2=None): """ Compute the kernel function. @@ -35,52 +35,22 @@ class Add(Kern): :param X2: (optional) the second set of arguments to the kernel. If X2 is None, this is passed throgh to the 'part' object, which handles this as X2 == X. - :param which_parts: a list of booleans detailing whether to include - each of the part functions. By default, 'all' - indicates all parts """ - if which_parts == 'all': - which_parts = [True] * self.size assert X.shape[1] == self.input_dim if X2 is None: - target = np.zeros((X.shape[0], X.shape[0])) - [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] + return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)]) else: - target = np.zeros((X.shape[0], X2.shape[0])) - [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] - return target + return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]) def update_gradients_full(self, dL_dK, X): - [p.update_gradients_full(dL_dK, X) for p in self._parameters_] + [p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_] + [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, i_s)] def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] - def _param_grad_helper(self, dL_dK, X, X2=None): - """ - Compute the gradient of the covariance function with respect to the parameters. - - :param dL_dK: An array of gradients of the objective function with respect to the covariance function. - :type dL_dK: Np.ndarray (num_samples x num_inducing) - :param X: Observed data inputs - :type X: np.ndarray (num_samples x input_dim) - :param X2: Observed data inputs (optional, defaults to X) - :type X2: np.ndarray (num_inducing x input_dim) - - returns: dL_dtheta - """ - assert X.shape[1] == self.input_dim - target = np.zeros(self.size) - if X2 is None: - [p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] - else: - [p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] - - return self._transform_gradients(target) - def gradients_X(self, dL_dK, X, X2=None): """Compute the gradient of the objective function with respect to X. 
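A side note on the new Add.K return style, in case the builtin sum over a list of matrices looks suspicious:

import numpy as np

# sum() starts from the int 0, which broadcasts against the first part's
# matrix, so Add.K's sum([...]) returns a plain ndarray of the right shape.
parts_K = [np.eye(3), np.ones((3, 3))]
K = sum(parts_K)
assert isinstance(K, np.ndarray) and K.shape == (3, 3)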
@@ -93,33 +63,15 @@ class Add(Kern): target = np.zeros_like(X) if X2 is None: - [p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + [np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] else: - [p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] + [np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] return target - def Kdiag(self, X, which_parts='all'): + def Kdiag(self, X): """Compute the diagonal of the covariance function for inputs X.""" - if which_parts == 'all': - which_parts = [True] * self.size assert X.shape[1] == self.input_dim - target = np.zeros(X.shape[0]) - [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on] - return target - - def dKdiag_dtheta(self, dL_dKdiag, X): - """Compute the gradient of the diagonal of the covariance function with respect to the parameters.""" - assert X.shape[1] == self.input_dim - assert dL_dKdiag.size == X.shape[0] - target = np.zeros(self.size) - [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] - return self._transform_gradients(target) - - def dKdiag_dX(self, dL_dKdiag, X): - assert X.shape[1] == self.input_dim - target = np.zeros_like(X) - [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target + return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]) def psi0(self, Z, mu, S): target = np.zeros(mu.shape[0]) diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py index 8b2f17e8..69fc27ef 100644 --- a/GPy/kern/_src/coregionalize.py +++ b/GPy/kern/_src/coregionalize.py @@ -1,12 +1,12 @@ # Copyright (c) 2012, James Hensman and Ricardo Andrade # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import Kernpart +from kern import Kern import numpy as np from scipy import weave from ...core.parameterization import Param -class Coregionalize(Kernpart): +class Coregionalize(Kern): """ Covariance function for intrinsic/linear coregionalization models @@ -133,6 +133,8 @@ class Coregionalize(Kernpart): #dkappa = dL_dKdiag_small #target += np.hstack([dW.flatten(),dkappa]) - def gradients_X(self,dL_dK,X,X2,target): - #NOTE In this case, pass is equivalent to returning zero. - pass + def gradients_X(self,dL_dK,X,X2): + if X2 is None: + return np.zeros((X.shape[0], X.shape[0])) + else: + return np.zeros((X.shape[0], X2.shape[0])) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index af362498..b5b84305 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -4,8 +4,8 @@ import sys import numpy as np import itertools -from ..core.parameterization import Parameterized -from GPy.core.parameterization.param import Param +from ...core.parameterization import Parameterized +from ...core.parameterization.param import Param class Kern(Parameterized): diff --git a/GPy/kern/_src/kernpart.py b/GPy/kern/_src/kernpart.py deleted file mode 100644 index 097ed741..00000000 --- a/GPy/kern/_src/kernpart.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
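On the np.add(a, b, out) calls in Add.gradients_X above: this is just `target[:, i_s] += grad` written as an expression so it can live inside the list comprehension, accumulating each part's gradient into its own column block through a slice view. A two-line check of the in-place behaviour:

import numpy as np

target = np.zeros((4, 3))
grad_part = np.ones((4, 2))
i_s = slice(0, 2)
np.add(target[:, i_s], grad_part, target[:, i_s])   # writes through the view
assert target[:, :2].sum() == 8 and target[:, 2].sum() == 0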
-# Licensed under the BSD 3-clause license (see LICENSE.txt) -#from ...core.parameterized.Parameterized import set_as_parameter -from ...core.parameterization import Parameterized - -class Kernpart_stationary(Kernpart): - def __init__(self, input_dim, lengthscale=None, ARD=False): - self.input_dim = input_dim - self.ARD = ARD - if not ARD: - self.num_params = 2 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - self.lengthscale = np.ones(1) - else: - self.num_params = self.input_dim + 1 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - self.lengthscale = np.ones(self.input_dim) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - - def _set_params(self, x): - self.lengthscale = x - self.lengthscale2 = np.square(self.lengthscale) - # reset cached results - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S - - - def dKdiag_dtheta(self, dL_dKdiag, X, target): - # For stationary covariances, derivative of diagonal elements - # wrt lengthscale is 0. - target[0] += np.sum(dL_dKdiag) - - def dKdiag_dX(self, dL_dK, X, target): - pass # true for all stationary kernels - - -class Kernpart_inner(Kernpart): - def __init__(self,input_dim): - """ - The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs. - - :param input_dim: the number of input dimensions to the function - :type input_dim: int - - Do not instantiate. 
- """ - Kernpart.__init__(self, input_dim) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index ab77d4e6..5083c8de 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -5,10 +5,10 @@ import numpy as np from scipy import weave from kern import Kern -from ..util.linalg import tdot -from ..util.misc import fast_array_equal, param_to_array -from ..core.parameterization import Param -from ..core.parameterization.transformations import Logexp +from ...util.linalg import tdot +from ...util.misc import fast_array_equal, param_to_array +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp class Linear(Kern): """ diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index 08221de7..e0d069b2 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -35,64 +35,36 @@ class Prod(Kern): self._X, self._X2 = np.empty(shape=(2,1)) self._params = None - def K(self,X,X2,target): + def K(self, X, X2=None): self._K_computations(X,X2) - target += self._K1 * self._K2 - - def K1(self,X, X2): - """Compute the part of the kernel associated with k1.""" - self._K_computations(X, X2) - return self._K1 - - def K2(self, X, X2): - """Compute the part of the kernel associated with k2.""" - self._K_computations(X, X2) - return self._K2 + return self._K1 * self._K2 def update_gradients_full(self, dL_dK, X): self._K_computations(X, None) self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1]) self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2]) - def _param_grad_helper(self,dL_dK,X,X2,target): - """Derivative of the covariance matrix with respect to the parameters.""" - self._K_computations(X,X2) - if X2 is None: - self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params]) - self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:]) - else: - self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params]) - self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:]) - - def Kdiag(self,X,target): + def Kdiag(self, X): """Compute the diagonal of the covariance matrix associated to X.""" - target1 = np.zeros(X.shape[0]) - target2 = np.zeros(X.shape[0]) - self.k1.Kdiag(X[:,self.slice1],target1) - self.k2.Kdiag(X[:,self.slice2],target2) - target += target1 * target2 + return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2]) + def update_gradients_sparse(self): + pass + #wtf goes here?? 
+ #def dKdiag_dtheta(self,dL_dKdiag,X,target): + #K1 = np.zeros(X.shape[0]) + #K2 = np.zeros(X.shape[0]) + #self.k1.Kdiag(X[:,self.slice1],K1) + #self.k2.Kdiag(X[:,self.slice2],K2) + #self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params]) + #self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:]) - def dKdiag_dtheta(self,dL_dKdiag,X,target): - K1 = np.zeros(X.shape[0]) - K2 = np.zeros(X.shape[0]) - self.k1.Kdiag(X[:,self.slice1],K1) - self.k2.Kdiag(X[:,self.slice2],K2) - self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params]) - self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:]) - - def gradients_X(self,dL_dK,X,X2,target): + def gradients_X(self,dL_dK,X,X2): """derivative of the covariance matrix with respect to X.""" self._K_computations(X,X2) if X2 is None: - if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize): - self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1]) - self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2]) - else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize): - #NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei] - X2 = X - self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) - self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) + self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1]) + self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2]) else: self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) @@ -112,14 +84,10 @@ class Prod(Kern): self._params == self._get_params().copy() if X2 is None: self._X2 = None - self._K1 = np.zeros((X.shape[0],X.shape[0])) - self._K2 = np.zeros((X.shape[0],X.shape[0])) - self.k1.K(X[:,self.slice1],None,self._K1) - self.k2.K(X[:,self.slice2],None,self._K2) + self._K1 = self.k1.K(X[:,self.slice1],None) + self._K2 = self.k2.K(X[:,self.slice2],None) else: self._X2 = X2.copy() - self._K1 = np.zeros((X.shape[0],X2.shape[0])) - self._K2 = np.zeros((X.shape[0],X2.shape[0])) - self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1) - self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2) + self._K1 = self.k1.K(X[:,self.slice1],X2[:,self.slice1]) + self._K2 = self.k2.K(X[:,self.slice2],X2[:,self.slice2]) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 36e454e3..eb713433 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -5,10 +5,10 @@ import numpy as np from scipy import weave from kern import Kern -from ..util.linalg import tdot -from ..util.misc import fast_array_equal, param_to_array -from ..core.parameterization import Param -from ..core.parameterization.transformations import Logexp +from ...util.linalg import tdot +from ...util.misc import fast_array_equal, param_to_array +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp class RBF(Kern): """ diff --git a/GPy/kern/_src/white.py b/GPy/kern/_src/white.py index 7750267f..2be73389 100644 --- a/GPy/kern/_src/white.py +++ b/GPy/kern/_src/white.py @@ -3,8 +3,8 @@ from kern import Kern import numpy as np -from ..core.parameterization import Param -from 
..core.parameterization.transformations import Logexp
+from ...core.parameterization import Param
+from ...core.parameterization.transformations import Logexp

 class White(Kern):
     """
@@ -25,6 +25,8 @@ class White(Kern):
     def K(self,X,X2):
         if X2 is None:
             return np.eye(X.shape[0])*self.variance
+        else:
+            return np.zeros((X.shape[0], X2.shape[0]))

     def Kdiag(self,X):
         ret = np.ones(X.shape[0])
diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py
index a72acc1a..f8957906 100644
--- a/GPy/models/gp_regression.py
+++ b/GPy/models/gp_regression.py
@@ -23,7 +23,7 @@ class GPRegression(GP):
     def __init__(self, X, Y, kernel=None):
         if kernel is None:
-            kernel = kern.rbf(X.shape[1])
+            kernel = kern.RBF(X.shape[1])

         likelihood = likelihoods.Gaussian()
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index b4f987ea..acc6e11a 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -7,7 +7,7 @@ from GPy.util.linalg import PCA
 import numpy
 import itertools
 import pylab
-from GPy.kern.kern import Kern
+from GPy.kern import Kern
 from GPy.models.bayesian_gplvm import BayesianGPLVM

 class MRD(Model):
diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py
index 80350475..30157294 100644
--- a/GPy/plotting/matplot_dep/kernel_plots.py
+++ b/GPy/plotting/matplot_dep/kernel_plots.py
@@ -7,7 +7,7 @@ import pylab as pb
 import Tango
 from matplotlib.textpath import TextPath
 from matplotlib.transforms import offset_copy
-from ...kern.linear import Linear
+from ...kern import Linear

 def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py
index c9896116..75ba39d9 100644
--- a/GPy/plotting/matplot_dep/models_plots.py
+++ b/GPy/plotting/matplot_dep/models_plots.py
@@ -9,7 +9,7 @@ from ...util.misc import param_to_array

 def plot_fit(model, plot_limits=None, which_data_rows='all',
-        which_data_ycols='all', which_parts='all', fixed_inputs=[],
+        which_data_ycols='all', fixed_inputs=[],
         levels=20, samples=0, fignum=None, ax=None, resolution=None,
         plot_raw=False,
         linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
@@ -20,7 +20,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
     - In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.

     Can plot only part of the data and part of the posterior functions
-    using which_data_rowsm which_data_ycols and which_parts
+    using which_data_rows, which_data_ycols.

     :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
     :type plot_limits: np.array
@@ -28,8 +28,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
     :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
     :param which_data_ycols: when the data has several columns (independent outputs), only plot these
     :type which_data_rows: 'all' or a list of integers
-    :param which_parts: which of the kernel functions to plot (additively)
-    :type which_parts: 'all', or list of bools
     :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
     :type fixed_inputs: a list of tuples
     :param resolution: the number of intervals to sample the GP on.
Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D @@ -76,12 +74,12 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #make a prediction on the frame and plot it if plot_raw: - m, v = model._raw_predict(Xgrid, which_parts=which_parts) + m, v = model._raw_predict(Xgrid) lower = m - 2*np.sqrt(v) upper = m + 2*np.sqrt(v) Y = model.Y else: - m, v, lower, upper = model.predict(Xgrid, which_parts=which_parts) + m, v, lower, upper = model.predict(Xgrid) Y = model.Y for d in which_data_ycols: gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol) @@ -89,7 +87,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #optionally plot some samples if samples: #NOTE not tested with fixed_inputs - Ysim = model.posterior_samples(Xgrid, samples, which_parts=which_parts) + Ysim = model.posterior_samples(Xgrid, samples) for yi in Ysim.T: ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs. @@ -131,10 +129,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #predict on the frame and plot if plot_raw: - m, _ = model._raw_predict(Xgrid, which_parts=which_parts) + m, _ = model._raw_predict(Xgrid) Y = model.Y else: - m, _, _, _ = model.predict(Xgrid, which_parts=which_parts) + m, _, _, _ = model.predict(Xgrid) Y = model.data for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 059a39c3..23f5d0c8 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -513,8 +513,8 @@ def toy_rbf_1d(seed=default_seed, num_samples=500): num_in = 1 X = np.random.uniform(low= -1.0, high=1.0, size=(num_samples, num_in)) X.sort(axis=0) - rbf = GPy.kern.rbf(num_in, variance=1., lengthscale=np.array((0.25,))) - white = GPy.kern.white(num_in, variance=1e-2) + rbf = GPy.kern.RBF(num_in, variance=1., lengthscale=np.array((0.25,))) + white = GPy.kern.White(num_in, variance=1e-2) kernel = rbf + white K = kernel.K(X) y = np.reshape(np.random.multivariate_normal(np.zeros(num_samples), K), (num_samples, 1)) From de51ad638a0ea12469a881d32f7524eeb4ac3082 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Wed, 19 Feb 2014 22:23:07 +0000 Subject: [PATCH 5/6] prod now seems to work for sparse --- GPy/core/sparse_gp.py | 13 ++++++------- GPy/kern/_src/add.py | 2 +- GPy/kern/_src/linear.py | 16 ++++++++-------- GPy/kern/_src/prod.py | 39 +++++++++++++++++---------------------- GPy/kern/_src/rbf.py | 2 +- GPy/kern/_src/white.py | 3 +-- 6 files changed, 34 insertions(+), 41 deletions(-) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index edb8d8f6..128dfca3 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -68,22 +68,21 @@ class SparseGP(GP): self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) self._update_gradients_Z(add=False) - def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False): + def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False): """ Make a prediction for the latent function values """ if X_variance_new is None: - Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) + Kx = self.kern.K(self.Z, Xnew) mu = np.dot(Kx.T, self.posterior.woodbury_vector) if full_cov: - Kxx = self.kern.K(Xnew, which_parts=which_parts) - var = Kxx - mdot(Kx.T, 
self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting + Kxx = self.kern.K(Xnew) + var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) else: - Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) + Kxx = self.kern.Kdiag(Xnew) var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0) else: - # assert which_parts=='all', "swithching out parts of variational kernels is not implemented" - Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts + Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) mu = np.dot(Kx, self.Cpsi1V) if full_cov: raise NotImplementedError, "TODO" diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 8d81674b..edb82ef0 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -46,7 +46,7 @@ class Add(Kern): [p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, i_s)] + [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index 5083c8de..b3765774 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -43,16 +43,16 @@ class Linear(Kern): assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim" else: variances = np.ones(self.input_dim) - + self.variances = Param('variances', variances, Logexp()) - self.variances.gradient = np.zeros(self.variances.shape) + #TODO: remove?self.variances.gradient = np.zeros(self.variances.shape) self.add_parameter(self.variances) self.variances.add_observer(self, self.update_variance) # initialize cache self._Z, self._mu, self._S = np.empty(shape=(3, 1)) self._X, self._X2 = np.empty(shape=(2, 1)) - + def update_variance(self, v): self.variances2 = np.square(self.variances) @@ -62,7 +62,7 @@ class Linear(Kern): def update_gradients_full(self, dL_dK, X): self.variances.gradient[:] = 0 self._param_grad_helper(dL_dK, X, None, self.variances.gradient) - + def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): tmp = dL_dKdiag[:, None] * X ** 2 if self.ARD: @@ -71,7 +71,7 @@ class Linear(Kern): self.variances.gradient = tmp.sum() self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient) - + def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): self._psi_computations(Z, mu, S) # psi0: @@ -87,7 +87,7 @@ class Linear(Kern): #from Kmm self._K_computations(Z, None) self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) - + def K(self, X, X2, target): if self.ARD: XX = X * np.sqrt(self.variances) @@ -224,7 +224,7 @@ class Linear(Kern): weave_options = {'headers' : [''], 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], 'extra_link_args' : ['-lgomp']} - + N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu) weave.inline(code, support_code=support_code, libraries=['gomp'], arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'], @@ -281,7 +281,7 
@@ class Linear(Kern):
             self._X2 = None
         else:
             self._X2 = X2.copy()
-            self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
+            self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))

     def _psi_computations(self, Z, mu, S):
         # here are the "statistics" for psi1 and psi2
diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py
index e0d069b2..67637770 100644
--- a/GPy/kern/_src/prod.py
+++ b/GPy/kern/_src/prod.py
@@ -36,38 +36,33 @@ class Prod(Kern):
         self._params = None

     def K(self, X, X2=None):
-        self._K_computations(X,X2)
+        self._K_computations(X, X2)
         return self._K1 * self._K2

+    def Kdiag(self, X):
+        return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
+
     def update_gradients_full(self, dL_dK, X):
         self._K_computations(X, None)
         self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
         self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])

-    def Kdiag(self, X):
-        """Compute the diagonal of the covariance matrix associated to X."""
-        return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
+    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
+        self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2.K(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1])
+        self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1.K(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2])

-    def update_gradients_sparse(self):
-        pass
-        #wtf goes here??
-        #def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        #K1 = np.zeros(X.shape[0])
-        #K2 = np.zeros(X.shape[0])
-        #self.k1.Kdiag(X[:,self.slice1],K1)
-        #self.k2.Kdiag(X[:,self.slice2],K2)
-        #self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
-        #self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
-
-    def gradients_X(self,dL_dK,X,X2):
+    def gradients_X(self, dL_dK, X, X2=None):
         """derivative of the covariance matrix with respect to X."""
-        self._K_computations(X,X2)
+        self._K_computations(X, X2)
+        target = np.zeros(X.shape)
         if X2 is None:
-            self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
-            self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
+            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None)
+            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None)
         else:
-            self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
-            self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
+            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1])
+            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2])
+
+        return target

     def dKdiag_dX(self, dL_dKdiag, X, target):
         K1 = np.zeros(X.shape[0])
@@ -78,7 +73,7 @@ class Prod(Kern):
         self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
         self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])

-    def _K_computations(self,X,X2):
+    def _K_computations(self, X, X2):
         if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
             self._X = X.copy()
             self._params == self._get_params().copy()
diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py
index eb713433..02640fdc 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -154,7 +154,7 @@ class RBF(Kern): else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) - def gradients_X(self, dL_dK, X, X2): + def gradients_X(self, dL_dK, X, X2=None): #if self._X is None or X.base is not self._X.base or X2 is not None: self._K_computations(X, X2) if X2 is None: diff --git a/GPy/kern/_src/white.py b/GPy/kern/_src/white.py index 2be73389..d20e2fe1 100644 --- a/GPy/kern/_src/white.py +++ b/GPy/kern/_src/white.py @@ -20,9 +20,8 @@ class White(Kern): self.input_dim = input_dim self.variance = Param('variance', variance, Logexp()) self.add_parameters(self.variance) - self._psi1 = 0 # TODO: more elegance here - def K(self,X,X2): + def K(self, X, X2=None): if X2 is None: return np.eye(X.shape[0])*self.variance else: From d636c8c30ce696ad27360b4f31a439263b98c2b5 Mon Sep 17 00:00:00 2001 From: James Hensman Date: Thu, 20 Feb 2014 14:04:16 +0000 Subject: [PATCH 6/6] everything is broken --- GPy/core/gp.py | 1 + GPy/core/parameterization/param.py | 10 +- GPy/core/parameterization/parameter_core.py | 46 ++--- GPy/core/sparse_gp.py | 15 +- GPy/examples/dimensionality_reduction.py | 26 +-- GPy/kern/__init__.py | 2 +- GPy/kern/_src/add.py | 203 ++++++++++---------- GPy/kern/_src/kern.py | 16 +- GPy/kern/_src/linear.py | 85 ++++---- GPy/kern/_src/prod.py | 59 ++---- GPy/kern/_src/rbf.py | 78 ++++---- GPy/models/bayesian_gplvm.py | 14 +- GPy/util/caching.py | 93 ++++++--- 13 files changed, 325 insertions(+), 323 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 2dcf0e14..13336ef5 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -44,6 +44,7 @@ class GP(Model): self.Y_metadata = None assert isinstance(kernel, kern.Kern) + assert self.input_dim == kernel.input_dim self.kern = kernel assert isinstance(likelihood, likelihoods.Likelihood) diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index f54c0117..016ecbf6 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -23,7 +23,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri :param input_array: array which this parameter handles :type input_array: numpy.ndarray :param default_constraint: The default constraint for this parameter - :type default_constraint: + :type default_constraint: You can add/remove constraints by calling constrain on the parameter itself, e.g: @@ -59,7 +59,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri def __init__(self, name, input_array, default_constraint=None): super(Param, self).__init__(name=name, default_constraint=default_constraint) - + def __array_finalize__(self, obj): # see InfoArray.__array_finalize__ for comments if obj is None: return @@ -192,7 +192,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri return numpy.r_[a] return numpy.r_[:b] return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size))) - + #=========================================================================== # Convenience #=========================================================================== @@ -260,7 +260,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parameteri clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)] for i in range(self._realndim_-len(clean_curr_slice)): 
i+=len(clean_curr_slice) - clean_curr_slice += range(self._realshape_[i]) + clean_curr_slice += range(self._realshape_[i]) if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice) and len(set(map(len, clean_curr_slice))) <= 1): return numpy.fromiter(itertools.izip(*clean_curr_slice), @@ -426,4 +426,4 @@ class ParamConcatenation(object): start = False return "\n".join(strings) def __repr__(self): - return "\n".join(map(repr,self.params)) \ No newline at end of file + return "\n".join(map(repr,self.params)) diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 275198b2..5acdec58 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -18,8 +18,8 @@ class Observable(object): def remove_observer(self, observer): del self._observers_[observer] def _notify_observers(self): - [callble(self) for callble in self._observers_.itervalues()] - + [callble(self) for callble in self._observers_.values()] + class Pickleable(object): def _getstate(self): """ @@ -51,7 +51,7 @@ class Parentable(object): super(Parentable,self).__init__() self._direct_parent_ = direct_parent self._parent_index_ = parent_index - + def has_parent(self): return self._direct_parent_ is not None @@ -82,7 +82,7 @@ class Nameable(Parentable): from_name = self.name self._name = name if self.has_parent(): - self._direct_parent_._name_changed(self, from_name) + self._direct_parent_._name_changed(self, from_name) class Parameterizable(Parentable): @@ -90,7 +90,7 @@ class Parameterizable(Parentable): super(Parameterizable, self).__init__(*args, **kwargs) from GPy.core.parameterization.array_core import ParamList _parameters_ = ParamList() - + def parameter_names(self, add_name=False): if add_name: return [adjust_name_for_printing(self.name) + "." + xi for x in self._parameters_ for xi in x.parameter_names(add_name=True)] @@ -142,21 +142,21 @@ class Gradcheckable(Parentable): class Indexable(object): def _raveled_index(self): raise NotImplementedError, "Need to be able to get the raveled Index" - + def _internal_offset(self): return 0 - + def _offset_for(self, param): raise NotImplementedError, "shouldnt happen, offset required from non parameterization object?" - + def _raveled_index_for(self, param): """ get the raveled index for a param that is an int array, containing the indexes for the flattened param inside this parameterized logic. """ - raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" - + raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?" + class Constrainable(Nameable, Indexable, Parameterizable): def __init__(self, name, default_constraint=None): super(Constrainable,self).__init__(name) @@ -166,7 +166,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): self.priors = ParameterIndexOperations() if self._default_constraint_ is not None: self.constrain(self._default_constraint_) - + #=========================================================================== # Fixing Parameters: #=========================================================================== @@ -182,21 +182,21 @@ class Constrainable(Nameable, Indexable, Parameterizable): rav_i = self._highest_parent_._raveled_index_for(self) self._highest_parent_._set_fixed(rav_i) fix = constrain_fixed - + def unconstrain_fixed(self): """ This parameter will no longer be fixed. 
""" unconstrained = self.unconstrain(__fixed__) - self._highest_parent_._set_unfixed(unconstrained) + self._highest_parent_._set_unfixed(unconstrained) unfix = unconstrain_fixed - + def _set_fixed(self, index): import numpy as np if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool) self._fixes_[index] = FIXED if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED - + def _set_unfixed(self, index): import numpy as np if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool) @@ -212,7 +212,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): self._fixes_[fixed_indices] = FIXED else: self._fixes_ = None - + def _has_fixes(self): return hasattr(self, "_fixes_") and self._fixes_ is not None @@ -222,17 +222,17 @@ class Constrainable(Nameable, Indexable, Parameterizable): def set_prior(self, prior, warning=True, update=True): repriorized = self.unset_priors() self._add_to_index_operations(self.priors, repriorized, prior, warning, update) - + def unset_priors(self, *priors): return self._remove_from_index_operations(self.priors, priors) - + def log_prior(self): """evaluate the prior""" if self.priors.size > 0: x = self._get_params() return reduce(lambda a,b: a+b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0) return 0. - + def _log_prior_gradients(self): """evaluate the gradients of the priors""" import numpy as np @@ -242,7 +242,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()] return ret return 0. - + #=========================================================================== # Constrain operations -> done #=========================================================================== @@ -269,7 +269,7 @@ class Constrainable(Nameable, Indexable, Parameterizable): transformats of this parameter object. """ return self._remove_from_index_operations(self.constraints, transforms) - + def constrain_positive(self, warning=True, update=True): """ :param warning: print a warning if re-constraining parameters. 
@@ -314,7 +314,7 @@ class Constrainable(Nameable, Indexable, Parameterizable):
         Remove (lower, upper) bounded constraint from this parameter.
         """
         self.unconstrain(Logistic(lower, upper))
-
+
     def _parent_changed(self, parent):
         from index_operations import ParameterIndexOperationsView
         self.constraints = ParameterIndexOperationsView(parent.constraints, parent._offset_for(self), self.size)
@@ -340,7 +340,7 @@
             removed = np.union1d(removed, unconstrained)
             if t is __fixed__:
                 self._highest_parent_._set_unfixed(unconstrained)
-
+
         return removed
diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 128dfca3..c72de182 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -53,20 +53,19 @@ class SparseGP(GP):
         self.add_parameter(self.Z, index=0)
         self.parameters_changed()

-    def _update_gradients_Z(self, add=False):
-        #The derivative of the bound wrt the inducing inputs Z (unless they're all fixed)
+    def _gradients_Z(self):
+        #The derivative of the bound wrt the inducing inputs Z (unless they're all fixed)
         if not self.Z.is_fixed:
-            if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
-            else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
             if self.X_variance is None:
-                self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
+                self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict)
             else:
-                self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
-                self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
+                self.Z.gradient = self.kern.gradients_Z_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict)

     def parameters_changed(self):
         self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
-        self._update_gradients_Z(add=False)
+        self._gradients_Z()

     def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
         """
diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index a7eb0adb..a5e8615d 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -22,18 +22,18 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False,
     # generate GPLVM-like data
     X = _np.random.rand(num_inputs, input_dim)
     lengthscales = _np.random.rand(input_dim)
-    k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
+    k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
          #+ GPy.kern.white(input_dim, 0.01)
          )
     K = k.K(X)
     Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T

-    # k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
-    k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
-    # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
-    # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
-    # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
-    # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
+    # k =
GPy.kern.RBF_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) + #k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.RBF(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) + # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.RBF(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) + # k = GPy.kern.RBF(input_dim, .5, 2., ARD=0) + GPy.kern.RBF(input_dim, .3, .2, ARD=0) + # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) p = .3 @@ -73,7 +73,7 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True): data = GPy.util.datasets.oil_100() Y = data['X'] # create simple GP model - kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6) + kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.bias(6) m = GPy.models.GPLVM(Y, 6, kernel=kernel) m.data_labels = data['Y'].argmax(axis=1) if optimize: m.optimize('scg', messages=verbose) @@ -88,7 +88,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci Y = Y - Y.mean(0) Y /= Y.std(0) # Create the model - kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q) m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing) m.data_labels = data['Y'][:N].argmax(axis=1) @@ -138,7 +138,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4 (1 - var))) + .001 Z = _np.random.permutation(X)[:num_inducing] - kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) + kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel) m.data_colors = c @@ -164,7 +164,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, _np.random.seed(0) data = GPy.util.datasets.oil() - kernel = GPy.kern.rbf_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + kernel = GPy.kern.RBF_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) Y = data['X'][:N] Yn = Gaussian(Y, normalize=True) m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k) @@ -435,7 +435,7 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True): data = GPy.util.datasets.osu_run1() # optimize - back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.) + back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.) 
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
     m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
     if optimize: m.optimize(messages=verbose, max_f_eval=10000)
@@ -470,7 +470,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
     data = GPy.util.datasets.osu_run1()
     Q = 6
-    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
+    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
     m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
     # optimize
     m.ensure_default_constraints()
diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 214e230f..630d74da 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -1,7 +1,7 @@
 from _src.rbf import RBF
 from _src.white import White
 from _src.kern import Kern
-Linear = 'foo'
+from _src.linear import Linear
 #import bias
 #import Brownian
 #import coregionalize
diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index edb82ef0..acc69fd4 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -34,7 +34,7 @@ class Add(Kern):
         :param X: the first set of inputs to the kernel
         :param X2: (optional) the second set of arguments to the kernel. If X2
                    is None, this is passed through to the 'part' object, which
                    handles this as X2 == X.
         """
         assert X.shape[1] == self.input_dim
         if X2 is None:
@@ -48,9 +48,6 @@ class Add(Kern):
     def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
         [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]

-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
-        [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
-
     def gradients_X(self, dL_dK, X, X2=None):
         """Compute the gradient of the objective function with respect to X.
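(For reference: the renamed CamelCase kernels are consumed exactly as in the toy_rbf_1d hunk earlier in this series. A minimal sketch of the flat `GPy.kern` namespace and the new return-style API follows; it assumes the imports in GPy/kern/__init__.py above all succeed, which this "everything is broken" patch admits is not yet the case.

    import numpy as np
    import GPy

    X = np.random.uniform(-1., 1., (50, 1))
    rbf = GPy.kern.RBF(1, variance=1., lengthscale=np.array((0.25,)))
    white = GPy.kern.White(1, variance=1e-2)
    kernel = rbf + white     # an Add kernel, handled by add.py below
    K = kernel.K(X)          # full covariance is returned directly
    Kd = kernel.Kdiag(X)     # diagonal too; no preallocated target array

The change from target-accumulation to plain return values is what most of the hunks below are doing.)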
@@ -69,123 +66,125 @@ class Add(Kern):
         return target

     def Kdiag(self, X):
-        """Compute the diagonal of the covariance function for inputs X."""
         assert X.shape[1] == self.input_dim
         return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
+
     def psi0(self, Z, mu, S):
-        target = np.zeros(mu.shape[0])
-        [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
-        return target
-
-    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
-        target = np.zeros(self.size)
-        [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
-        return self._transform_gradients(target)
-
-    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
-        target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
-        [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
-        return target_mu, target_S
+        return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)

     def psi1(self, Z, mu, S):
-        target = np.zeros((mu.shape[0], Z.shape[0]))
-        [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
-        return target
-
-    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
-        target = np.zeros((self.size))
-        [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
-        return self._transform_gradients(target)
-
-    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
-        target = np.zeros_like(Z)
-        [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
-        return target
-
-    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
-        """return shapes are num_samples,num_inducing,input_dim"""
-        target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
-        [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
-        return target_mu, target_S
+        return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)

     def psi2(self, Z, mu, S):
-        """
-        Computer the psi2 statistics for the covariance function.
-
-        :param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
-        :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
-        :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
-
-        """
-        target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
-        [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
+        psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)

         # compute the "cross" terms
-        # TODO: input_slices needed
-        crossterms = 0
+        from white import White
+        from rbf import RBF
+        #from rbf_inv import RBFInv
+        from bias import Bias
+        from linear import Linear
+        #from fixed import Fixed

-        for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2):
-            if i_s1 == i_s2:
-                # TODO psi1 this must be faster/better/precached/more nice
-                tmp1 = np.zeros((mu.shape[0], Z.shape[0]))
-                p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1)
-                tmp2 = np.zeros((mu.shape[0], Z.shape[0]))
-                p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2)
+        for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.input_slices), 2):
+            # white doesn't combine with anything
+            if isinstance(p1, White) or isinstance(p2, White):
+                pass
+            # rbf X bias
+            #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
+            elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
+                tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2])
+                psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
+            #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
+            elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
+                tmp = p1.psi1(Z[:,i1], mu[:,i1], S[:,i1])
+                psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
+            else:
+                raise NotImplementedError, "psi2 cannot be computed for this kernel"
+        return psi2

-                prod = np.multiply(tmp1, tmp2)
-                crossterms += prod[:, :, None] + prod[:, None, :]
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        from white import White
+        from rbf import RBF
+        #from rbf_inv import RBFInv
+        from bias import Bias
+        from linear import Linear
+        #from fixed import Fixed

-        target += crossterms
+        for p1, is1 in zip(self._parameters_, self.input_slices):
+
+            #compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
+            eff_dL_dpsi1 = dL_dpsi1.copy()
+            for p2, is2 in zip(self._parameters_, self.input_slices):
+                if p2 is p1:
+                    continue
+                if isinstance(p2, White):
+                    continue
+                elif isinstance(p2, Bias):
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+                else:
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
+
+            p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
+
+    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        from white import White
+        from rbf import RBF
+        #from rbf_inv import RBFInv
+        from bias import Bias
+        from linear import Linear
+        #from fixed import Fixed
+
+        target = np.zeros(Z.shape)
+        for p1, is1 in zip(self._parameters_, self.input_slices):
+
+            #compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
+            eff_dL_dpsi1 = dL_dpsi1.copy()
+            for p2, is2 in zip(self._parameters_, self.input_slices):
+                if p2 is p1:
+                    continue
+                if isinstance(p2, White):
+                    continue
+                elif isinstance(p2, Bias):
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+                else:
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
+
+            target += p1.gradients_Z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
         return target

-    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
-        """Gradient of the psi2 statistics with respect to the parameters."""
-        target = np.zeros(self.size)
-        [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
+    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        from white import White
+        from rbf import RBF
+        #from rbf_inv import RBFInv
+        from bias import Bias
+        from linear import Linear
+        #from fixed import Fixed

-        # compute the "cross" terms
-        # TODO: better looping, input_slices
-        for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2):
-            p1, p2 = self._parameters_[i1], self._parameters_[i2]
-#            ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
-            ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2]
+        target_mu = np.zeros(mu.shape)
+        target_S = np.zeros(S.shape)
+        for p1, is1 in zip(self._parameters_, self.input_slices):

-            tmp = np.zeros((mu.shape[0], Z.shape[0]))
-            p1.psi1(Z, mu, S, tmp)
-            p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2])
+            #compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
+            eff_dL_dpsi1 = dL_dpsi1.copy()
+            for p2, is2 in zip(self._parameters_, self.input_slices):
+                if p2 is p1:
+                    continue
+                if isinstance(p2, White):
+                    continue
+                elif isinstance(p2, Bias):
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+                else:
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.

-        return self._transform_gradients(target)
-
-    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
-        target = np.zeros_like(Z)
-        [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
-        # target *= 2
-
-        # compute the "cross" terms
-        # TODO: we need input_slices here.
-        for p1, p2 in itertools.permutations(self._parameters_, 2):
-#            if p1.name == 'linear' and p2.name == 'linear':
-#                raise NotImplementedError("We don't handle linear/linear cross-terms")
-            tmp = np.zeros((mu.shape[0], Z.shape[0]))
-            p1.psi1(Z, mu, S, tmp)
-            p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S)
+            a, b = p1.gradients_muS_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
+            target_mu += a
+            target_S += b
         return target_mu, target_S

     def plot(self, *args, **kwargs):
diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py
index b5b84305..dd87200e 100644
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@@ -9,7 +9,7 @@ from ...core.parameterization.param import Param

 class Kern(Parameterized):
-    def __init__(self,input_dim,name):
+    def __init__(self, input_dim, name):
         """
         The base class for a kernel: a positive definite function
         which forms a covariance function (kernel).
@@ -22,21 +22,15 @@ class Kern(Parameterized):
         super(Kern, self).__init__(name)
         self.input_dim = input_dim

-    def K(self,X,X2,target):
+    def K(self, X, X2, target):
         raise NotImplementedError
-    def Kdiag(self,X,target):
+    def Kdiag(self, X, target):
         raise NotImplementedError
-    def _param_grad_helper(self,dL_dK,X,X2,target):
+    def _param_grad_helper(self, dL_dK, X, X2, target):
         raise NotImplementedError
-    def dKdiag_dtheta(self,dL_dKdiag,X,target): # TODO: Max??
-        # In the base case compute this by calling _param_grad_helper. Need to
-        # override for stationary covariances (for example) to save
-        # time.
-        for i in range(X.shape[0]):
-            self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
     def psi0(self,Z,mu,S,target):
         raise NotImplementedError
-    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
+    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
         raise NotImplementedError
     def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
         raise NotImplementedError
diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py
index b3765774..7f5d43d3 100644
--- a/GPy/kern/_src/linear.py
+++ b/GPy/kern/_src/linear.py
@@ -9,6 +9,7 @@ from ...util.linalg import tdot
 from ...util.misc import fast_array_equal, param_to_array
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
+from ...util.caching import Cacher, cache_this

 class Linear(Kern):
     """
@@ -45,22 +46,35 @@ class Linear(Kern):
             variances = np.ones(self.input_dim)

         self.variances = Param('variances', variances, Logexp())
-        #TODO: remove?self.variances.gradient = np.zeros(self.variances.shape)
         self.add_parameter(self.variances)
-        self.variances.add_observer(self, self.update_variance)
+        self.variances.add_observer(self, self._on_changed)

-        # initialize cache
-        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
-        self._X, self._X2 = np.empty(shape=(2, 1))
+    def _on_changed(self, obj):
+        self._notify_observers()

-    def update_variance(self, v):
-        self.variances2 = np.square(self.variances)
+    @cache_this(limit=3, reset_on_self=True)
+    def K(self, X, X2=None):
+        if self.ARD:
+            if X2 is None:
+                return tdot(X*np.sqrt(self.variances))
+            else:
+                rv = np.sqrt(self.variances)
+                return np.dot(X*rv, (X2*rv).T)
+        else:
+            return self._dot_product(X, X2) * self.variances

-    def on_input_change(self, X):
-        self._K_computations(X, None)
+    @cache_this(limit=3, reset_on_self=False)
+    def _dot_product(self, X, X2=None):
+        if X2 is None:
+            return tdot(X)
+        else:
+            return np.dot(X, X2.T)
+
+    def Kdiag(self, X):
+        return np.sum(self.variances *
np.square(X), -1) def update_gradients_full(self, dL_dK, X): - self.variances.gradient[:] = 0 + self.variances.gradient = np.zeros(self.variances.size) self._param_grad_helper(dL_dK, X, None, self.variances.gradient) def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): @@ -68,7 +82,7 @@ class Linear(Kern): if self.ARD: self.variances.gradient = tmp.sum(0) else: - self.variances.gradient = tmp.sum() + self.variances.gradient = np.atleast_1d(tmp.sum()) self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient) @@ -85,25 +99,8 @@ class Linear(Kern): if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0) else: self.variances.gradient += tmp.sum() #from Kmm - self._K_computations(Z, None) self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient) - def K(self, X, X2, target): - if self.ARD: - XX = X * np.sqrt(self.variances) - if X2 is None: - target += tdot(XX) - else: - XX2 = X2 * np.sqrt(self.variances) - target += np.dot(XX, XX2.T) - else: - if X is not self._X or X2 is not None: - self._K_computations(X, X2) - target += self.variances * self._dot_product - - def Kdiag(self, X, target): - np.add(target, np.sum(self.variances * np.square(X), -1), target) - def _param_grad_helper(self, dL_dK, X, X2, target): if self.ARD: if X2 is None: @@ -112,18 +109,16 @@ class Linear(Kern): product = X[:, None, :] * X2[None, :, :] target += (dL_dK[:, :, None] * product).sum(0).sum(0) else: - if X is not self._X or X2 is not None: - self._K_computations(X, X2) - target += np.sum(self._dot_product * dL_dK) + target += np.sum(self._dot_product(X, X2) * dL_dK) - def gradients_X(self, dL_dK, X, X2, target): + def gradients_X(self, dL_dK, X, X2=None): if X2 is None: - target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) + return 2.*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) else: - target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) + return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) - def dKdiag_dX(self,dL_dKdiag,X,target): - target += 2.*self.variances*dL_dKdiag[:,None]*X + def gradients_X_diag(self, dL_dKdiag, X): + return 2.*self.variances*dL_dKdiag[:,None]*X #---------------------------------------# # PSI statistics # @@ -273,15 +268,15 @@ class Linear(Kern): # Precomputations # #---------------------------------------# - def _K_computations(self, X, X2): - if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)): - self._X = X.copy() - if X2 is None: - self._dot_product = tdot(param_to_array(X)) - self._X2 = None - else: - self._X2 = X2.copy() - self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T)) + #def _K_computations(self, X, X2): + #if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)): + #self._X = X.copy() + #if X2 is None: + ##self._dot_product = tdot(param_to_array(X)) + #self._X2 = None + #else: + #self._X2 = X2.copy() + #self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T)) def _psi_computations(self, Z, mu, S): # here are the "statistics" for psi1 and psi2 diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index 67637770..1d033f70 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -2,9 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) from kern import Kern -from coregionalize import Coregionalize import numpy as np -import hashlib class Prod(Kern): """ @@ -17,7 +15,7 @@ class Prod(Kern): :rtype: kernel 
object
     """
-    def __init__(self,k1,k2,tensor=False):
+    def __init__(self, k1, k2, tensor=False):
         if tensor:
             super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
             self.slice1 = slice(0,k1.input_dim)
@@ -25,64 +23,43 @@
         else:
             assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
             super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
-            self.slice1 = slice(0,self.input_dim)
-            self.slice2 = slice(0,self.input_dim)
+            self.slice1 = slice(0, self.input_dim)
+            self.slice2 = slice(0, self.input_dim)
         self.k1 = k1
         self.k2 = k2
         self.add_parameters(self.k1, self.k2)

-        #initialize cache
-        self._X, self._X2 = np.empty(shape=(2,1))
-        self._params = None
-
     def K(self, X, X2=None):
-        self._K_computations(X, X2)
-        return self._K1 * self._K2
+        if X2 is None:
+            return self.k1.K(X[:,self.slice1], None) * self.k2.K(X[:,self.slice2], None)
+        else:
+            return self.k1.K(X[:,self.slice1], X2[:,self.slice1]) * self.k2.K(X[:,self.slice2], X2[:,self.slice2])

     def Kdiag(self, X):
         return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])

     def update_gradients_full(self, dL_dK, X):
-        self._K_computations(X, None)
-        self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
-        self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
+        self.k1.update_gradients_full(dL_dK*self.k2.K(X[:,self.slice2]), X[:,self.slice1])
+        self.k2.update_gradients_full(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2])

     def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
         self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2.K(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1])
         self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1.K(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2])

     def gradients_X(self, dL_dK, X, X2=None):
-        """derivative of the covariance matrix with respect to X."""
-        self._K_computations(X, X2)
         target = np.zeros(X.shape)
         if X2 is None:
-            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None)
-            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None)
+            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2.K(X[:,self.slice2]), X[:,self.slice1], None)
+            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2], None)
         else:
-            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1])
-            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2])
+            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2.K(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1])
+            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1.K(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2])
         return target

-    def dKdiag_dX(self, dL_dKdiag, X, target):
-        K1 = np.zeros(X.shape[0])
-        K2 = np.zeros(X.shape[0])
-        self.k1.Kdiag(X[:,self.slice1],K1)
-        self.k2.Kdiag(X[:,self.slice2],K2)
+    def gradients_X_diag(self, dL_dKdiag, X):
+        target = np.zeros(X.shape)
+        target[:,self.slice1] += self.k1.gradients_X(dL_dKdiag*self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1])
+        target[:,self.slice2] += self.k2.gradients_X(dL_dKdiag*self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2])
+        return target
-
self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1]) - self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2]) - - def _K_computations(self, X, X2): - if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): - self._X = X.copy() - self._params == self._get_params().copy() - if X2 is None: - self._X2 = None - self._K1 = self.k1.K(X[:,self.slice1],None) - self._K2 = self.k2.K(X[:,self.slice2],None) - else: - self._X2 = X2.copy() - self._K1 = self.k1.K(X[:,self.slice1],X2[:,self.slice1]) - self._K2 = self.k2.K(X[:,self.slice2],X2[:,self.slice2]) diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index 02640fdc..0508436f 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -79,17 +79,18 @@ class RBF(Kern): ret[:] = self.variance return ret - #TODO: remove TARGET! - def psi0(self, Z, mu, S, target): - target += self.variance + def psi0(self, Z, mu, S): + ret = np.empty(mu.shape[0], dtype=np.float64) + ret[:] = self.variance + return ret - def psi1(self, Z, mu, S, target): + def psi1(self, Z, mu, S): self._psi_computations(Z, mu, S) - target += self._psi1 + return self._psi1 - def psi2(self, Z, mu, S, target): + def psi2(self, Z, mu, S): self._psi_computations(Z, mu, S) - target += self._psi2 + return self._psi2 def update_gradients_full(self, dL_dK, X): self._K_computations(X, None) @@ -154,6 +155,37 @@ class RBF(Kern): else: self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm) + def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + self._psi_computations(Z, mu, S) + + #psi1 + denominator = (self.lengthscale2 * (self._psi1_denom)) + dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) + grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0) + + #psi2 + term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim + term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim + dZ = self._psi2[:, :, :, None] * (term1[None] + term2) + grad += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) + + return grad + + def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): + self._psi_computations(Z, mu, S) + #psi1 + tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom + grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) + grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) + + tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom + grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) + grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) + + return grad_mu, grad_S + + + def gradients_X(self, dL_dK, X, X2=None): #if self._X is None or X.base is not self._X.base or X2 is not None: self._K_computations(X, X2) @@ -171,36 +203,7 @@ class RBF(Kern): # PSI statistics # #---------------------------------------# - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): - pass - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): - self._psi_computations(Z, mu, S) - denominator = (self.lengthscale2 * (self._psi1_denom)) - dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) - target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0) - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): - self._psi_computations(Z, mu, S) - tmp = 
self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom - target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) - target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): - self._psi_computations(Z, mu, S) - term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim - term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim - dZ = self._psi2[:, :, :, None] * (term1[None] + term2) - target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) - - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - """Think N,num_inducing,num_inducing,input_dim """ - self._psi_computations(Z, mu, S) - tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom - target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) - target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) - - #---------------------------------------# + #---------------------------------------# # Precomputations # #---------------------------------------# @@ -362,6 +365,7 @@ class RBF(Kern): #include #include """ + mu = param_to_array(mu) weave.inline(code, support_code=support_code, libraries=['gomp'], arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'], type_converters=weave.converters.blitz, **self.weave_options) diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index 914ca4ae..5fb1ca59 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -57,26 +57,16 @@ class BayesianGPLVM(SparseGP, GPLVM): self.init = state.pop() SparseGP._setstate(self, state) - def dL_dmuS(self): - dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance) - dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance) - dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance) - dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2 - dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2 - - return dL_dmu, dL_dS - def KL_divergence(self): var_mean = np.square(self.X).sum() var_S = np.sum(self.X_variance - np.log(self.X_variance)) return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data def parameters_changed(self): - self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y) - self._update_gradients_Z(add=False) + super(BayesianGPLVM, self).parameters_changed() self._log_marginal_likelihood -= self.KL_divergence() - dL_dmu, dL_dS = self.dL_dmuS() + dL_dmu, dL_dS = self.kern.gradients_muS_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict) # dL: self.q.mean.gradient = dL_dmu diff --git a/GPy/util/caching.py b/GPy/util/caching.py index 51ba56f3..1f10cd64 100644 --- a/GPy/util/caching.py +++ b/GPy/util/caching.py @@ -1,46 +1,89 @@ -from ..core.parameterization.array_core import ObservableArray, ParamList +from ..core.parameterization.parameter_core import Observable +from ..core.parameterization.array_core import ParamList + class Cacher(object): - def __init__(self, operation, limit=5): + def __init__(self, operation, limit=5, reset_on_first=False): self.limit = 
int(limit)
+        self._reset_on_first = reset_on_first
         self.operation=operation
-        self.cached_inputs = ParamList([])
+        self.cached_inputs = []
         self.cached_outputs = []
         self.inputs_changed = []

-    def __call__(self, X):
-        assert isinstance(X, ObservableArray)
-        if X in self.cached_inputs:
-            i = self.cached_inputs.index(X)
+    def __call__(self, *args):
+        if self._reset_on_first:
+            assert isinstance(args[0], Observable)
+            args[0].add_observer(self, self.reset)
+            cached_args = args
+        else:
+            cached_args = args[1:]
+
+        if not all([isinstance(arg, Observable) for arg in cached_args]):
+            return self.operation(*args)
+        if cached_args in self.cached_inputs:
+            i = self.cached_inputs.index(cached_args)
             if self.inputs_changed[i]:
-                self.cached_outputs[i] = self.operation(X)
+                self.cached_outputs[i] = self.operation(*args)
                 self.inputs_changed[i] = False
             return self.cached_outputs[i]
         else:
             if len(self.cached_inputs) == self.limit:
-                X_ = self.cached_inputs.pop(0)
-                X_.remove_observer(self)
+                args_ = self.cached_inputs.pop(0)
+                [a.remove_observer(self) for a in args_]
                 self.inputs_changed.pop(0)
                 self.cached_outputs.pop(0)
-            self.cached_inputs.append(X)
-            self.cached_outputs.append(self.operation(X))
+            self.cached_inputs.append(cached_args)
+            self.cached_outputs.append(self.operation(*args))
             self.inputs_changed.append(False)
-            X.add_observer(self, self.on_cache_changed)
+            [a.add_observer(self, self.on_cache_changed) for a in cached_args]
             return self.cached_outputs[-1]

-    def on_cache_changed(self, X):
-        #print id(X)
-        Xbase = X
-        while Xbase is not None:
-            try:
-                i = self.cached_inputs.index(X)
-                break
-            except ValueError:
-                Xbase = X.base
-                continue
-        self.inputs_changed[i] = True
+    def on_cache_changed(self, arg):
+        self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)]
+
+    def reset(self, obj):
+        [[a.remove_observer(self) for a in args] for args in self.cached_inputs]
+        self.cached_inputs = []
+        self.cached_outputs = []
+        self.inputs_changed = []
+
+
+def cache_this(limit=5, reset_on_self=False):
+    def limited_cache(f):
+        c = Cacher(f, limit, reset_on_first=reset_on_self)
+        def f_wrap(*args):
+            return c(*args)
+        f_wrap._cacher = c
+        return f_wrap
+    return limited_cache
+
+    #Xbase = X
+    #while Xbase is not None:
+        #try:
+            #i = self.cached_inputs.index(X)
+            #break
+        #except ValueError:
+            #Xbase = X.base
+            #continue
+    #self.inputs_changed[i] = True
-
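(For reference: the kernels earlier in this patch consume the decorator as in linear.py's `@cache_this(limit=3, reset_on_self=True)`. A minimal usage sketch follows; `DemoKern` and `n_evals` are illustrative only, not part of the patch.

    from GPy.util.caching import cache_this

    class DemoKern(object):
        def __init__(self):
            self.n_evals = 0

        @cache_this(limit=3, reset_on_self=False)
        def K(self, X, X2=None):
            # re-evaluated only when the Observable inputs report a change;
            # non-Observable arguments fall through to a direct call
            self.n_evals += 1
            return X if X2 is None else X + X2

The cache keys on the argument objects themselves and invalidates through the observer callbacks registered in `__call__`, so only Observable inputs participate in caching; with `reset_on_self=True` a change to the owning object drops the whole cache via `reset`.)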