diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index d769678e..10ba8e6b 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -43,7 +43,7 @@ class GP(Model):
         else:
             self.Y_metadata = None
 
-        assert isinstance(kernel, kern.kern)
+        assert isinstance(kernel, kern.Kern)
         self.kern = kernel
 
         assert isinstance(likelihood, likelihoods.Likelihood)
diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 0a758f1e..2098bd76 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -1,29 +1,32 @@
-import bias
-import Brownian
-import coregionalize
-import exponential
-import eq_ode1
-import finite_dimensional
-import fixed
-import gibbs
-import hetero
-import hierarchical
-import independent_outputs
-import linear
-import Matern32
-import Matern52
-import mlp
-import ODE_1
-import periodic_exponential
-import periodic_Matern32
-import periodic_Matern52
-import poly
-import prod_orthogonal
-import prod
-import rational_quadratic
-import rbfcos
-import rbf
-import rbf_inv
-import spline
-import symmetric
-import white
+from _src.rbf import RBF
+from _src.white import White
+from _src.kern import Kern
+#import bias
+#import Brownian
+#import coregionalize
+#import exponential
+#import eq_ode1
+#import finite_dimensional
+#import fixed
+#import gibbs
+#import hetero
+#import hierarchical
+#import independent_outputs
+#import linear
+#import Matern32
+#import Matern52
+#import mlp
+#import ODE_1
+#import periodic_exponential
+#import periodic_Matern32
+#import periodic_Matern52
+#import poly
+#import prod_orthogonal
+#import prod
+#import rational_quadratic
+#import rbfcos
+#import rbf
+#import rbf_inv
+#import spline
+#import symmetric
+#import white
diff --git a/GPy/kern/__init__old.py b/GPy/kern/__init__old.py
deleted file mode 100644
index eb4076c3..00000000
--- a/GPy/kern/__init__old.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-from constructors import *
-try:
-    from constructors import rbf_sympy, sympykern # these depend on sympy
-except:
-    pass
-from kern import *
diff --git a/GPy/kern/Brownian.py b/GPy/kern/_src/Brownian.py
similarity index 100%
rename from GPy/kern/Brownian.py
rename to GPy/kern/_src/Brownian.py
diff --git a/GPy/kern/Matern32.py b/GPy/kern/_src/Matern32.py
similarity index 100%
rename from GPy/kern/Matern32.py
rename to GPy/kern/_src/Matern32.py
diff --git a/GPy/kern/Matern52.py b/GPy/kern/_src/Matern52.py
similarity index 100%
rename from GPy/kern/Matern52.py
rename to GPy/kern/_src/Matern52.py
diff --git a/GPy/kern/ODE_1.py b/GPy/kern/_src/ODE_1.py
similarity index 100%
rename from GPy/kern/ODE_1.py
rename to GPy/kern/_src/ODE_1.py
diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
new file mode 100644
index 00000000..8d916941
--- /dev/null
+++ b/GPy/kern/_src/add.py
@@ -0,0 +1,264 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import sys
+import numpy as np
+import itertools
+from linear import Linear
+from ...core.parameterization import Parameterized
+from GPy.core.parameterization.param import Param
+from kern import Kern
+
+class Add(Kern):
+    def __init__(self, subkerns, tensor):
+        assert all([isinstance(k, Kern) for k in subkerns])
+        if tensor:
+            input_dim = sum([k.input_dim for k in subkerns])
+            self.input_slices = []
+            n = 0
+            for k in subkerns:
+                self.input_slices.append(slice(n, n+k.input_dim))
+                n += k.input_dim
+        else:
+            assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
+            input_dim = subkerns[0].input_dim
+            self.input_slices = [slice(None) for k in subkerns]
+        super(Add, self).__init__(input_dim, 'add')
+        self.add_parameters(*subkerns)
+
+    def K(self, X, X2=None, which_parts='all'):
+        """
+        Compute the kernel function.
+
+        :param X: the first set of inputs to the kernel
+        :param X2: (optional) the second set of arguments to the kernel. If X2
+                   is None, this is passed through to the 'part' object, which
+                   handles this as X2 == X.
+        :param which_parts: a list of booleans detailing whether to include
+                            each of the part functions. By default, 'all'
+                            indicates all parts
+        """
+        if which_parts == 'all':
+            which_parts = [True] * self.size
+        assert X.shape[1] == self.input_dim
+        if X2 is None:
+            target = np.zeros((X.shape[0], X.shape[0]))
+            [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
+        else:
+            target = np.zeros((X.shape[0], X2.shape[0]))
+            [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
+        return target
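For orientation, a minimal construction sketch (hypothetical values, assuming the patched package imports cleanly): the `+` operator defined on Kern below builds an Add on a shared input space, while add(..., tensor=True) stacks the input dimensions, exactly as the __init__ above computes input_dim and input_slices.

import GPy

k = GPy.kern.RBF(2) + GPy.kern.White(2)                     # Add([...], tensor=False), still a 2-d input space
kt = GPy.kern.RBF(2).add(GPy.kern.White(2), tensor=True)    # dimensions stacked: 2 + 2 = 4
assert k.input_dim == 2 and kt.input_dim == 4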
+    def update_gradients_full(self, dL_dK, X):
+        [p.update_gradients_full(dL_dK, X) for p in self._parameters_]
+
+    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
+        [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_]
+
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
+
+    def _param_grad_helper(self, dL_dK, X, X2=None):
+        """
+        Compute the gradient of the covariance function with respect to the parameters.
+
+        :param dL_dK: An array of gradients of the objective function with respect to the covariance function.
+        :type dL_dK: np.ndarray (num_samples x num_inducing)
+        :param X: Observed data inputs
+        :type X: np.ndarray (num_samples x input_dim)
+        :param X2: Observed data inputs (optional, defaults to X)
+        :type X2: np.ndarray (num_inducing x input_dim)
+
+        returns: dL_dtheta
+        """
+        assert X.shape[1] == self.input_dim
+        target = np.zeros(self.size)
+        if X2 is None:
+            [p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
+        else:
+            [p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
+
+        return self._transform_gradients(target)
+
+    def gradients_X(self, dL_dK, X, X2=None):
+        """Compute the gradient of the objective function with respect to X.
+
+        :param dL_dK: An array of gradients of the objective function with respect to the covariance function.
+        :type dL_dK: np.ndarray (num_samples x num_inducing)
+        :param X: Observed data inputs
+        :type X: np.ndarray (num_samples x input_dim)
+        :param X2: Observed data inputs (optional, defaults to X)
+        :type X2: np.ndarray (num_inducing x input_dim)"""
+
+        target = np.zeros_like(X)
+        if X2 is None:
+            [p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+        else:
+            [p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+        return target
+
+    def Kdiag(self, X, which_parts='all'):
+        """Compute the diagonal of the covariance function for inputs X."""
+        if which_parts == 'all':
+            which_parts = [True] * self.size
+        assert X.shape[1] == self.input_dim
+        target = np.zeros(X.shape[0])
+        [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on]
+        return target
+
+    def dKdiag_dtheta(self, dL_dKdiag, X):
+        """Compute the gradient of the diagonal of the covariance function with respect to the parameters."""
+        assert X.shape[1] == self.input_dim
+        assert dL_dKdiag.size == X.shape[0]
+        target = np.zeros(self.size)
+        [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
+        return self._transform_gradients(target)
+
+    def dKdiag_dX(self, dL_dKdiag, X):
+        assert X.shape[1] == self.input_dim
+        target = np.zeros_like(X)
+        [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+        return target
+
+    def psi0(self, Z, mu, S):
+        target = np.zeros(mu.shape[0])
+        [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
+        return target
+
+    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
+        target = np.zeros(self.size)
+        [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
+        return self._transform_gradients(target)
+
+    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
+        target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
+        [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+        return target_mu, target_S
+
+    def psi1(self, Z, mu, S):
+        target = np.zeros((mu.shape[0], Z.shape[0]))
+        [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
+        return target
+
+    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
+        target = np.zeros((self.size))
+        [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
+        return self._transform_gradients(target)
+
+    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
+        target = np.zeros_like(Z)
+        [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+        return target
+
+    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
+        """return shapes are num_samples,num_inducing,input_dim"""
+        target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
+        [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+        return target_mu, target_S
+
+    def psi2(self, Z, mu, S):
+        """
+        Compute the psi2 statistics for the covariance function.
+
+        :param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
+        :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
+        :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
+
+        """
+        target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
+        [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
+
+        # compute the "cross" terms
+        # TODO: input_slices needed
+        crossterms = 0
+
+        for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2):
+            if i_s1 == i_s2:
+                # TODO psi1 this must be faster/better/precached/more nice
+                tmp1 = np.zeros((mu.shape[0], Z.shape[0]))
+                p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1)
+                tmp2 = np.zeros((mu.shape[0], Z.shape[0]))
+                p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2)
+
+                prod = np.multiply(tmp1, tmp2)
+                crossterms += prod[:, :, None] + prod[:, None, :]
+
+        target += crossterms
+        return target
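The cross terms above come from expanding psi2 for a sum of kernels: the mixed products of two parts reduce, under the factorisation used here, to products of their psi1 statistics, symmetrised over the two inducing-input axes. A shape-only NumPy sketch of that step (illustrative arrays, not GPy API):

import numpy as np

N, M = 5, 3                        # num_samples, num_inducing
psi1_a = np.random.rand(N, M)      # psi1 statistics of part 1
psi1_b = np.random.rand(N, M)      # psi1 statistics of part 2

prod = psi1_a * psi1_b
crossterms = prod[:, :, None] + prod[:, None, :]   # (N, M, M), as in the loop above
assert crossterms.shape == (N, M, M)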
+    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
+        """Gradient of the psi2 statistics with respect to the parameters."""
+        target = np.zeros(self.size)
+        [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
+
+        # compute the "cross" terms
+        # TODO: better looping, input_slices
+        for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2):
+            p1, p2 = self._parameters_[i1], self._parameters_[i2]
+#            ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
+            ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2]
+
+            tmp = np.zeros((mu.shape[0], Z.shape[0]))
+            p1.psi1(Z, mu, S, tmp)
+            p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2])
+
+        return self._transform_gradients(target)
+
+    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
+        target = np.zeros_like(Z)
+        [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+        # target *= 2
+
+        # compute the "cross" terms
+        # TODO: we need input_slices here.
+        for p1, p2 in itertools.permutations(self._parameters_, 2):
+#            if p1.name == 'linear' and p2.name == 'linear':
+#                raise NotImplementedError("We don't handle linear/linear cross-terms")
+            tmp = np.zeros((mu.shape[0], Z.shape[0]))
+            p1.psi1(Z, mu, S, tmp)
+            p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target)
+
+        return target * 2
+
+    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
+        target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
+        [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+
+        # compute the "cross" terms
+        # TODO: we need input_slices here.
+        for p1, p2 in itertools.permutations(self._parameters_, 2):
+#            if p1.name == 'linear' and p2.name == 'linear':
+#                raise NotImplementedError("We don't handle linear/linear cross-terms")
+            tmp = np.zeros((mu.shape[0], Z.shape[0]))
+            p1.psi1(Z, mu, S, tmp)
+            p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S)
+
+        return target_mu, target_S
+
+    def plot(self, *args, **kwargs):
+        """
+        See GPy.plotting.matplot_dep.plot
+        """
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from ...plotting.matplot_dep import kernel_plots
+        kernel_plots.plot(self, *args, **kwargs)
+
+    def _getstate(self):
+        """
+        Get the current state of the class,
+        here just all the indices, rest can get recomputed
+        """
+        return Parameterized._getstate(self) + [#self._parameters_,
+                self.input_dim,
+                self.input_slices,
+                self._param_slices_
+                ]
+
+    def _setstate(self, state):
+        self._param_slices_ = state.pop()
+        self.input_slices = state.pop()
+        self.input_dim = state.pop()
+        Parameterized._setstate(self, state)
diff --git a/GPy/kern/bias.py b/GPy/kern/_src/bias.py
similarity index 100%
rename from GPy/kern/bias.py
rename to GPy/kern/_src/bias.py
diff --git a/GPy/kern/constructors.py b/GPy/kern/_src/constructors.py
similarity index 100%
rename from GPy/kern/constructors.py
rename to GPy/kern/_src/constructors.py
diff --git a/GPy/kern/coregionalize.py b/GPy/kern/_src/coregionalize.py
similarity index 100%
rename from GPy/kern/coregionalize.py
rename to GPy/kern/_src/coregionalize.py
diff --git a/GPy/kern/eq_ode1.py b/GPy/kern/_src/eq_ode1.py
similarity index 100%
rename from GPy/kern/eq_ode1.py
rename to GPy/kern/_src/eq_ode1.py
diff --git a/GPy/kern/exponential.py b/GPy/kern/_src/exponential.py
similarity index 100%
rename from GPy/kern/exponential.py
rename to GPy/kern/_src/exponential.py
diff --git a/GPy/kern/finite_dimensional.py b/GPy/kern/_src/finite_dimensional.py
similarity index 100%
rename from GPy/kern/finite_dimensional.py
rename to GPy/kern/_src/finite_dimensional.py
diff --git a/GPy/kern/fixed.py b/GPy/kern/_src/fixed.py
similarity index 100%
rename from GPy/kern/fixed.py
rename to GPy/kern/_src/fixed.py
diff --git a/GPy/kern/gibbs.py b/GPy/kern/_src/gibbs.py
similarity index 100%
rename from GPy/kern/gibbs.py
rename to GPy/kern/_src/gibbs.py
diff --git a/GPy/kern/hetero.py b/GPy/kern/_src/hetero.py
similarity index 100%
rename from GPy/kern/hetero.py
rename to GPy/kern/_src/hetero.py
diff --git a/GPy/kern/hierarchical.py b/GPy/kern/_src/hierarchical.py
similarity index 100%
rename from GPy/kern/hierarchical.py
rename to GPy/kern/_src/hierarchical.py
diff --git a/GPy/kern/independent_outputs.py b/GPy/kern/_src/independent_outputs.py
similarity index 100%
rename from GPy/kern/independent_outputs.py
rename to GPy/kern/_src/independent_outputs.py
diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py
new file mode 100644
index 00000000..af362498
--- /dev/null
+++ b/GPy/kern/_src/kern.py
@@ -0,0 +1,336 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import sys
+import numpy as np
+import itertools
+from ...core.parameterization import Parameterized
+from GPy.core.parameterization.param import Param
+
+
+class Kern(Parameterized):
+    def __init__(self,input_dim,name):
+        """
+        The base class for a kernel: a positive definite function
+        which forms part of a covariance function (kernel).
+
+        :param input_dim: the number of input dimensions to the function
+        :type input_dim: int
+
+        Do not instantiate.
+        """
+        super(Kern, self).__init__(name)
+        self.input_dim = input_dim
+
+    def K(self,X,X2,target):
+        raise NotImplementedError
+    def Kdiag(self,X,target):
+        raise NotImplementedError
+    def _param_grad_helper(self,dL_dK,X,X2,target):
+        raise NotImplementedError
+    def dKdiag_dtheta(self,dL_dKdiag,X,target): # TODO: Max??
+        # In the base case compute this by calling _param_grad_helper. Need to
+        # override for stationary covariances (for example) to save
+        # time.
+        for i in range(X.shape[0]):
+            self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
+    def psi0(self,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
+        raise NotImplementedError
+    def psi1(self,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
+        raise NotImplementedError
+    def psi2(self,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
+        raise NotImplementedError
+    def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
+        raise NotImplementedError
+    def gradients_X(self, dL_dK, X, X2, target):
+        raise NotImplementedError
+    def dKdiag_dX(self, dL_dK, X, target):
+        raise NotImplementedError
+    def update_gradients_full(self, dL_dK, X):
+        """Set the gradients of all parameters when doing full (N) inference."""
+        raise NotImplementedError
+    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
+        """Set the gradients of all parameters when doing sparse (M) inference."""
+        raise NotImplementedError
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
+        raise NotImplementedError
+
+    def plot_ARD(self, *args):
+        """If an ARD kernel is present, plot a bar representation using matplotlib
+
+        See GPy.plotting.matplot_dep.plot_ARD
+        """
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from ...plotting.matplot_dep import kernel_plots
+        return kernel_plots.plot_ARD(self,*args)
+
+
+    def __add__(self, other):
+        """ Overloading of the '+' operator. For more control, see self.add """
+        return self.add(other)
+
+    def add(self, other, tensor=False):
+        """
+        Add another kernel to this one.
+
+        If tensor is False, both kernels are defined on the same _space_; then
+        the created kernel will have the same number of inputs as self and
+        other (which must be the same).
+
+        If tensor is True, then the dimensions are stacked 'horizontally', so
+        that the resulting kernel has self.input_dim + other.input_dim
+
+        :param other: the other kernel to be added
+        :type other: GPy.kern
+
+        """
+        assert isinstance(other, Kern), "only kernels can be added to kernels..."
+        from add import Add
+        return Add([self, other], tensor)
+
+    def __call__(self, X, X2=None):
+        return self.K(X, X2)
+
+    def __mul__(self, other):
+        """ Here we overload the '*' operator. See self.prod for more information"""
+        return self.prod(other)
+
+    def __pow__(self, other, tensor=False):
+        """
+        Shortcut for tensor `prod`.
+        """
+        return self.prod(other, tensor=True)
+
+    def prod(self, other, tensor=False):
+        """
+        Multiply two kernels (either on the same space, or on the tensor product of the input space).
+
+        :param other: the other kernel to be multiplied
+        :type other: GPy.kern
+        :param tensor: whether or not to use the tensor space (default is false).
+        :type tensor: bool
+
+        """
+        assert isinstance(other, Kern), "only kernels can be multiplied by kernels..."
+        from prod import Prod
+        return Prod(self, other, tensor)
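The operator overloads above give the usual kernel algebra; a construction-only sketch (assuming the patched package imports cleanly):

import GPy

k1, k2 = GPy.kern.RBF(2), GPy.kern.White(2)
k_sum    = k1 + k2     # Add on the shared 2-d space
k_prod   = k1 * k2     # Prod on the shared 2-d space
k_tensor = k1 ** k2    # Prod on the 4-d tensor-product space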
+
+
+from GPy.core.model import Model
+
+class Kern_check_model(Model):
+    """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgrad() to be called independently on a kernel."""
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Model.__init__(self, 'kernel_test_model')
+        num_samples = 20
+        num_samples2 = 10
+        if kernel is None:
+            import GPy
+            kernel = GPy.kern.RBF(1)
+        if X is None:
+            X = np.random.randn(num_samples, kernel.input_dim)
+        if dL_dK is None:
+            if X2 is None:
+                dL_dK = np.ones((X.shape[0], X.shape[0]))
+            else:
+                dL_dK = np.ones((X.shape[0], X2.shape[0]))
+
+        self.kernel = kernel
+        self.add_parameter(kernel)
+        self.X = X
+        self.X2 = X2
+        self.dL_dK = dL_dK
+
+    def is_positive_definite(self):
+        v = np.linalg.eig(self.kernel.K(self.X))[0]
+        if any(v < -10*sys.float_info.epsilon):
+            return False
+        else:
+            return True
+
+    def log_likelihood(self):
+        return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
+
+    def _log_likelihood_gradients(self):
+        raise NotImplementedError("This needs to be implemented to use the kern_check_model class.")
+
+class Kern_check_dK_dtheta(Kern_check_model):
+    """This class allows gradient checks for the gradient of a kernel with respect to parameters. """
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)
+
+    def _log_likelihood_gradients(self):
+        return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
+
+
+class Kern_check_dKdiag_dtheta(Kern_check_model):
+    """This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
+    def __init__(self, kernel=None, dL_dK=None, X=None):
+        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=None)
+        if dL_dK is None:
+            self.dL_dK = np.ones((self.X.shape[0]))
+    def parameters_changed(self):
+        self.kernel.update_gradients_full(self.dL_dK, self.X)
+
+    def log_likelihood(self):
+        return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
+
+    def _log_likelihood_gradients(self):
+        return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
+
+class Kern_check_dK_dX(Kern_check_model):
+    """This class allows gradient checks for the gradient of a kernel with respect to X. """
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)
+        self.remove_parameter(kernel)
+        self.X = Param('X', self.X)
+        self.add_parameter(self.X)
+    def _log_likelihood_gradients(self):
+        return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
+
+class Kern_check_dKdiag_dX(Kern_check_dK_dX):
+    """This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_dK_dX.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=None)
+        if dL_dK is None:
+            self.dL_dK = np.ones((self.X.shape[0]))
+
+    def log_likelihood(self):
+        return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
+
+    def _log_likelihood_gradients(self):
+        return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
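These dummy models exist so that Model.checkgrad can compare the analytic gradients returned by _log_likelihood_gradients against finite differences of log_likelihood. Schematically, the check is (a sketch, not GPy's actual implementation):

import numpy as np

def numerical_grad(f, x, eps=1e-6):
    # central finite differences of a scalar function f at the point x
    g = np.zeros_like(x)
    for i in range(x.size):
        d = np.zeros_like(x)
        d.flat[i] = eps
        g.flat[i] = (f(x + d) - f(x - d)) / (2 * eps)
    return g

# checkgrad passes when numerical_grad(log_likelihood, theta) agrees with
# _log_likelihood_gradients() to within tolerance.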
+
+def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
+    """
+    This function runs on kernels to check the correctness of their
+    implementation. It checks that the covariance function is positive definite
+    for a randomly generated data set.
+
+    :param kern: the kernel to be tested.
+    :type kern: GPy.kern.Kern
+    :param X: X input values to test the covariance function.
+    :type X: ndarray
+    :param X2: X2 input values to test the covariance function.
+    :type X2: ndarray
+
+    """
+    pass_checks = True
+    if X is None:
+        X = np.random.randn(10, kern.input_dim)
+        if output_ind is not None:
+            X[:, output_ind] = np.random.randint(kern.output_dim, size=X.shape[0])
+    if X2 is None:
+        X2 = np.random.randn(20, kern.input_dim)
+        if output_ind is not None:
+            X2[:, output_ind] = np.random.randint(kern.output_dim, size=X2.shape[0])
+
+    if verbose:
+        print("Checking covariance function is positive definite.")
+    result = Kern_check_model(kern, X=X).is_positive_definite()
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Positive definite check failed for " + kern.name + " covariance function.")
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X2) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of Kdiag(X) wrt theta.")
+    result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt X.")
+    try:
+        result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result = True
+        if verbose:
+            print("gradients_X not implemented for " + kern.name)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt X.")
+    try:
+        result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result = True
+        if verbose:
+            print("gradients_X not implemented for " + kern.name)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
Gradient values as follows:") + Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True) + pass_checks = False + return False + + if verbose: + print("Checking gradients of Kdiag(X) wrt X.") + try: + result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose) + except NotImplementedError: + result=True + if verbose: + print("gradients_X not implemented for " + kern.name) + if result and verbose: + print("Check passed.") + if not result: + print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") + Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True) + pass_checks = False + return False + + return pass_checks diff --git a/GPy/kern/_src/kernpart.py b/GPy/kern/_src/kernpart.py new file mode 100644 index 00000000..097ed741 --- /dev/null +++ b/GPy/kern/_src/kernpart.py @@ -0,0 +1,60 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) +#from ...core.parameterized.Parameterized import set_as_parameter +from ...core.parameterization import Parameterized + +class Kernpart_stationary(Kernpart): + def __init__(self, input_dim, lengthscale=None, ARD=False): + self.input_dim = input_dim + self.ARD = ARD + if not ARD: + self.num_params = 2 + if lengthscale is not None: + self.lengthscale = np.asarray(lengthscale) + assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" + else: + self.lengthscale = np.ones(1) + else: + self.num_params = self.input_dim + 1 + if lengthscale is not None: + self.lengthscale = np.asarray(lengthscale) + assert self.lengthscale.size == self.input_dim, "bad number of lengthscales" + else: + self.lengthscale = np.ones(self.input_dim) + + # initialize cache + self._Z, self._mu, self._S = np.empty(shape=(3, 1)) + self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) + + def _set_params(self, x): + self.lengthscale = x + self.lengthscale2 = np.square(self.lengthscale) + # reset cached results + self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) + self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S + + + def dKdiag_dtheta(self, dL_dKdiag, X, target): + # For stationary covariances, derivative of diagonal elements + # wrt lengthscale is 0. + target[0] += np.sum(dL_dKdiag) + + def dKdiag_dX(self, dL_dK, X, target): + pass # true for all stationary kernels + + +class Kernpart_inner(Kernpart): + def __init__(self,input_dim): + """ + The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs. + + :param input_dim: the number of input dimensions to the function + :type input_dim: int + + Do not instantiate. 
+ """ + Kernpart.__init__(self, input_dim) + + # initialize cache + self._Z, self._mu, self._S = np.empty(shape=(3, 1)) + self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) diff --git a/GPy/kern/linear.py b/GPy/kern/_src/linear.py similarity index 98% rename from GPy/kern/linear.py rename to GPy/kern/_src/linear.py index 828ece11..ab77d4e6 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/_src/linear.py @@ -4,13 +4,13 @@ import numpy as np from scipy import weave -from kernpart import Kernpart -from ...util.linalg import tdot -from ...util.misc import fast_array_equal, param_to_array -from ...core.parameterization import Param -from ...core.parameterization.transformations import Logexp +from kern import Kern +from ..util.linalg import tdot +from ..util.misc import fast_array_equal, param_to_array +from ..core.parameterization import Param +from ..core.parameterization.transformations import Logexp -class Linear(Kernpart): +class Linear(Kern): """ Linear kernel diff --git a/GPy/kern/mlp.py b/GPy/kern/_src/mlp.py similarity index 100% rename from GPy/kern/mlp.py rename to GPy/kern/_src/mlp.py diff --git a/GPy/kern/odekern1.c b/GPy/kern/_src/odekern1.c similarity index 100% rename from GPy/kern/odekern1.c rename to GPy/kern/_src/odekern1.c diff --git a/GPy/kern/periodic_Matern32.py b/GPy/kern/_src/periodic_Matern32.py similarity index 100% rename from GPy/kern/periodic_Matern32.py rename to GPy/kern/_src/periodic_Matern32.py diff --git a/GPy/kern/periodic_Matern52.py b/GPy/kern/_src/periodic_Matern52.py similarity index 100% rename from GPy/kern/periodic_Matern52.py rename to GPy/kern/_src/periodic_Matern52.py diff --git a/GPy/kern/periodic_exponential.py b/GPy/kern/_src/periodic_exponential.py similarity index 100% rename from GPy/kern/periodic_exponential.py rename to GPy/kern/_src/periodic_exponential.py diff --git a/GPy/kern/poly.py b/GPy/kern/_src/poly.py similarity index 100% rename from GPy/kern/poly.py rename to GPy/kern/_src/poly.py diff --git a/GPy/kern/prod.py b/GPy/kern/_src/prod.py similarity index 98% rename from GPy/kern/prod.py rename to GPy/kern/_src/prod.py index 364c91b3..08221de7 100644 --- a/GPy/kern/prod.py +++ b/GPy/kern/_src/prod.py @@ -1,17 +1,17 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-from kernpart import Kernpart
+from kern import Kern
 from coregionalize import Coregionalize
 import numpy as np
 import hashlib
 
-class Prod(Kernpart):
+class Prod(Kern):
     """
     Computes the product of 2 kernels
 
     :param k1, k2: the kernels to multiply
-    :type k1, k2: Kernpart
+    :type k1, k2: Kern
     :param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
     :type tensor: Boolean
     :rtype: kernel object
diff --git a/GPy/kern/prod_orthogonal.py b/GPy/kern/_src/prod_orthogonal.py
similarity index 100%
rename from GPy/kern/prod_orthogonal.py
rename to GPy/kern/_src/prod_orthogonal.py
diff --git a/GPy/kern/rational_quadratic.py b/GPy/kern/_src/rational_quadratic.py
similarity index 100%
rename from GPy/kern/rational_quadratic.py
rename to GPy/kern/_src/rational_quadratic.py
diff --git a/GPy/kern/rbf.py b/GPy/kern/_src/rbf.py
similarity index 92%
rename from GPy/kern/rbf.py
rename to GPy/kern/_src/rbf.py
index 027aa382..36e454e3 100644
--- a/GPy/kern/rbf.py
+++ b/GPy/kern/_src/rbf.py
@@ -4,13 +4,13 @@
 import numpy as np
 from scipy import weave
 
-from kernpart import Kernpart
+from kern import Kern
 from ...util.linalg import tdot
 from ...util.misc import fast_array_equal, param_to_array
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
 
-class RBF(Kernpart):
+class RBF(Kern):
     """
     Radial Basis Function kernel, aka squared-exponential, exponentiated
     quadratic or Gaussian kernel:
@@ -52,30 +52,16 @@ class RBF(Kernpart):
             lengthscale = np.ones(self.input_dim)
 
         self.variance = Param('variance', variance, Logexp())
-
+
         self.lengthscale = Param('lengthscale', lengthscale, Logexp())
         self.lengthscale.add_observer(self, self.update_lengthscale)
         self.update_lengthscale(self.lengthscale)
-
+
         self.add_parameters(self.variance, self.lengthscale)
         self.parameters_changed() # initializes cache
-        #self.update_inv_lengthscale(self.lengthscale)
-        #self.parameters_changed()
-        # initialize cache
-        #self._Z, self._mu, self._S = np.empty(shape=(3, 1))
-        #self._X, self._X2, self._params_save = np.empty(shape=(3, 1))
-
-        # a set of optional args to pass to weave
-        # self.weave_options = {'headers'           : ['<omp.h>'],
-        #                       'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
-        #                       'extra_link_args'   : ['-lgomp']}
         self.weave_options = {}
 
-    def on_input_change(self, X):
-        #self._K_computations(X, None)
-        pass
-
     def update_lengthscale(self, l):
         self.lengthscale2 = np.square(self.lengthscale)
@@ -84,13 +70,16 @@ class RBF(Kernpart):
         self._X, self._X2 = np.empty(shape=(2, 1))
         self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
 
-    def K(self, X, X2, target):
+    def K(self, X, X2=None):
         self._K_computations(X, X2)
-        target += self.variance * self._K_dvar
+        return self.variance * self._K_dvar
 
-    def Kdiag(self, X, target):
-        np.add(target, self.variance, target)
+    def Kdiag(self, X):
+        ret = np.ones(X.shape[0])
+        ret[:] = self.variance
+        return ret
 
+    #TODO: remove TARGET!
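This hunk is the core of the API change: parts used to accumulate into a caller-supplied target array, whereas Kern subclasses now return their result. The quantity itself is unchanged, the squared-exponential covariance k(x, x') = variance * exp(-||x - x'||^2 / (2 * lengthscale^2)); a standalone sketch of the new return-style convention (hypothetical helper, not GPy API):

import numpy as np

def rbf_K(X, X2=None, variance=1.0, lengthscale=1.0):
    # return-style RBF covariance, mirroring the new Kern.K signature
    if X2 is None:
        X2 = X
    r2 = ((X[:, None, :] - X2[None, :, :]) ** 2).sum(-1)
    return variance * np.exp(-0.5 * r2 / lengthscale ** 2)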
     def psi0(self, Z, mu, S, target):
         target += self.variance
 
@@ -165,7 +154,7 @@ class RBF(Kernpart):
         else:
             self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
 
-    def gradients_X(self, dL_dK, X, X2, target):
+    def gradients_X(self, dL_dK, X, X2):
         #if self._X is None or X.base is not self._X.base or X2 is not None:
         self._K_computations(X, X2)
         if X2 is None:
@@ -173,10 +162,10 @@ class RBF(Kernpart):
         else:
             _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
         gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
-        target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
+        return np.sum(gradients_X * dL_dK.T[:, :, None], 0)
 
-    def dKdiag_dX(self, dL_dKdiag, X, target):
-        pass
+    def dKdiag_dX(self, dL_dKdiag, X):
+        return np.zeros_like(X)
 
     #---------------------------------------#
     #            PSI statistics             #
diff --git a/GPy/kern/rbf_inv.py b/GPy/kern/_src/rbf_inv.py
similarity index 100%
rename from GPy/kern/rbf_inv.py
rename to GPy/kern/_src/rbf_inv.py
diff --git a/GPy/kern/rbfcos.py b/GPy/kern/_src/rbfcos.py
similarity index 100%
rename from GPy/kern/rbfcos.py
rename to GPy/kern/_src/rbfcos.py
diff --git a/GPy/kern/spline.py b/GPy/kern/_src/spline.py
similarity index 100%
rename from GPy/kern/spline.py
rename to GPy/kern/_src/spline.py
diff --git a/GPy/kern/ss_rbf.py b/GPy/kern/_src/ss_rbf.py
similarity index 100%
rename from GPy/kern/ss_rbf.py
rename to GPy/kern/_src/ss_rbf.py
diff --git a/GPy/kern/symmetric.py b/GPy/kern/_src/symmetric.py
similarity index 100%
rename from GPy/kern/symmetric.py
rename to GPy/kern/_src/symmetric.py
diff --git a/GPy/kern/sympy_helpers.cpp b/GPy/kern/_src/sympy_helpers.cpp
similarity index 100%
rename from GPy/kern/sympy_helpers.cpp
rename to GPy/kern/_src/sympy_helpers.cpp
diff --git a/GPy/kern/sympy_helpers.h b/GPy/kern/_src/sympy_helpers.h
similarity index 100%
rename from GPy/kern/sympy_helpers.h
rename to GPy/kern/_src/sympy_helpers.h
diff --git a/GPy/kern/sympykern.py b/GPy/kern/_src/sympykern.py
similarity index 100%
rename from GPy/kern/sympykern.py
rename to GPy/kern/_src/sympykern.py
diff --git a/GPy/kern/white.py b/GPy/kern/_src/white.py
similarity index 77%
rename from GPy/kern/white.py
rename to GPy/kern/_src/white.py
index c7e4c6dd..7750267f 100644
--- a/GPy/kern/white.py
+++ b/GPy/kern/_src/white.py
@@ -1,12 +1,12 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-from kernpart import Kernpart
+from kern import Kern
 import numpy as np
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
 
-class White(Kernpart):
+class White(Kern):
     """
     White noise kernel.
@@ -22,12 +22,17 @@ class White(Kernpart):
         self.add_parameters(self.variance)
         self._psi1 = 0 # TODO: more elegance here
 
-    def K(self,X,X2,target):
+    def K(self,X,X2):
         if X2 is None:
-            target += np.eye(X.shape[0])*self.variance
+            return np.eye(X.shape[0])*self.variance
+        else:
+            # white noise has zero cross-covariance between distinct input sets
+            return np.zeros((X.shape[0], X2.shape[0]))
 
-    def Kdiag(self,X,target):
-        target += self.variance
+    def Kdiag(self,X):
+        ret = np.ones(X.shape[0])
+        ret[:] = self.variance
+        return ret
 
     def update_gradients_full(self, dL_dK, X):
         self.variance.gradient = np.trace(dL_dK)
@@ -38,14 +43,8 @@ class White(Kernpart):
     def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
         raise NotImplementedError
 
-    def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        target += np.sum(dL_dKdiag)
-
-    def gradients_X(self,dL_dK,X,X2,target):
-        pass
-
-    def dKdiag_dX(self,dL_dKdiag,X,target):
-        pass
+    def gradients_X(self,dL_dK,X,X2):
+        return np.zeros_like(X)
 
     def psi0(self,Z,mu,S,target):
         pass # target += self.variance
diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py
deleted file mode 100644
index 53728d0d..00000000
--- a/GPy/kern/kern.py
+++ /dev/null
@@ -1,680 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-import sys
-import numpy as np
-import itertools
-from parts.prod import Prod as prod
-from parts.linear import Linear
-from parts.kernpart import Kernpart
-from ..core.parameterization import Parameterized
-from GPy.core.parameterization.param import Param
-
-class kern(Parameterized):
-    def __init__(self, input_dim, parts=[], input_slices=None):
-        """
-        This is the main kernel class for GPy. It handles multiple
-        (additive) kernel functions, and keeps track of various things
-        like which parameters live where.
-
-        The technical code for kernels is divided into _parts_ (see
-        e.g. rbf.py). This object contains a list of parts, which are
-        computed additively. For multiplication, special _prod_ parts
-        are used.
- - :param input_dim: The dimensionality of the kernel's input space - :type input_dim: int - :param parts: the 'parts' (PD functions) of the kernel - :type parts: list of Kernpart objects - :param input_slices: the slices on the inputs which apply to each kernel - :type input_slices: list of slice objects, or list of bools - - """ - super(kern, self).__init__('kern') - self.add_parameters(*parts) - self.input_dim = input_dim - - if input_slices is None: - self.input_slices = [slice(None) for p in self._parameters_] - else: - assert len(input_slices) == len(self._parameters_) - self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices] - - for p in self._parameters_: - assert isinstance(p, Kernpart), "bad kernel part" - - def parameters_changed(self): - [p.parameters_changed() for p in self._parameters_] - - def connect_input(self, Xparam): - [p.connect_input(Xparam) for p in self._parameters_] - - def _getstate(self): - """ - Get the current state of the class, - here just all the indices, rest can get recomputed - """ - return Parameterized._getstate(self) + [#self._parameters_, - #self.num_params, - self.input_dim, - self.input_slices, - self._param_slices_ - ] - - def _setstate(self, state): - self._param_slices_ = state.pop() - self.input_slices = state.pop() - self.input_dim = state.pop() - #self.num_params = state.pop() - #self._parameters_ = state.pop() - Parameterized._setstate(self, state) - - - def plot_ARD(self, *args): - """If an ARD kernel is present, plot a bar representation using matplotlib - - See GPy.plotting.matplot_dep.plot_ARD - """ - assert "matplotlib" in sys.modules, "matplotlib package has not been imported." - from ..plotting.matplot_dep import kernel_plots - return kernel_plots.plot_ARD(self,*args) - -# def _transform_gradients(self, g): -# """ -# Apply the transformations of the kernel so that the returned vector -# represents the gradient in the transformed space (i.e. that given by -# get_params_transformed()) -# -# :param g: the gradient vector for the current model, usually created by _param_grad_helper -# """ -# x = self._get_params() -# [np.place(g, index, g[index] * constraint.gradfactor(x[index])) -# for constraint, index in self.constraints.iteritems() if constraint is not __fixed__] -# # for constraint, index in self.constraints.iteritems(): -# # if constraint != __fixed__: -# # g[index] = g[index] * constraint.gradfactor(x[index]) -# #[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]] -# [np.put(g, i, v) for i, v in [[i, t.sum()] for p in self._parameters_ for t,i in p._tied_to_me_.iteritems()]] -# # if len(self.tied_indices) or len(self.fixed_indices): -# # to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices])) -# # return np.delete(g, to_remove) -# # else: -# if self._fixes_ is not None: return g[self._fixes_] -# return g -# x = self._get_params() -# [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)] -# [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]] -# if len(self.tied_indices) or len(self.fixed_indices): -# to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices])) -# return np.delete(g, to_remove) -# else: -# return g - - def __add__(self, other): - """ Overloading of the '+' operator. for more control, see self.add """ - return self.add(other) - - def add(self, other, tensor=False): - """ - Add another kernel to this one. 
- - If Tensor is False, both kernels are defined on the same _space_. then - the created kernel will have the same number of inputs as self and - other (which must be the same). - - If Tensor is True, then the dimensions are stacked 'horizontally', so - that the resulting kernel has self.input_dim + other.input_dim - - :param other: the other kernel to be added - :type other: GPy.kern - - """ - if tensor: - D = self.input_dim + other.input_dim - self_input_slices = [slice(*sl.indices(self.input_dim)) for sl in self.input_slices] - other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices] - other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices] - - newkern = kern(D, self._parameters_ + other._parameters_, self_input_slices + other_input_slices) - - # transfer constraints: -# newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices] -# newkern.constraints = self.constraints + other.constraints -# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices] -# newkern.fixed_values = self.fixed_values + other.fixed_values -# newkern.constraints = self.constraints + other.constraints -# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices] - else: - assert self.input_dim == other.input_dim - newkern = kern(self.input_dim, self._parameters_ + other._parameters_, self.input_slices + other.input_slices) - # transfer constraints: -# newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices] -# newkern.constraints = self.constraints + other.constraints -# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices] -# newkern.fixed_values = self.fixed_values + other.fixed_values -# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices] - - [newkern.constraints.add(transform, ind) for transform, ind in self.constraints.iteritems()] - [newkern.constraints.add(transform, ind+self.size) for transform, ind in other.constraints.iteritems()] - newkern._fixes_ = ((self._fixes_ or 0) + (other._fixes_ or 0)) or None - - return newkern - - def __call__(self, X, X2=None): - return self.K(X, X2) - - def __mul__(self, other): - """ Here we overload the '*' operator. See self.prod for more information""" - return self.prod(other) - - def __pow__(self, other, tensor=False): - """ - Shortcut for tensor `prod`. - """ - return self.prod(other, tensor=True) - - def prod(self, other, tensor=False): - """ - Multiply two kernels (either on the same space, or on the tensor product of the input space). - - :param other: the other kernel to be added - :type other: GPy.kern - :param tensor: whether or not to use the tensor space (default is false). 
- :type tensor: bool - - """ - K1 = self - K2 = other - #K1 = self.copy() - #K2 = other.copy() - - slices = [] - for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices): - s1, s2 = [False] * K1.input_dim, [False] * K2.input_dim - s1[sl1], s2[sl2] = [True], [True] - slices += [s1 + s2] - - newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1._parameters_, K2._parameters_)] - - if tensor: - newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices) - else: - newkern = kern(K1.input_dim, newkernparts, slices) - - #newkern._follow_constrains(K1, K2) - return newkern - -# def _follow_constrains(self, K1, K2): -# -# # Build the array that allows to go from the initial indices of the param to the new ones -# K1_param = [] -# n = 0 -# for k1 in K1.parts: -# K1_param += [range(n, n + k1.num_params)] -# n += k1.num_params -# n = 0 -# K2_param = [] -# for k2 in K2.parts: -# K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)] -# n += k2.num_params -# index_param = [] -# for p1 in K1_param: -# for p2 in K2_param: -# index_param += p1 + p2 -# index_param = np.array(index_param) -# -# # Get the ties and constrains of the kernels before the multiplication -# prev_ties = K1.tied_indices + [arr + K1.num_params for arr in K2.tied_indices] -# -# prev_constr_ind = [K1.constrained_indices] + [K1.num_params + i for i in K2.constrained_indices] -# prev_constr = K1.constraints + K2.constraints -# -# # prev_constr_fix = K1.fixed_indices + [arr + K1.num_params for arr in K2.fixed_indices] -# # prev_constr_fix_values = K1.fixed_values + K2.fixed_values -# -# # follow the previous ties -# for arr in prev_ties: -# for j in arr: -# index_param[np.where(index_param == j)[0]] = arr[0] -# -# # ties and constrains -# for i in range(K1.num_params + K2.num_params): -# index = np.where(index_param == i)[0] -# if index.size > 1: -# self.tie_params(index) -# for i, t in zip(prev_constr_ind, prev_constr): -# self.constrain(np.where(index_param == i)[0], t) -# -# def _get_params(self): -# return np.hstack(self._parameters_) -# return np.hstack([p._get_params() for p in self._parameters_]) - -# def _set_params(self, x): -# import ipdb;ipdb.set_trace() -# [p._set_params(x[s]) for p, s in zip(self._parameters_, self._param_slices_)] - -# def _get_param_names(self): -# # this is a bit nasty: we want to distinguish between parts with the same name by appending a count -# part_names = np.array([k.name for k in self._parameters_], dtype=np.str) -# counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)] -# cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)] -# names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)] -# -# return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self._parameters_)], []) - - def K(self, X, X2=None, which_parts='all'): - """ - Compute the kernel function. - - :param X: the first set of inputs to the kernel - :param X2: (optional) the second set of arguments to the kernel. If X2 - is None, this is passed throgh to the 'part' object, which - handles this as X2 == X. - :param which_parts: a list of booleans detailing whether to include - each of the part functions. 
By default, 'all' - indicates all parts - """ - if which_parts == 'all': - which_parts = [True] * self.size - assert X.shape[1] == self.input_dim - if X2 is None: - target = np.zeros((X.shape[0], X.shape[0])) - [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] - else: - target = np.zeros((X.shape[0], X2.shape[0])) - [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used] - return target - - def update_gradients_full(self, dL_dK, X): - [p.update_gradients_full(dL_dK, X) for p in self._parameters_] - - def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_] - - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_] - - def _param_grad_helper(self, dL_dK, X, X2=None): - """ - Compute the gradient of the covariance function with respect to the parameters. - - :param dL_dK: An array of gradients of the objective function with respect to the covariance function. - :type dL_dK: Np.ndarray (num_samples x num_inducing) - :param X: Observed data inputs - :type X: np.ndarray (num_samples x input_dim) - :param X2: Observed data inputs (optional, defaults to X) - :type X2: np.ndarray (num_inducing x input_dim) - - returns: dL_dtheta - """ - assert X.shape[1] == self.input_dim - target = np.zeros(self.size) - if X2 is None: - [p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] - else: - [p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)] - - return self._transform_gradients(target) - - def gradients_X(self, dL_dK, X, X2=None): - """Compute the gradient of the objective function with respect to X. - - :param dL_dK: An array of gradients of the objective function with respect to the covariance function. 
- :type dL_dK: np.ndarray (num_samples x num_inducing) - :param X: Observed data inputs - :type X: np.ndarray (num_samples x input_dim) - :param X2: Observed data inputs (optional, defaults to X) - :type X2: np.ndarray (num_inducing x input_dim)""" - - target = np.zeros_like(X) - if X2 is None: - [p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - else: - [p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def Kdiag(self, X, which_parts='all'): - """Compute the diagonal of the covariance function for inputs X.""" - if which_parts == 'all': - which_parts = [True] * self.size - assert X.shape[1] == self.input_dim - target = np.zeros(X.shape[0]) - [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on] - return target - - def dKdiag_dtheta(self, dL_dKdiag, X): - """Compute the gradient of the diagonal of the covariance function with respect to the parameters.""" - assert X.shape[1] == self.input_dim - assert dL_dKdiag.size == X.shape[0] - target = np.zeros(self.size) - [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] - return self._transform_gradients(target) - - def dKdiag_dX(self, dL_dKdiag, X): - assert X.shape[1] == self.input_dim - target = np.zeros_like(X) - [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def psi0(self, Z, mu, S): - target = np.zeros(mu.shape[0]) - [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): - target = np.zeros(self.size) - [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] - return self._transform_gradients(target) - - def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S): - target_mu, target_S = np.zeros_like(mu), np.zeros_like(S) - [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target_mu, target_S - - def psi1(self, Z, mu, S): - target = np.zeros((mu.shape[0], Z.shape[0])) - [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): - target = np.zeros((self.size)) - [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)] - return self._transform_gradients(target) - - def dpsi1_dZ(self, dL_dpsi1, Z, mu, S): - target = np.zeros_like(Z) - [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target - - def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): - """return shapes are num_samples,num_inducing,input_dim""" - target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - return target_mu, target_S - - def psi2(self, Z, mu, S): - """ - Computer the psi2 statistics for the covariance function. 
- - :param Z: np.ndarray of inducing inputs (num_inducing x input_dim) - :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim) - :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing) - - """ - target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) - [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)] - - # compute the "cross" terms - # TODO: input_slices needed - crossterms = 0 - - for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2): - if i_s1 == i_s2: - # TODO psi1 this must be faster/better/precached/more nice - tmp1 = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1) - tmp2 = np.zeros((mu.shape[0], Z.shape[0])) - p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2) - - prod = np.multiply(tmp1, tmp2) - crossterms += prod[:, :, None] + prod[:, None, :] - - target += crossterms - return target - - def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): - """Gradient of the psi2 statistics with respect to the parameters.""" - target = np.zeros(self.size) - [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)] - - # compute the "cross" terms - # TODO: better looping, input_slices - for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2): - p1, p2 = self._parameters_[i1], self._parameters_[i2] -# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2] - ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2] - - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2]) - - return self._transform_gradients(target) - - def dpsi2_dZ(self, dL_dpsi2, Z, mu, S): - target = np.zeros_like(Z) - [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - # target *= 2 - - # compute the "cross" terms - # TODO: we need input_slices here. - for p1, p2 in itertools.permutations(self._parameters_, 2): -# if p1.name == 'linear' and p2.name == 'linear': -# raise NotImplementedError("We don't handle linear/linear cross-terms") - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target) - - return target * 2 - - def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S): - target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) - [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] - - # compute the "cross" terms - # TODO: we need input_slices here. - for p1, p2 in itertools.permutations(self._parameters_, 2): -# if p1.name == 'linear' and p2.name == 'linear': -# raise NotImplementedError("We don't handle linear/linear cross-terms") - tmp = np.zeros((mu.shape[0], Z.shape[0])) - p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S) - - return target_mu, target_S - - def plot(self, *args, **kwargs): - """ - See GPy.plotting.matplot_dep.plot - """ - assert "matplotlib" in sys.modules, "matplotlib package has not been imported." 
+    def plot(self, *args, **kwargs):
+        """
+        See GPy.plotting.matplot_dep.plot
+        """
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from ..plotting.matplot_dep import kernel_plots
+        kernel_plots.plot(self, *args, **kwargs)
+
+
+from GPy.core.model import Model
+
+class Kern_check_model(Model):
+    """
+    A dummy model class used as a base class for checking that the gradients
+    of a given kernel are implemented correctly. It enables checkgrad() to be
+    called independently on a kernel.
+    """
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Model.__init__(self, 'kernel_test_model')
+        num_samples = 20
+        num_samples2 = 10
+        if kernel is None:
+            from GPy.kern import RBF
+            kernel = RBF(1)
+        if X is None:
+            X = np.random.randn(num_samples, kernel.input_dim)
+        if dL_dK is None:
+            if X2 is None:
+                dL_dK = np.ones((X.shape[0], X.shape[0]))
+            else:
+                dL_dK = np.ones((X.shape[0], X2.shape[0]))
+
+        self.kernel = kernel
+        self.add_parameter(kernel)
+        self.X = X
+        self.X2 = X2
+        self.dL_dK = dL_dK
+
+    def is_positive_definite(self):
+        v = np.linalg.eig(self.kernel.K(self.X))[0]
+        return not any(v < -10 * sys.float_info.epsilon)
+
+    def log_likelihood(self):
+        return (self.dL_dK * self.kernel.K(self.X, self.X2)).sum()
+
+    def _log_likelihood_gradients(self):
+        raise NotImplementedError("This needs to be implemented to use the Kern_check_model class.")
+
+class Kern_check_dK_dtheta(Kern_check_model):
+    """This class allows gradient checks for the gradient of a kernel with respect to its parameters."""
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)
+
+    def _log_likelihood_gradients(self):
+        return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
+
+class Kern_check_dKdiag_dtheta(Kern_check_model):
+    """This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
+    def __init__(self, kernel=None, dL_dK=None, X=None):
+        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=None)
+        if dL_dK is None:
+            self.dL_dK = np.ones((self.X.shape[0]))
+
+    def parameters_changed(self):
+        self.kernel.update_gradients_full(self.dL_dK, self.X)
+
+    def log_likelihood(self):
+        return (self.dL_dK * self.kernel.Kdiag(self.X)).sum()
+
+    def _log_likelihood_gradients(self):
+        return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
+
+class Kern_check_dK_dX(Kern_check_model):
+    """This class allows gradient checks for the gradient of a kernel with respect to X."""
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)
+        self.remove_parameter(kernel)
+        self.X = Param('X', self.X)
+        self.add_parameter(self.X)
+
+    def _log_likelihood_gradients(self):
+        return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
+
+class Kern_check_dKdiag_dX(Kern_check_dK_dX):
+    """This class allows gradient checks for the gradient of a kernel diagonal with respect to X."""
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_dK_dX.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=None)
+        if dL_dK is None:
+            self.dL_dK = np.ones((self.X.shape[0]))
+
+    def log_likelihood(self):
+        return (self.dL_dK * self.kernel.Kdiag(self.X)).sum()
+
+    def _log_likelihood_gradients(self):
+        return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
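Each of these wrappers turns a single kernel derivative into a model log-likelihood, so that the checkgrad() inherited from GPy's Model class can finite-difference it. A minimal usage sketch, assuming RBF from the new __init__.py:

import numpy as np
from GPy.kern import RBF

X = np.random.randn(15, 1)
m = Kern_check_dK_dtheta(kernel=RBF(1), X=X)
# compares _log_likelihood_gradients against finite differences
assert m.checkgrad(verbose=True)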
+def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
+    """
+    Run a battery of correctness checks on a kernel implementation: it checks
+    that the covariance function is positive definite on a randomly generated
+    data set, and that its gradients wrt theta and X agree with finite
+    differences.
+
+    :param kern: the kernel to be tested.
+    :type kern: GPy.kern.Kern
+    :param X: X input values to test the covariance function.
+    :type X: ndarray
+    :param X2: X2 input values to test the covariance function.
+    :type X2: ndarray
+    """
+    pass_checks = True
+    if X is None:
+        X = np.random.randn(10, kern.input_dim)
+        if output_ind is not None:
+            X[:, output_ind] = np.random.randint(0, kern.output_dim, X.shape[0])
+    if X2 is None:
+        X2 = np.random.randn(20, kern.input_dim)
+        if output_ind is not None:
+            X2[:, output_ind] = np.random.randint(0, kern.output_dim, X2.shape[0])
+
+    if verbose:
+        print("Checking covariance function is positive definite.")
+    result = Kern_check_model(kern, X=X).is_positive_definite()
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Positive definite check failed for " + kern.name + " covariance function.")
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X2) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of Kdiag(X) wrt theta.")
+    result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt X.")
+    try:
+        result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result = True
+        if verbose:
+            print("gradients_X not implemented for " + kern.name)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt X.")
+    try:
+        result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result = True
+        if verbose:
+            print("gradients_X not implemented for " + kern.name)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of Kdiag(X) wrt X.")
+    try:
+        result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result = True
+        if verbose:
+            print("gradients_X not implemented for " + kern.name)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    return pass_checks
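A quick smoke test of the two kernels already ported to the new layout might then look like this (a sketch; the seed is only to make the random test inputs reproducible):

import numpy as np
from GPy.kern import RBF, White

np.random.seed(0)
for k in (RBF(1), White(1)):
    assert kern_test(k, verbose=True), k.name + " failed its checks"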
diff --git a/GPy/kern/kernpart.py b/GPy/kern/kernpart.py
deleted file mode 100644
index 06f1446b..00000000
--- a/GPy/kern/kernpart.py
+++ /dev/null
@@ -1,176 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-#from ...core.parameterized.Parameterized import set_as_parameter
-from ...core.parameterization import Parameterized
-
-class Kernpart(Parameterized):
-    def __init__(self,input_dim,name):
-        """
-        The base class for a kernpart: a positive definite function
-        which forms part of a covariance function (kernel).
-
-        :param input_dim: the number of input dimensions to the function
-        :type input_dim: int
-
-        Do not instantiate.
-        """
-        super(Kernpart, self).__init__(name)
-        # the input dimensionality for the covariance
-        self.input_dim = input_dim
-        # the number of optimisable parameters
-        # the name of the covariance function.
-        # link to parameterized objects
-        #self._X = None
-
-    def connect_input(self, X):
-        X.add_observer(self, self.on_input_change)
-        #self._X = X
-
-    def on_input_change(self, X):
-        """
-        During optimization this function will be called when
-        the inputs X changed. Use this to update caches dependent
-        on the inputs X.
-        """
-        # overwrite this to update kernel when inputs X change
-        pass
-
-#     def set_as_parameter_named(self, name, gradient, index=None, *args, **kwargs):
-#         """
-#         :param names: name of parameter to set as parameter
-#         :param gradient: gradient method to get the gradient of this parameter
-#         :param index: index of where to place parameter in printing
-#         :param args, kwargs: additional arguments to gradient
-#
-#         Convenience method to connect Kernpart parameters:
-#         parameter with name (attribute of this Kernpart) will be set as parameter with following name:
-#
-#         kernel_name + _ + parameter_name
-#
-#         To add the kernels name to the parameter name use this method to
-#         add parameters.
-#         """
-#         self.set_as_parameter(name, getattr(self, name), gradient, index, *args, **kwargs)
-#     def set_as_parameter(self, name, array, gradient, index=None, *args, **kwargs):
-#         """
-#         See :py:func:`GPy.core.parameterized.Parameterized.set_as_parameter`
-#
-#         Note: this method adds the kernels name in front of the parameter.
-#         """
-#         p = Param(self.name+"_"+name, array, gradient, *args, **kwargs)
-#         if index is None:
-#             self._parameters_.append(p)
-#         else:
-#             self._parameters_.insert(index, p)
-#         self.__dict__[name] = p
-        #set_as_parameter.__doc__ += set_as_parameter.__doc__ # @UndefinedVariable
-#     def _get_params(self):
-#         raise NotImplementedError
-#     def _set_params(self,x):
-#         raise NotImplementedError
-#     def _get_param_names(self):
-#         raise NotImplementedError
-    def K(self,X,X2,target):
-        raise NotImplementedError
-    def Kdiag(self,X,target):
-        raise NotImplementedError
-    def _param_grad_helper(self,dL_dK,X,X2,target):
-        raise NotImplementedError
-    def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        # In the base case compute this by calling _param_grad_helper. Need to
-        # override for stationary covariances (for example) to save
-        # time.
-        for i in range(X.shape[0]):
-            self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
-    def psi0(self,Z,mu,S,target):
-        raise NotImplementedError
-    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
-        raise NotImplementedError
-    def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
-        raise NotImplementedError
-    def psi1(self,Z,mu,S,target):
-        raise NotImplementedError
-    def dpsi1_dtheta(self,Z,mu,S,target):
-        raise NotImplementedError
-    def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
-        raise NotImplementedError
-    def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
-        raise NotImplementedError
-    def psi2(self,Z,mu,S,target):
-        raise NotImplementedError
-    def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
-        raise NotImplementedError
-    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
-        raise NotImplementedError
-    def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
-        raise NotImplementedError
-    def gradients_X(self, dL_dK, X, X2, target):
-        raise NotImplementedError
-    def dKdiag_dX(self, dL_dK, X, target):
-        raise NotImplementedError
-    def update_gradients_full(self, dL_dK, X):
-        """Set the gradients of all parameters when doing full (N) inference."""
-        raise NotImplementedError
-    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        """Set the gradients of all parameters when doing sparse (M) inference."""
-        raise NotImplementedError
-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
-        """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
-        raise NotImplementedError
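For downstream code, the user-visible effect of removing Kernpart is the move from the old lowercase part constructors to Kern subclasses:

# before this patch:
#   import GPy
#   k = GPy.kern.rbf(1)
# after this patch:
from GPy.kern import RBF
k = RBF(1)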
-# """ -# p = Param(self.name+"_"+name, array, gradient, *args, **kwargs) -# if index is None: -# self._parameters_.append(p) -# else: -# self._parameters_.insert(index, p) -# self.__dict__[name] = p - #set_as_parameter.__doc__ += set_as_parameter.__doc__ # @UndefinedVariable -# def _get_params(self): -# raise NotImplementedError -# def _set_params(self,x): -# raise NotImplementedError -# def _get_param_names(self): -# raise NotImplementedError - def K(self,X,X2,target): - raise NotImplementedError - def Kdiag(self,X,target): - raise NotImplementedError - def _param_grad_helper(self,dL_dK,X,X2,target): - raise NotImplementedError - def dKdiag_dtheta(self,dL_dKdiag,X,target): - # In the base case compute this by calling _param_grad_helper. Need to - # override for stationary covariances (for example) to save - # time. - for i in range(X.shape[0]): - self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target) - def psi0(self,Z,mu,S,target): - raise NotImplementedError - def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): - raise NotImplementedError - def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S): - raise NotImplementedError - def psi1(self,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dtheta(self,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target): - raise NotImplementedError - def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S): - raise NotImplementedError - def psi2(self,Z,mu,S,target): - raise NotImplementedError - def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target): - raise NotImplementedError - def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): - raise NotImplementedError - def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): - raise NotImplementedError - def gradients_X(self, dL_dK, X, X2, target): - raise NotImplementedError - def dKdiag_dX(self, dL_dK, X, target): - raise NotImplementedError - def update_gradients_full(self, dL_dK, X): - """Set the gradients of all parameters when doing full (N) inference.""" - raise NotImplementedError - def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z): - """Set the gradients of all parameters when doing sparse (M) inference.""" - raise NotImplementedError - def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): - """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs.""" - raise NotImplementedError - -class Kernpart_stationary(Kernpart): - def __init__(self, input_dim, lengthscale=None, ARD=False): - self.input_dim = input_dim - self.ARD = ARD - if not ARD: - self.num_params = 2 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - self.lengthscale = np.ones(1) - else: - self.num_params = self.input_dim + 1 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - self.lengthscale = np.ones(self.input_dim) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - - def _set_params(self, x): - self.lengthscale = x - self.lengthscale2 = np.square(self.lengthscale) - # reset cached results - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S - - - def dKdiag_dtheta(self, dL_dKdiag, X, target): - # 
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index 3e105785..b4f987ea 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -7,7 +7,7 @@ from GPy.util.linalg import PCA
 import numpy
 import itertools
 import pylab
-from GPy.kern.kern import kern
+from GPy.kern.kern import Kern
 from GPy.models.bayesian_gplvm import BayesianGPLVM
 
 class MRD(Model):
@@ -48,11 +48,11 @@ class MRD(Model):
         # sort out the kernels
         if kernels is None:
             kernels = [None] * len(likelihood_or_Y_list)
-        elif isinstance(kernels, kern):
+        elif isinstance(kernels, Kern):
             kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))]
         else:
             assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output"
-            assert all([isinstance(k, kern) for k in kernels]), "invalid kernel object detected!"
+            assert all([isinstance(k, Kern) for k in kernels]), "invalid kernel object detected!"
             assert not ('kernel' in kw), "pass kernels through `kernels` argument"
 
         self.input_dim = input_dim
diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py
index 19c96bc0..80350475 100644
--- a/GPy/plotting/matplot_dep/kernel_plots.py
+++ b/GPy/plotting/matplot_dep/kernel_plots.py
@@ -7,7 +7,7 @@ import pylab as pb
 import Tango
 from matplotlib.textpath import TextPath
 from matplotlib.transforms import offset_copy
-from ...kern.parts.linear import Linear
+from ...kern.linear import Linear
 
 def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
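Both import fixes follow from the new package layout; downstream code now type-checks against Kern in the same style the MRD hunk above uses. A minimal sketch (the list length 3 is arbitrary; copy() is the same method MRD relies on):

from GPy.kern import Kern, RBF

kernels = RBF(2)
# accept either a single kernel or a list of kernels, as MRD does
if isinstance(kernels, Kern):
    kernels = [kernels.copy() for _ in range(3)]
assert all(isinstance(k, Kern) for k in kernels)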