diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 2835ee29..37c424ad 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -4,7 +4,7 @@
 import numpy as np
 import itertools
 from ...util.caching import Cache_this
-from .kern import CombinationKernel
+from .kern import CombinationKernel, Kern
 from functools import reduce
 
 class Add(CombinationKernel):
@@ -21,8 +21,22 @@ class Add(CombinationKernel):
 
             for part in kern.parts[::-1]:
                 kern.unlink_parameter(part)
                 subkerns.insert(i, part)
-        super(Add, self).__init__(subkerns, name)
+        super(Add, self).__init__(subkerns, name)
+        self._exact_psicomp = self._check_exact_psicomp()
+
+    def _check_exact_psicomp(self):
+        from .. import RBF,Linear,Bias,White
+        n_kerns = len(self.parts)
+        n_rbf = len([k for k in self.parts if isinstance(k,RBF)])
+        n_linear = len([k for k in self.parts if isinstance(k,Linear)])
+        n_bias = len([k for k in self.parts if isinstance(k,Bias)])
+        n_white = len([k for k in self.parts if isinstance(k,White)])
+        n_others = n_kerns - n_rbf - n_linear - n_bias - n_white
+        if n_rbf+n_linear<=1 and n_bias<=1 and n_white<=1 and n_others==0:
+            return True
+        else:
+            return False
 
     @Cache_this(limit=2, force_kwargs=['which_parts'])
     def K(self, X, X2=None, which_parts=None):
@@ -87,14 +100,17 @@
 
     @Cache_this(limit=1, force_kwargs=['which_parts'])
     def psi0(self, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.psi0(self,Z,variational_posterior)
         return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
 
     @Cache_this(limit=1, force_kwargs=['which_parts'])
     def psi1(self, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.psi1(self,Z,variational_posterior)
         return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
 
     @Cache_this(limit=1, force_kwargs=['which_parts'])
     def psi2(self, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.psi2(self,Z,variational_posterior)
         psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
         #return psi2
         # compute the "cross" terms
@@ -130,6 +146,7 @@
 
     @Cache_this(limit=1, force_kwargs=['which_parts'])
     def psi2n(self, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.psi2n(self, Z, variational_posterior)
         psi2 = reduce(np.add, (p.psi2n(Z, variational_posterior) for p in self.parts))
         #return psi2
         # compute the "cross" terms
@@ -164,6 +181,7 @@
         return psi2
 
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
         from .static import White, Bias
         for p1 in self.parts:
             #compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
@@ -180,6 +198,7 @@
             p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
 
     def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
         from .static import White, Bias
         target = np.zeros(Z.shape)
         for p1 in self.parts:
@@ -198,6 +217,7 @@
         return target
 
     def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        if not self._exact_psicomp: return Kern.gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
         from .static import White, Bias
         target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters]
         for p1 in self.parameters:
diff --git a/GPy/kern/_src/psi_comp/gaussherm.py b/GPy/kern/_src/psi_comp/gaussherm.py
index 923b3eb0..bb44b219 100644
--- a/GPy/kern/_src/psi_comp/gaussherm.py
+++ b/GPy/kern/_src/psi_comp/gaussherm.py
@@ -13,9 +13,6 @@
 from ....util.linalg import tdot
 from . import PSICOMP
 
 class PSICOMP_GH(PSICOMP):
-    """
-    TODO: support Psi2 with shape NxMxM
-    """
     def __init__(self, degree=5, cache_K=True):
         self.degree = degree
@@ -48,7 +45,7 @@
 
         psi0 = np.zeros((N,))
         psi1 = np.zeros((N,M))
-        psi2 = np.zeros((M,M))
+        psi2 = np.zeros((N,M,M)) if return_psi2_n else np.zeros((M,M))
         for i in xrange(self.degree):
             if self.cache_K:
                 X = Xs[i]
@@ -57,7 +54,10 @@
             psi0 += self.weights[i]* kern.Kdiag(X)
             Kfu = kern.K(X,Z)
             psi1 += self.weights[i]* Kfu
-            psi2 += self.weights[i]* tdot(Kfu.T)
+            if return_psi2_n:
+                psi2 += self.weights[i]* Kfu[:,None,:]*Kfu[:,:,None]
+            else:
+                psi2 += self.weights[i]* tdot(Kfu.T)
         return psi0, psi1, psi2
 
     @Cache_this(limit=10, ignore_args=(0, 2,3,4))
@@ -84,7 +84,10 @@
             dtheta += kern.gradient
             dX = kern.gradients_X_diag(dL_dpsi0_i, X)
             Kfu = kern.K(X,Z)
-            dL_dkfu = (dL_dpsi1+ 2.*Kfu.dot(dL_dpsi2))*self.weights[i]
+            if len(dL_dpsi2.shape)==2:
+                dL_dkfu = (dL_dpsi1+ Kfu.dot(dL_dpsi2+dL_dpsi2.T))*self.weights[i]
+            else:
+                dL_dkfu = (dL_dpsi1+ (Kfu[:,:,None]*(dL_dpsi2+np.swapaxes(dL_dpsi2, 1,2))).sum(1))*self.weights[i]
             kern.update_gradients_full(dL_dkfu, X, Z)
             dtheta += kern.gradient
             dX_i, dZ_i = kern.gradients_X_X2(dL_dkfu, X, Z)
diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py
index cf102f99..5dcead8a 100644
--- a/GPy/testing/kernel_tests.py
+++ b/GPy/testing/kernel_tests.py
@@ -469,9 +469,10 @@ class Kernel_Psi_statistics_GradientTests(unittest.TestCase):
         self.w3n = self.w3n+np.swapaxes(self.w3n, 1,2)
 
     def test_kernels(self):
-        from GPy.kern import RBF,Linear,MLP
+        from GPy.kern import RBF,Linear,MLP,Bias,White
         Q = self.Z.shape[1]
-        kernels = [RBF(Q,ARD=True), Linear(Q,ARD=True)]
+        kernels = [RBF(Q,ARD=True), Linear(Q,ARD=True),MLP(Q,ARD=True), RBF(Q,ARD=True)+Linear(Q,ARD=True)+Bias(Q)+White(Q)
+                   ,RBF(Q,ARD=True)+Bias(Q)+White(Q), Linear(Q,ARD=True)+Bias(Q)+White(Q)]
         for k in kernels:
             k.randomize()
 