From 62d594d9776f013b8900bb541adc051aaf1facd2 Mon Sep 17 00:00:00 2001 From: Max Zwiessele Date: Mon, 17 Mar 2014 15:43:09 +0000 Subject: [PATCH] slicing now returns the right shape, when computing derivative wrt X or Z --- GPy/kern/_src/add.py | 14 +++++--- GPy/kern/_src/kernel_slice_operations.py | 46 ++++++++++++++++++------ GPy/kern/_src/prod.py | 8 ++--- 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py index 97afd1f0..ca1f4533 100644 --- a/GPy/kern/_src/add.py +++ b/GPy/kern/_src/add.py @@ -58,7 +58,13 @@ class Add(CombinationKernel): :type X2: np.ndarray (num_inducing x input_dim)""" target = np.zeros(X.shape) - [target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts] + [target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts] + return target + + def gradients_X_diag(self, dL_dKdiag, X): + target = np.zeros(X.shape) + [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts] + #[target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts] return target def psi0(self, Z, variational_posterior): @@ -131,7 +137,7 @@ class Add(CombinationKernel): eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. else: eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. - target[:, p1.active_dims] += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) + target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) return target def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): @@ -151,8 +157,8 @@ class Add(CombinationKernel): else: eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. 
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) - target_mu[:, p1.active_dims] += a - target_S[:, p1.active_dims] += b + target_mu += a + target_S += b return target_mu, target_S def _getstate(self): diff --git a/GPy/kern/_src/kernel_slice_operations.py b/GPy/kern/_src/kernel_slice_operations.py index ff33cc24..c355ccad 100644 --- a/GPy/kern/_src/kernel_slice_operations.py +++ b/GPy/kern/_src/kernel_slice_operations.py @@ -4,6 +4,7 @@ Created on 11 Mar 2014 @author: maxz ''' from ...core.parameterization.parameterized import ParametersChangedMeta +import numpy as np class KernCallsViaSlicerMeta(ParametersChangedMeta): def __call__(self, *args, **kw): @@ -12,18 +13,18 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta): instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True) instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True) instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True) - instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True) - instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True) + instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True) + instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True) instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False) instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False) instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False) instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True) - instance.gradients_Z_expectations = _slice_wrapper(instance, 
instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True) - instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True) + instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True) + instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True) instance.parameters_changed() return instance -def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False): +def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False): """ This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension. The different switches are: @@ -34,11 +35,16 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False """ if derivative: if diag: - def x_slice_wrapper(dL_dK, X): + def x_slice_wrapper(dL_dKdiag, X): + ret_X_not_sliced = ret_X and kern._sliced_X == 0 + if ret_X_not_sliced: + ret = np.zeros(X.shape) X = kern._slice_X(X) if not kern._sliced_X else X + # if the return value is of shape X.shape, we need to make sure to return the right shape kern._sliced_X += 1 try: - ret = operation(dL_dK, X) + if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X) + else: ret = operation(dL_dKdiag, X) except: raise finally: @@ -46,10 +52,22 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False return ret elif psi_stat: def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): + ret_X_not_sliced = ret_X and kern._sliced_X == 0 + if ret_X_not_sliced: + ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape) Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, 
kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior kern._sliced_X += 1 + # if the return value is of shape X.shape, we need to make sure to return the right shape try: - ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) + if ret_X_not_sliced: + ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)) + r2 = ret[:2] + ret[0] = ret1 + ret[1] = ret2 + ret[0][:, kern.active_dims] = r2[0] + ret[1][:, kern.active_dims] = r2[1] + del r2 + else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) except: raise finally: @@ -57,10 +75,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False return ret elif psi_stat_Z: def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior): + ret_X_not_sliced = ret_X and kern._sliced_X == 0 + if ret_X_not_sliced: ret = np.zeros(Z.shape) Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior kern._sliced_X += 1 try: - ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) + if ret_X_not_sliced: + ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) + else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) except: raise finally: @@ -68,10 +90,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False return ret else: def x_slice_wrapper(dL_dK, X, X2=None): + ret_X_not_sliced = ret_X and kern._sliced_X == 0 + if ret_X_not_sliced: + ret = np.zeros(X.shape) X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2 kern._sliced_X += 1 try: - ret = operation(dL_dK, X, X2) + if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2) + else: ret = operation(dL_dK, X, X2) except: raise finally: diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index 
f3b2b50f..e00f38c3 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -51,15 +51,15 @@ class Prod(CombinationKernel): def gradients_X(self, dL_dK, X, X2=None): target = np.zeros(X.shape) for k1,k2 in itertools.combinations(self.parts, 2): - target[:,k1.active_dims] += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2) - target[:,k2.active_dims] += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2) + target += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2) + target += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2) return target def gradients_X_diag(self, dL_dKdiag, X): target = np.zeros(X.shape) for k1,k2 in itertools.combinations(self.parts, 2): - target[:,k1.active_dims] += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X) - target[:,k2.active_dims] += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X) + target += k1.gradients_X_diag(dL_dKdiag*k2.Kdiag(X), X) + target += k2.gradients_X_diag(dL_dKdiag*k1.Kdiag(X), X) return target