slicing now returns the right shape, when computing derivative wrt X or Z

This commit is contained in:
Max Zwiessele 2014-03-17 15:43:09 +00:00
parent f88b4c92e8
commit 62d594d977
3 changed files with 50 additions and 18 deletions

View file

@ -58,7 +58,13 @@ class Add(CombinationKernel):
:type X2: np.ndarray (num_inducing x input_dim)""" :type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros(X.shape) target = np.zeros(X.shape)
[target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts] [target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
#[target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target return target
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
@ -131,7 +137,7 @@ class Add(CombinationKernel):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else: else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
target[:, p1.active_dims] += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
return target return target
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
@ -151,8 +157,8 @@ class Add(CombinationKernel):
else: else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
target_mu[:, p1.active_dims] += a target_mu += a
target_S[:, p1.active_dims] += b target_S += b
return target_mu, target_S return target_mu, target_S
def _getstate(self): def _getstate(self):

View file

@ -4,6 +4,7 @@ Created on 11 Mar 2014
@author: maxz @author: maxz
''' '''
from ...core.parameterization.parameterized import ParametersChangedMeta from ...core.parameterization.parameterized import ParametersChangedMeta
import numpy as np
class KernCallsViaSlicerMeta(ParametersChangedMeta): class KernCallsViaSlicerMeta(ParametersChangedMeta):
def __call__(self, *args, **kw): def __call__(self, *args, **kw):
@ -12,18 +13,18 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True) instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True)
instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True) instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True)
instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True) instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True)
instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True) instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True)
instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True) instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True)
instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False) instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False)
instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False) instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False)
instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False) instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False)
instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True) instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True)
instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True) instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True)
instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True) instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True)
instance.parameters_changed() instance.parameters_changed()
return instance return instance
def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False): def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False):
""" """
This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension. This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension.
The different switches are: The different switches are:
@ -34,11 +35,16 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
""" """
if derivative: if derivative:
if diag: if diag:
def x_slice_wrapper(dL_dK, X): def x_slice_wrapper(dL_dKdiag, X):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced:
ret = np.zeros(X.shape)
X = kern._slice_X(X) if not kern._sliced_X else X X = kern._slice_X(X) if not kern._sliced_X else X
# if the return value is of shape X.shape, we need to make sure to return the right shape
kern._sliced_X += 1 kern._sliced_X += 1
try: try:
ret = operation(dL_dK, X) if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X)
else: ret = operation(dL_dKdiag, X)
except: except:
raise raise
finally: finally:
@ -46,10 +52,22 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
return ret return ret
elif psi_stat: elif psi_stat:
def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced:
ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
kern._sliced_X += 1 kern._sliced_X += 1
# if the return value is of shape X.shape, we need to make sure to return the right shape
try: try:
ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior) if ret_X_not_sliced:
ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior))
r2 = ret[:2]
ret[0] = ret1
ret[1] = ret2
ret[0][:, kern.active_dims] = r2[0]
ret[1][:, kern.active_dims] = r2[1]
del r2
else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
except: except:
raise raise
finally: finally:
@ -57,10 +75,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
return ret return ret
elif psi_stat_Z: elif psi_stat_Z:
def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior): def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced: ret = np.zeros(Z.shape)
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
kern._sliced_X += 1 kern._sliced_X += 1
try: try:
ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior) if ret_X_not_sliced:
ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
except: except:
raise raise
finally: finally:
@ -68,10 +90,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
return ret return ret
else: else:
def x_slice_wrapper(dL_dK, X, X2=None): def x_slice_wrapper(dL_dK, X, X2=None):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced:
ret = np.zeros(X.shape)
X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2 X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
kern._sliced_X += 1 kern._sliced_X += 1
try: try:
ret = operation(dL_dK, X, X2) if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2)
else: ret = operation(dL_dK, X, X2)
except: except:
raise raise
finally: finally:

View file

@ -51,15 +51,15 @@ class Prod(CombinationKernel):
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape) target = np.zeros(X.shape)
for k1,k2 in itertools.combinations(self.parts, 2): for k1,k2 in itertools.combinations(self.parts, 2):
target[:,k1.active_dims] += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2) target += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2)
target[:,k2.active_dims] += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2) target += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2)
return target return target
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape) target = np.zeros(X.shape)
for k1,k2 in itertools.combinations(self.parts, 2): for k1,k2 in itertools.combinations(self.parts, 2):
target[:,k1.active_dims] += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X) target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X)
target[:,k2.active_dims] += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X) target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X)
return target return target