[dxxdiag] some steps towards the diagonal gradients in xx

This commit is contained in:
Max Zwiessele 2016-06-08 10:22:36 +01:00
parent a3f458926b
commit 787168a394
4 changed files with 42 additions and 40 deletions

View file

@ -377,7 +377,7 @@ class GP(Model):
if full_cov:
dK2_dXdX = kern.gradients_XX(one, Xnew)
else:
dK2_dXdX = -kern.gradients_XX(one, Xnew).sum(0)
dK2_dXdX = kern.gradients_XX(one, Xnew).sum(0)
#dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
#for i in range(Xnew.shape[0]):
# dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])

View file

@ -85,23 +85,22 @@ class Add(CombinationKernel):
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target
def gradients_XX(self, dL_dK, X, X2, cov=True):
if cov: # full covarance
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
else:
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
else: # diagonal covariance
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
else:
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
[target.__iadd__(p.gradients_XX(dL_dK, X, X2, cov=cov)) for p in self.parts]
def gradients_XX(self, dL_dK, X, X2):
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
else:
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
#else: # diagonal covariance
# if X2 is None:
# target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
# else:
# target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
[target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_XX_diag(self, dL_dKdiag, X, cov=True):
def gradients_XX_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape+(X.shape[1],))
[target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X, cov=cov)) for p in self.parts]
[target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts]
return target
@Cache_this(limit=3, force_kwargs=['which_parts'])
@ -188,7 +187,7 @@ class Add(CombinationKernel):
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
tmp = dL_dpsi2.sum(0)+ dL_dpsi2.sum(1) if len(dL_dpsi2.shape)==2 else dL_dpsi2.sum(2)+ dL_dpsi2.sum(1)
if not self._exact_psicomp: return Kern.update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
from .static import White, Bias
for p1 in self.parts:
@ -200,9 +199,9 @@ class Add(CombinationKernel):
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += tmp * p2.variance
eff_dL_dpsi1 += tmp * p2.variance
else:# np.setdiff1d(p1._all_dims_active, ar2, assume_unique): # TODO: Careful, not correct for overlapping _all_dims_active
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
@ -219,7 +218,7 @@ class Add(CombinationKernel):
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += tmp * p2.variance
eff_dL_dpsi1 += tmp * p2.variance
else:
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
@ -227,7 +226,7 @@ class Add(CombinationKernel):
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
tmp = dL_dpsi2.sum(0)+ dL_dpsi2.sum(1) if len(dL_dpsi2.shape)==2 else dL_dpsi2.sum(2)+ dL_dpsi2.sum(1)
if not self._exact_psicomp: return Kern.gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
from .static import White, Bias
target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters]
@ -240,9 +239,9 @@ class Add(CombinationKernel):
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += tmp * p2.variance
eff_dL_dpsi1 += tmp * p2.variance
else:
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
[np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]
return target_grads
@ -255,7 +254,7 @@ class Add(CombinationKernel):
# other.unlink_parameter(p)
# parts.extend(other.parts)
# #self.link_parameters(*other_params)
#
#
# else:
# #self.link_parameter(other)
# parts.append(other)
@ -271,7 +270,7 @@ class Add(CombinationKernel):
else:
return super(Add, self).input_sensitivity(summarize)
def sde_update_gradient_full(self, gradients):
"""
Update gradient in the order in which parameters are represented in the
@ -283,12 +282,12 @@ class Add(CombinationKernel):
part_param_num = len(p.param_array) # number of parameters in the part
p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
part_start_param_index += part_param_num
def sde(self):
"""
Support adding kernels for sde representation
"""
import scipy.linalg as la
F = None
@ -312,51 +311,51 @@ class Add(CombinationKernel):
L = la.block_diag(L,Lt) if (L is not None) else Lt
Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
H = np.hstack((H,Ht)) if (H is not None) else Ht
Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t
if dF is not None:
dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
'constant', constant_values=0)
dF[-dFt.shape[0]:,-dFt.shape[1]:,-dFt.shape[2]:] = dFt
else:
dF = dFt
if dQc is not None:
dQc = np.pad(dQc,((0,dQct.shape[0]),(0,dQct.shape[1]),(0,dQct.shape[2])),
'constant', constant_values=0)
dQc[-dQct.shape[0]:,-dQct.shape[1]:,-dQct.shape[2]:] = dQct
else:
dQc = dQct
if dPinf is not None:
dPinf = np.pad(dPinf,((0,dPinft.shape[0]),(0,dPinft.shape[1]),(0,dPinft.shape[2])),
'constant', constant_values=0)
dPinf[-dPinft.shape[0]:,-dPinft.shape[1]:,-dPinft.shape[2]:] = dPinft
else:
dPinf = dPinft
if dP0 is not None:
dP0 = np.pad(dP0,((0,dP0t.shape[0]),(0,dP0t.shape[1]),(0,dP0t.shape[2])),
'constant', constant_values=0)
dP0[-dP0t.shape[0]:,-dP0t.shape[1]:,-dP0t.shape[2]:] = dP0t
else:
dP0 = dP0t
n += Ft.shape[0]
nq += Qct.shape[0]
nd += dFt.shape[2]
assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed"
assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)

View file

@ -266,11 +266,11 @@ class Stationary(Kern):
..returns:
dL2_dXdX: [NxQxQ]
"""
dL_dK_diag = dL_dK_diag.reshape(-1, 1, 1)
dL_dK_diag = dL_dK_diag.copy().reshape(-1, 1, 1)
assert dL_dK_diag.size == X.shape[0], "dL_dK_diag has to be given as row [N] or column vector [Nx1]"
l2 = np.ones(X.shape[1])*self.lengthscale**2
return (dL_dK_diag * self.variance/(l2[:,None]*l2[None,:]))# np.zeros(X.shape+(X.shape[1],))
l4 = np.ones(X.shape[1])*self.lengthscale**2
return dL_dK_diag * (np.eye(X.shape[1]) * self.variance/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
def _gradients_X_pure(self, dL_dK, X, X2=None):

View file

@ -135,10 +135,13 @@ class Kern_check_d2Kdiag_dXdX(Kern_check_model):
self.Xc = X.copy()
def log_likelihood(self):
return np.sum(self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X))
l = 0.
for i in range(self.X.shape[0]):
l += self.kernel.gradients_X(self.dL_dK[[i],[i]], self.X[[i]], self.Xc[[i]]).sum()
return l
def parameters_changed(self):
grads = self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X)
grads = -self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X)
self.X.gradient[:] = grads.sum(-1)
def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):