mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-11 15:15:15 +02:00
[dxxdiag] some steps towards the diagonal gradients in xx
This commit is contained in:
parent
a3f458926b
commit
787168a394
4 changed files with 42 additions and 40 deletions
|
|
@ -377,7 +377,7 @@ class GP(Model):
|
|||
if full_cov:
|
||||
dK2_dXdX = kern.gradients_XX(one, Xnew)
|
||||
else:
|
||||
dK2_dXdX = -kern.gradients_XX(one, Xnew).sum(0)
|
||||
dK2_dXdX = kern.gradients_XX(one, Xnew).sum(0)
|
||||
#dK2_dXdX = np.zeros((Xnew.shape[0], Xnew.shape[1], Xnew.shape[1]))
|
||||
#for i in range(Xnew.shape[0]):
|
||||
# dK2_dXdX[i:i+1,:,:] = kern.gradients_XX(one, Xnew[i:i+1,:])
|
||||
|
|
|
|||
|
|
@ -85,23 +85,22 @@ class Add(CombinationKernel):
|
|||
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
|
||||
return target
|
||||
|
||||
def gradients_XX(self, dL_dK, X, X2, cov=True):
|
||||
if cov: # full covarance
|
||||
if X2 is None:
|
||||
target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
|
||||
else:
|
||||
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
|
||||
else: # diagonal covariance
|
||||
if X2 is None:
|
||||
target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
|
||||
else:
|
||||
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
|
||||
[target.__iadd__(p.gradients_XX(dL_dK, X, X2, cov=cov)) for p in self.parts]
|
||||
def gradients_XX(self, dL_dK, X, X2):
|
||||
if X2 is None:
|
||||
target = np.zeros((X.shape[0], X.shape[0], X.shape[1], X.shape[1]))
|
||||
else:
|
||||
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1], X.shape[1]))
|
||||
#else: # diagonal covariance
|
||||
# if X2 is None:
|
||||
# target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
|
||||
# else:
|
||||
# target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
|
||||
[target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts]
|
||||
return target
|
||||
|
||||
def gradients_XX_diag(self, dL_dKdiag, X, cov=True):
|
||||
def gradients_XX_diag(self, dL_dKdiag, X):
|
||||
target = np.zeros(X.shape+(X.shape[1],))
|
||||
[target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X, cov=cov)) for p in self.parts]
|
||||
[target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts]
|
||||
return target
|
||||
|
||||
@Cache_this(limit=3, force_kwargs=['which_parts'])
|
||||
|
|
@ -188,7 +187,7 @@ class Add(CombinationKernel):
|
|||
|
||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
tmp = dL_dpsi2.sum(0)+ dL_dpsi2.sum(1) if len(dL_dpsi2.shape)==2 else dL_dpsi2.sum(2)+ dL_dpsi2.sum(1)
|
||||
|
||||
|
||||
if not self._exact_psicomp: return Kern.update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
from .static import White, Bias
|
||||
for p1 in self.parts:
|
||||
|
|
@ -200,9 +199,9 @@ class Add(CombinationKernel):
|
|||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += tmp * p2.variance
|
||||
eff_dL_dpsi1 += tmp * p2.variance
|
||||
else:# np.setdiff1d(p1._all_dims_active, ar2, assume_unique): # TODO: Careful, not correct for overlapping _all_dims_active
|
||||
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
|
||||
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
|
||||
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
|
||||
def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
|
|
@ -219,7 +218,7 @@ class Add(CombinationKernel):
|
|||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += tmp * p2.variance
|
||||
eff_dL_dpsi1 += tmp * p2.variance
|
||||
else:
|
||||
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
|
||||
target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
|
|
@ -227,7 +226,7 @@ class Add(CombinationKernel):
|
|||
|
||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
tmp = dL_dpsi2.sum(0)+ dL_dpsi2.sum(1) if len(dL_dpsi2.shape)==2 else dL_dpsi2.sum(2)+ dL_dpsi2.sum(1)
|
||||
|
||||
|
||||
if not self._exact_psicomp: return Kern.gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
from .static import White, Bias
|
||||
target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters]
|
||||
|
|
@ -240,9 +239,9 @@ class Add(CombinationKernel):
|
|||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += tmp * p2.variance
|
||||
eff_dL_dpsi1 += tmp * p2.variance
|
||||
else:
|
||||
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
|
||||
eff_dL_dpsi1 += tmp * p2.psi1(Z, variational_posterior)
|
||||
grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
[np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]
|
||||
return target_grads
|
||||
|
|
@ -255,7 +254,7 @@ class Add(CombinationKernel):
|
|||
# other.unlink_parameter(p)
|
||||
# parts.extend(other.parts)
|
||||
# #self.link_parameters(*other_params)
|
||||
#
|
||||
#
|
||||
# else:
|
||||
# #self.link_parameter(other)
|
||||
# parts.append(other)
|
||||
|
|
@ -271,7 +270,7 @@ class Add(CombinationKernel):
|
|||
else:
|
||||
|
||||
return super(Add, self).input_sensitivity(summarize)
|
||||
|
||||
|
||||
def sde_update_gradient_full(self, gradients):
|
||||
"""
|
||||
Update gradient in the order in which parameters are represented in the
|
||||
|
|
@ -283,12 +282,12 @@ class Add(CombinationKernel):
|
|||
part_param_num = len(p.param_array) # number of parameters in the part
|
||||
p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
|
||||
part_start_param_index += part_param_num
|
||||
|
||||
|
||||
def sde(self):
|
||||
"""
|
||||
Support adding kernels for sde representation
|
||||
"""
|
||||
|
||||
|
||||
import scipy.linalg as la
|
||||
|
||||
F = None
|
||||
|
|
@ -312,51 +311,51 @@ class Add(CombinationKernel):
|
|||
L = la.block_diag(L,Lt) if (L is not None) else Lt
|
||||
Qc = la.block_diag(Qc,Qct) if (Qc is not None) else Qct
|
||||
H = np.hstack((H,Ht)) if (H is not None) else Ht
|
||||
|
||||
|
||||
Pinf = la.block_diag(Pinf,Pinft) if (Pinf is not None) else Pinft
|
||||
P0 = la.block_diag(P0,P0t) if (P0 is not None) else P0t
|
||||
|
||||
|
||||
if dF is not None:
|
||||
dF = np.pad(dF,((0,dFt.shape[0]),(0,dFt.shape[1]),(0,dFt.shape[2])),
|
||||
'constant', constant_values=0)
|
||||
dF[-dFt.shape[0]:,-dFt.shape[1]:,-dFt.shape[2]:] = dFt
|
||||
else:
|
||||
dF = dFt
|
||||
|
||||
|
||||
if dQc is not None:
|
||||
dQc = np.pad(dQc,((0,dQct.shape[0]),(0,dQct.shape[1]),(0,dQct.shape[2])),
|
||||
'constant', constant_values=0)
|
||||
dQc[-dQct.shape[0]:,-dQct.shape[1]:,-dQct.shape[2]:] = dQct
|
||||
else:
|
||||
dQc = dQct
|
||||
|
||||
|
||||
if dPinf is not None:
|
||||
dPinf = np.pad(dPinf,((0,dPinft.shape[0]),(0,dPinft.shape[1]),(0,dPinft.shape[2])),
|
||||
'constant', constant_values=0)
|
||||
dPinf[-dPinft.shape[0]:,-dPinft.shape[1]:,-dPinft.shape[2]:] = dPinft
|
||||
else:
|
||||
dPinf = dPinft
|
||||
|
||||
|
||||
if dP0 is not None:
|
||||
dP0 = np.pad(dP0,((0,dP0t.shape[0]),(0,dP0t.shape[1]),(0,dP0t.shape[2])),
|
||||
'constant', constant_values=0)
|
||||
dP0[-dP0t.shape[0]:,-dP0t.shape[1]:,-dP0t.shape[2]:] = dP0t
|
||||
else:
|
||||
dP0 = dP0t
|
||||
|
||||
|
||||
n += Ft.shape[0]
|
||||
nq += Qct.shape[0]
|
||||
nd += dFt.shape[2]
|
||||
|
||||
|
||||
assert (F.shape[0] == n and F.shape[1]==n), "SDE add: Check of F Dimensions failed"
|
||||
assert (L.shape[0] == n and L.shape[1]==nq), "SDE add: Check of L Dimensions failed"
|
||||
assert (Qc.shape[0] == nq and Qc.shape[1]==nq), "SDE add: Check of Qc Dimensions failed"
|
||||
assert (H.shape[0] == 1 and H.shape[1]==n), "SDE add: Check of H Dimensions failed"
|
||||
assert (Pinf.shape[0] == n and Pinf.shape[1]==n), "SDE add: Check of Pinf Dimensions failed"
|
||||
assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
|
||||
assert (P0.shape[0] == n and P0.shape[1]==n), "SDE add: Check of P0 Dimensions failed"
|
||||
assert (dF.shape[0] == n and dF.shape[1]==n and dF.shape[2]==nd), "SDE add: Check of dF Dimensions failed"
|
||||
assert (dQc.shape[0] == nq and dQc.shape[1]==nq and dQc.shape[2]==nd), "SDE add: Check of dQc Dimensions failed"
|
||||
assert (dPinf.shape[0] == n and dPinf.shape[1]==n and dPinf.shape[2]==nd), "SDE add: Check of dPinf Dimensions failed"
|
||||
assert (dP0.shape[0] == n and dP0.shape[1]==n and dP0.shape[2]==nd), "SDE add: Check of dP0 Dimensions failed"
|
||||
|
||||
|
||||
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
|
||||
|
|
|
|||
|
|
@ -266,11 +266,11 @@ class Stationary(Kern):
|
|||
..returns:
|
||||
dL2_dXdX: [NxQxQ]
|
||||
"""
|
||||
dL_dK_diag = dL_dK_diag.reshape(-1, 1, 1)
|
||||
dL_dK_diag = dL_dK_diag.copy().reshape(-1, 1, 1)
|
||||
assert dL_dK_diag.size == X.shape[0], "dL_dK_diag has to be given as row [N] or column vector [Nx1]"
|
||||
|
||||
l2 = np.ones(X.shape[1])*self.lengthscale**2
|
||||
return (dL_dK_diag * self.variance/(l2[:,None]*l2[None,:]))# np.zeros(X.shape+(X.shape[1],))
|
||||
l4 = np.ones(X.shape[1])*self.lengthscale**2
|
||||
return dL_dK_diag * (np.eye(X.shape[1]) * self.variance/(l4))[None, :,:]# np.zeros(X.shape+(X.shape[1],))
|
||||
#return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)
|
||||
|
||||
def _gradients_X_pure(self, dL_dK, X, X2=None):
|
||||
|
|
|
|||
|
|
@ -135,10 +135,13 @@ class Kern_check_d2Kdiag_dXdX(Kern_check_model):
|
|||
self.Xc = X.copy()
|
||||
|
||||
def log_likelihood(self):
|
||||
return np.sum(self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X))
|
||||
l = 0.
|
||||
for i in range(self.X.shape[0]):
|
||||
l += self.kernel.gradients_X(self.dL_dK[[i],[i]], self.X[[i]], self.Xc[[i]]).sum()
|
||||
return l
|
||||
|
||||
def parameters_changed(self):
|
||||
grads = self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X)
|
||||
grads = -self.kernel.gradients_XX_diag(self.dL_dK.diagonal(), self.X)
|
||||
self.X.gradient[:] = grads.sum(-1)
|
||||
|
||||
def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue