mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-10 12:32:40 +02:00)

commit 5e4c24d652 (parent a5208f474e): psi1 is now the right way around

5 changed files with 24 additions and 25 deletions
@@ -54,11 +54,11 @@ class SparseGP(GPBase):
         self.Kmm = self.kern.K(self.Z)
         if self.has_uncertain_inputs:
             self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
-            self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance).T
+            self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance)
             self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
         else:
             self.psi0 = self.kern.Kdiag(self.X)
-            self.psi1 = self.kern.K(self.Z, self.X)
+            self.psi1 = self.kern.K(self.X, self.Z)
             self.psi2 = None

     def _computations(self):
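This first hunk sets the convention the rest of the commit follows: psi1 is now stored with rows indexing data points and columns indexing inducing inputs, i.e. the same orientation as K(X, Z). A minimal numpy sketch of the two orientations; the sizes and the stand-in kernel here are made up for illustration:

import numpy as np

# Stand-in RBF kernel; GPy's kern.K plays this role in the diff.
def K(A, B):
    sq = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * sq)

num_data, num_inducing, input_dim = 100, 10, 3
X = np.random.randn(num_data, input_dim)
Z = np.random.randn(num_inducing, input_dim)

psi1_old = K(Z, X)   # old convention: (num_inducing, num_data)
psi1_new = K(X, Z)   # new convention: (num_data, num_inducing)
assert np.allclose(psi1_new, psi1_old.T)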
@@ -75,12 +75,13 @@ class SparseGP(GPBase):
             evals, evecs = linalg.eigh(psi2_beta)
             clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
             tmp = evecs * np.sqrt(clipped_evals)
+            tmp = tmp.T
         else:
             if self.likelihood.is_heteroscedastic:
-                tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.num_data)))
+                tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(self.num_data,1)))
             else:
                 tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
-            tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
+            tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp.T), lower=1)
         self.A = tdot(tmp)

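With psi1 in the new (num_data, num_inducing) orientation, a per-datum noise precision has to enter as a column vector so that each row of psi1 (one data point) gets its own scaling, and the triangular solve against the (M, M) factor Lm then needs psi1 transposed. A rough numpy sketch, with scipy's solve_triangular standing in for the LAPACK dtrtrs call and a placeholder Lm:

import numpy as np
from scipy.linalg import solve_triangular  # stand-in for linalg.lapack dtrtrs

N, M = 100, 10
psi1 = np.random.randn(N, M)                    # new orientation: (num_data, num_inducing)
precision = np.random.rand(N)                   # heteroscedastic: one precision per datum

tmp = psi1 * np.sqrt(precision).reshape(N, 1)   # scales row n by sqrt(beta_n)
Lm = np.linalg.cholesky(np.eye(M) * 2.0)        # placeholder (M, M) Cholesky factor
tmp = solve_triangular(Lm, tmp.T, lower=True)   # (M, N): hence the tmp.T in the diff
A = tmp @ tmp.T                                 # tdot(tmp) -> (M, M)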
@@ -89,7 +90,7 @@ class SparseGP(GPBase):
         self.LB = jitchol(self.B)

         # TODO: make a switch for either first compute psi1V, or VV.T
-        self.psi1V = np.dot(self.psi1, self.likelihood.V)
+        self.psi1V = np.dot(self.psi1.T, self.likelihood.V)

         # back substutue C into psi1V
         tmp, info1 = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
@@ -107,14 +108,14 @@ class SparseGP(GPBase):

         # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
         self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten()
-        self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T)
+        self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T).T
         dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)

         if self.likelihood.is_heteroscedastic:
             if self.has_uncertain_inputs:
                 self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
             else:
-                self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.num_data))
+                self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (self.psi1 * self.likelihood.precision.reshape(self.num_data, 1)).T).T
                 self.dL_dpsi2 = None
         else:
             dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
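The extra transpose on dL_dpsi1 keeps the gradient in the same orientation as psi1 itself. A quick shape check, under the assumption (consistent with this file) that Cpsi1V is (num_inducing, output_dim) and likelihood.V is (num_data, output_dim):

import numpy as np

M, N, D = 10, 100, 2               # num_inducing, num_data, output_dim (made up)
Cpsi1V = np.random.randn(M, D)
V = np.random.randn(N, D)

dL_dpsi1 = np.dot(Cpsi1V, V.T).T   # (M, N) -> transpose -> (N, M)
assert dL_dpsi1.shape == (N, M)    # same orientation as psi1 = K(X, Z)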
@@ -123,7 +124,7 @@ class SparseGP(GPBase):
                self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.num_data, axis=0)
            else:
                # subsume back into psi1 (==Kmn)
-               self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1)
+               self.dL_dpsi1 += 2.*np.dot(self.psi1, dL_dpsi2)
                self.dL_dpsi2 = None

@@ -179,10 +180,10 @@ class SparseGP(GPBase):
             Kmmi = tdot(Lmi.T)
             diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)])

-            self.likelihood.fit_FITC(self.Kmm, self.psi1, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion
+            self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion
             # raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
         else:
-            self.likelihood.fit_DTC(self.Kmm, self.psi1)
+            self.likelihood.fit_DTC(self.Kmm, self.psi1.T)
             # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
         self._set_params(self._get_params()) # update the GP

@@ -196,10 +197,10 @@ class SparseGP(GPBase):
         dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
         if self.has_uncertain_inputs:
             dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
-            dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1.T, self.Z, self.X, self.X_variance)
+            dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X, self.X_variance)
             dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
         else:
-            dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X)
+            dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X, self.Z)
             dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)

         return dL_dtheta
@@ -213,7 +214,7 @@ class SparseGP(GPBase):
             dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
             dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
         else:
-            dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X)
+            dL_dZ += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X)
         return dL_dZ

     def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
@@ -233,9 +234,9 @@ class SparseGP(GPBase):
                 Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
                 var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
         else:
-            # assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
+            # assert which_p.Tarts=='all', "swithching out parts of variational kernels is not implemented"
             Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
-            mu = np.dot(Kx, self.Cpsi1V)
+            mu = np.dot(Kx.T, self.Cpsi1V)
             if full_cov:
                 raise NotImplementedError, "TODO"
             else:
@@ -360,9 +360,7 @@ class kern(Parameterised):
             raise NotImplementedError("We don't handle linear/linear cross-terms")
         tmp = np.zeros((mu.shape[0], Z.shape[0]))
         p1.psi1(Z, mu, S, tmp)
-        tmp2 = np.zeros_like(target)
-        p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1).T, Z, mu, S, tmp2)
-        target += tmp2
+        p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target)

         return target * 2

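Besides dropping the transpose, this hunk removes the scratch array tmp2: it relies on the Kernpart convention, visible in the linear and rbf parts below, that derivative methods accumulate into their target argument with +=. A toy sketch of why staging through a zeroed temporary and then adding is equivalent:

import numpy as np

def accumulate(arg, target):     # stand-in for p2.dpsi1_dZ(...): adds into target
    target += arg

arg = np.random.randn(4)

target_old = np.zeros(4)         # old path: stage through tmp2, then add
tmp2 = np.zeros_like(target_old)
accumulate(arg, tmp2)
target_old += tmp2

target_new = np.zeros(4)         # new path: accumulate directly
accumulate(arg, target_new)

assert np.allclose(target_old, target_new)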
@@ -378,7 +376,7 @@ class kern(Parameterised):

         tmp = np.zeros((mu.shape[0], Z.shape[0]))
         p1.psi1(Z, mu, S, tmp)
-        p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1).T * 2., Z, mu, S, target_mu, target_S)
+        p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S)

         return target_mu, target_S

@@ -98,7 +98,7 @@ class linear(Kernpart):
         target += tmp.sum()

     def dK_dX(self, dL_dK, X, X2, target):
-        target += (((X2[:, None, :] * self.variances)) * dL_dK[:, :, None]).sum(0)
+        target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)

     #---------------------------------------#
     #             PSI statistics             #
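The linear kernel's dK_dX now expects dL_dK in the K(X, X2) orientation, i.e. (N, M) when X is (N, D) and X2 is (M, D). The broadcast-and-sum is just a matrix product; a small sketch showing the einsum it computes (shapes made up):

import numpy as np

N, M, D = 100, 10, 3
X2 = np.random.randn(M, D)
dL_dK = np.random.randn(N, M)      # new orientation, matching K(X, X2)
variances = np.random.rand(D)      # ARD variances of the linear kernel

grad_X = ((X2[None, :, :] * variances) * dL_dK[:, :, None]).sum(1)   # (N, D)
assert np.allclose(grad_X, np.einsum('nm,md->nd', dL_dK, X2 * variances))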
@@ -131,7 +131,7 @@ class linear(Kernpart):
     def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
         """Do nothing for S, it does not affect psi1"""
         self._psi_computations(Z, mu, S)
-        target_mu += (dL_dpsi1.T[:, :, None] * (Z * self.variances)).sum(1)
+        target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)

     def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
         self.dK_dX(dL_dpsi1.T, Z, mu, target)
@@ -178,13 +178,13 @@ class rbf(Kernpart):
         self._psi_computations(Z, mu, S)
         denominator = (self.lengthscale2 * (self._psi1_denom))
         dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
-        target += np.sum(dL_dpsi1.T[:, :, None] * dpsi1_dZ, 0)
+        target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)

     def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
         self._psi_computations(Z, mu, S)
         tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
-        target_mu += np.sum(dL_dpsi1.T[:, :, None] * tmp * self._psi1_dist, 1)
-        target_S += np.sum(dL_dpsi1.T[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
+        target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
+        target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)

     def psi2(self, Z, mu, S, target):
         self._psi_computations(Z, mu, S)
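All three rbf updates are the same mechanical change: with dL_dpsi1 now (N, M), its leading axis already lines up with the (N, M, D) derivative arrays, so the explicit transpose goes away. The chain-rule pattern these lines implement, written as an einsum with made-up shapes:

import numpy as np

N, M, D = 100, 10, 3
dL_dpsi1 = np.random.randn(N, M)      # new orientation
dpsi1_dZ = np.random.randn(N, M, D)   # d psi1[n, m] / d Z[m, d]

grad_Z = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)   # sum over data -> (M, D)
assert np.allclose(grad_Z, np.einsum('nm,nmd->md', dL_dpsi1, dpsi1_dZ))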
@@ -43,7 +43,7 @@ class SparseGPLVM(SparseGPRegression, GPLVM):

     def dL_dX(self):
         dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X)
-        dL_dX += self.kern.dK_dX(self.dL_dpsi1.T, self.X, self.Z)
+        dL_dX += self.kern.dK_dX(self.dL_dpsi1, self.X, self.Z)

         return dL_dX
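An orientation change like this is easy to get subtly wrong in one branch, so it is worth validating numerically; GPy models provide a checkgrad() method for exactly this. A generic central-difference check of the kind involved (everything here is local to the sketch):

import numpy as np

def f(Z):                      # stand-in objective depending on a matrix Z
    return np.sin(Z).sum()

def df(Z):                     # analytic gradient to be verified
    return np.cos(Z)

Z = np.random.randn(5, 2)
eps, num = 1e-6, np.zeros_like(Z)
for idx in np.ndindex(*Z.shape):
    Zp, Zm = Z.copy(), Z.copy()
    Zp[idx] += eps
    Zm[idx] -= eps
    num[idx] = (f(Zp) - f(Zm)) / (2 * eps)   # central difference per element

assert np.allclose(num, df(Z), atol=1e-5)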