mirror of https://github.com/SheffieldML/GPy.git
synced 2026-04-30 15:26:23 +02:00

commit 17bfccb457 (parent b16d57f560)
[gradxx] not working with X,X...

4 changed files with 41 additions and 23 deletions
@@ -419,7 +419,7 @@ class GP(Model):
         mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
         Sigma = np.zeros(mumuT.shape)
         if var_jac.ndim == 4: # Missing data
-            Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = var_jac.sum(-1)
+            Sigma = var_jac.sum(-1)
         else:
             Sigma = self.output_dim*var_jac
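For reference, the einsum contraction in this hunk computes, for each test point i, the sum over output dimensions d of the outer product of the predictive-mean Jacobian with itself. A minimal NumPy sketch with illustrative shapes (N points, Q input dimensions, D outputs):

    import numpy as np

    N, Q, D = 3, 2, 4                  # illustrative sizes
    mu_jac = np.random.randn(N, Q, D)  # d mu_d / d x_q at each point

    # For each i: sum over d of outer(mu_jac[i, :, d], mu_jac[i, :, d])
    mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)

    # Equivalent explicit loop, for clarity
    check = np.zeros((N, Q, Q))
    for i in range(N):
        for d in range(D):
            check[i] += np.outer(mu_jac[i, :, d], mu_jac[i, :, d])
    assert np.allclose(mumuT, check)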
@@ -237,29 +237,37 @@ class Stationary(Kern):
         # d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2:
         invdist = self._inv_dist(X, X2)
         invdist2 = invdist**2
-        dL_dr = self.dK_dr_via_X(X, X2) * dL_dK # we perofrm this product later
+        dL_dr = self.dK_dr_via_X(X, X2) * dL_dK # we perform this product later
         tmp1 = dL_dr * invdist
         dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK # we perform this product later
-        tmp2 = dL_drdr * invdist2
+        tmp2 = dL_drdr
         l2 = np.ones(X.shape[1])*self.lengthscale**2 #np.multiply(np.ones(X.shape[1]), self.lengthscale**2)

-        if X2 is None:
-            X2 = X
-            tmp1 -= np.eye(X.shape[0])*self.variance
-        else:
-            #tmp1[X==X2.T] -= self.variance # Old version, to be removed
-            # (seems to have a bug: it is subtracted to the first X1 anyway)
-            tmp1[invdist2==0.] -= self.variance
+        tmp1[invdist2==0.] -= self.variance

-        tmp3 = (tmp1*invdist2 - tmp2)
+        tmp3 = (tmp1 - tmp2)*invdist2
+        #tmp3 = (tmp1 - tmp2)*invdist2
+        #tmp3 = tmp3
+        # This is not quite right yet, I need the maths to fully understand what is going on....
+        #else:

         if cov: # full covariance
-            dist = X[:,None,:] - X2[None,:,:]
-            t2 = (tmp3[:,:,None,None]*(dist[:,:,:,None]*dist[:,:,None,:]))/l2[None,None,:,None]
+            if X2 is None:
+                #tmp3 = tmp3+tmp3.T
+                dist = X[:,None,:] - X[None,:,:]
+                #dist = dist+dist.swapaxes(0,1)
+            else:
+                dist = X[:,None,:] - X2[None,:,:]
+            dist = (dist[:,:,:,None]*dist[:,:,None,:])
+            t2 = (tmp3[:,:,None,None]*dist)/l2[None,None,:,None]
             t2.T[np.diag_indices(self.input_dim)] -= tmp1.T[None,:,:]
             grad = t2/l2[None,None,None,:]

             #grad_old = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64)
-            #
             #for q in range(self.input_dim):
             #    tmpdist = (X[:,[q]]-X2[:,[q]].T)
             #    for r in range(self.input_dim):
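The comment at the top of this hunk states the chain rule being implemented: d2K_dXdX2 = dK_dr * d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2. That identity can be sanity-checked numerically without GPy. A minimal sketch for a 1-D RBF kernel k(x, x') = v * exp(-(x - x')^2 / (2 l^2)), whose cross-derivative works out to k(x, x') * (1/l^2 - (x - x')^2 / l^4) (values here are illustrative):

    import numpy as np

    v, l = 1.5, 0.7
    k = lambda x, x2: v * np.exp(-0.5 * ((x - x2) / l) ** 2)
    # Analytic d2k / dx dx2 for the 1-D RBF
    d2k = lambda x, x2: k(x, x2) * (1.0 / l**2 - (x - x2) ** 2 / l**4)

    x, x2, eps = 0.3, -0.4, 1e-4
    # Mixed central finite difference in both arguments
    num = (k(x + eps, x2 + eps) - k(x + eps, x2 - eps)
           - k(x - eps, x2 + eps) + k(x - eps, x2 - eps)) / (4 * eps**2)
    assert np.isclose(num, d2k(x, x2), rtol=1e-4)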
@@ -268,13 +276,17 @@ class Stationary(Kern):
             # grad_old[:, :, q, r] = ((tmp3 * tmpdist2)/l2[r] - tmp1)/l2[q]
             # else:
             # grad_old[:, :, q, r] = (((tmp3 * tmpdist2)/l2[r])/l2[q])
+            #import ipdb;ipdb.set_trace()

+            if X2 is None:
+                grad += tmp1[:,:,None,None]
         else:
             # Diagonal covariance, old code
             grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
             #grad = np.empty(X.shape, dtype=np.float64)
             for q in range(self.input_dim):
                 tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
-                grad[:, :, q] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[q] - tmp1)/l2[q])
+                grad[:, :, q] = ((np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[q] - tmp1)/l2[q])
                 #grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
                 #grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q]
                 #np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q])
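The line t2.T[np.diag_indices(self.input_dim)] -= tmp1.T[None,:,:] in the full-covariance branch relies on NumPy view semantics: t2.T is a view, so assigning through the fancy index writes back into t2, subtracting tmp1 only on the q == r diagonal of the two trailing input-dimension axes. A small self-contained illustration (sizes are assumptions for the sketch):

    import numpy as np

    N, M, d = 3, 4, 2
    t2 = np.zeros((N, M, d, d))
    tmp1 = np.ones((N, M))

    # t2.T has shape (d, d, M, N); np.diag_indices(d) selects the q == r
    # entries, and since .T is a view the subtraction modifies t2 in place.
    t2.T[np.diag_indices(d)] -= tmp1.T[None, :, :]

    assert np.allclose(t2[:, :, 0, 0], -1.0)
    assert np.allclose(t2[:, :, 0, 1], 0.0)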
@@ -293,7 +305,7 @@ class Stationary(Kern):
         """
         if cov:
             tmp = np.ones(X.shape+(X.shape[1],))
-            return tmp * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape+(X.shape[1],))
+            return tmp * (d2L_dK * self.variance/self.lengthscale**2)[:,None,None] # np.zeros(X.shape+(X.shape[1],))
         return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape)

     def _gradients_X_pure(self, dL_dK, X, X2=None):
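The [:,None,None] added to the return value is a broadcasting fix: tmp has shape (N, Q, Q), so a per-point d2L_dK of shape (N,) must be reshaped to (N, 1, 1) before the product; the raw (N,) vector would broadcast against the last axis and either raise or silently scale the wrong dimension. A sketch of the difference, with assumed shapes:

    import numpy as np

    N, Q = 5, 3
    tmp = np.ones((N, Q, Q))
    d2L_dK = np.arange(N, dtype=float)  # one weight per data point

    # Wrong: (N, Q, Q) * (N,) aligns N with the last axis (length Q),
    # which fails here because N != Q (and would be wrong even if N == Q).
    try:
        tmp * d2L_dK
    except ValueError as e:
        print("broadcast error:", e)

    # Right: reshape to (N, 1, 1) so each point's Q x Q block is scaled.
    good = tmp * d2L_dK[:, None, None]
    assert good.shape == (N, Q, Q)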
@@ -50,6 +50,8 @@ def inject_plotting():
     GP.plot_samples = gpy_plot.gp_plots.plot_samples
     GP.plot = gpy_plot.gp_plots.plot
     GP.plot_f = gpy_plot.gp_plots.plot_f
+    GP.plot_latent = gpy_plot.gp_plots.plot_f
+    GP.plot_noiseless = gpy_plot.gp_plots.plot_f
     GP.plot_magnification = gpy_plot.latent_plots.plot_magnification

     from ..models import StateSpace
@@ -62,6 +64,8 @@ def inject_plotting():
     StateSpace.plot_samples = gpy_plot.gp_plots.plot_samples
     StateSpace.plot = gpy_plot.gp_plots.plot
     StateSpace.plot_f = gpy_plot.gp_plots.plot_f
+    StateSpace.plot_latent = gpy_plot.gp_plots.plot_f
+    StateSpace.plot_noiseless = gpy_plot.gp_plots.plot_f

     from ..core import SparseGP
     SparseGP.plot_inducing = gpy_plot.data_plots.plot_inducing
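These two hunks install plot_latent and plot_noiseless as plain aliases of plot_f on both GP and StateSpace, so all three names resolve to the same function and plot the noiseless latent process. A hedged usage sketch (the model setup is illustrative, and the aliases exist only once this commit's inject_plotting has run):

    import numpy as np
    import GPy

    X = np.random.rand(20, 1)
    Y = np.sin(6 * X) + 0.05 * np.random.randn(20, 1)
    m = GPy.models.GPRegression(X, Y)

    # The alias is the very same function object as plot_f...
    assert GPy.core.GP.plot_latent is GPy.core.GP.plot_f
    m.plot_latent()  # ...so this call is equivalent to m.plot_f()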
@@ -112,13 +112,15 @@ class Kern_check_d2K_dXdX_cov(Kern_check_model):
         self.link_parameter(self.X)

     def log_likelihood(self):
-        return np.sum(self.kernel.gradients_X(self.dL_dK,self.X, self.X2))
+        return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).sum()

     def parameters_changed(self):
         #if self.kernel.name == 'rbf':
         #    import ipdb;ipdb.set_trace()
-        grads = self.kernel.gradients_XX(self.dL_dK, self.X, self.X2, cov=True)
-        self.X.gradient[:] = grads.sum(-1).sum(1)
+        if self.X2 is None: X2 = self.X
+        else: X2 = self.X2
+        grads = self.kernel.gradients_XX(self.dL_dK.T, X2, self.X, cov=True)
+        self.X.gradient[:] = grads.sum(-1).sum(0)

 class Kern_check_d2Kdiag_dXdX_cov(Kern_check_model):
     """This class allows gradient checks for the second derivative of a kernel with respect to X."""
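Kern_check_d2K_dXdX_cov turns the summed first derivative into a pseudo log-likelihood: log_likelihood returns the sum of gradients_X, parameters_changed supplies gradients_XX as its gradient, and GPy's generic checkgrad machinery then validates the second derivative against finite differences of the first. The same idea one level down, as a standalone finite-difference check of gradients_X itself (only stable GPy API is used; the kernel and sizes are illustrative):

    import numpy as np
    import GPy

    k = GPy.kern.RBF(input_dim=2, lengthscale=0.5)
    X = np.random.randn(4, 2)
    dL_dK = np.random.randn(4, 4)

    # Analytic gradient of L(X) = sum(dL_dK * K(X, X)) with respect to X
    analytic = k.gradients_X(dL_dK, X)

    # Finite-difference approximation, one coordinate at a time
    eps, numeric = 1e-6, np.zeros_like(X)
    for i in range(X.shape[0]):
        for q in range(X.shape[1]):
            Xp, Xm = X.copy(), X.copy()
            Xp[i, q] += eps
            Xm[i, q] -= eps
            numeric[i, q] = (np.sum(dL_dK * k.K(Xp))
                             - np.sum(dL_dK * k.K(Xm))) / (2 * eps)

    assert np.allclose(analytic, numeric, atol=1e-5)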