From 17bfccb45736a1877779218b43791de4e56a3a5e Mon Sep 17 00:00:00 2001 From: mzwiessele Date: Fri, 6 May 2016 16:02:53 +0100 Subject: [PATCH] [gradxx] not working with X,X... --- GPy/core/gp.py | 2 +- GPy/kern/src/stationary.py | 46 +++++++++++++++++++++++-------------- GPy/plotting/__init__.py | 6 ++++- GPy/testing/kernel_tests.py | 10 ++++---- 4 files changed, 41 insertions(+), 23 deletions(-) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 78eafa3a..d706ed05 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -419,7 +419,7 @@ class GP(Model): mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac) Sigma = np.zeros(mumuT.shape) if var_jac.ndim == 4: # Missing data - Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = var_jac.sum(-1) + Sigma = var_jac.sum(-1) else: Sigma = self.output_dim*var_jac diff --git a/GPy/kern/src/stationary.py b/GPy/kern/src/stationary.py index 2a3c1e16..6ba62fdb 100644 --- a/GPy/kern/src/stationary.py +++ b/GPy/kern/src/stationary.py @@ -237,29 +237,37 @@ class Stationary(Kern): # d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2: invdist = self._inv_dist(X, X2) invdist2 = invdist**2 - dL_dr = self.dK_dr_via_X(X, X2) * dL_dK # we perofrm this product later + dL_dr = self.dK_dr_via_X(X, X2) * dL_dK # we perform this product later tmp1 = dL_dr * invdist dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK # we perofrm this product later - tmp2 = dL_drdr * invdist2 + tmp2 = dL_drdr l2 = np.ones(X.shape[1])*self.lengthscale**2 #np.multiply(np.ones(X.shape[1]) ,self.lengthscale**2) - - if X2 is None: - X2 = X - tmp1 -= np.eye(X.shape[0])*self.variance - else: - #tmp1[X==X2.T] -= self.variance # Old version, to be removed - # (seems to have a bug: it is subtracted to the first X1 anyway) - tmp1[invdist2==0.] -= self.variance + + tmp1[invdist2==0.] -= self.variance - tmp3 = (tmp1*invdist2 - tmp2) + tmp3 = (tmp1 - tmp2)*invdist2 + + #tmp3 = (tmp1 - tmp2)*invdist2 + + #tmp3 = tmp3 + # This is not quite right yet, I need the maths to fully understand what is going on.... + #else: if cov: # full covariance - dist = X[:,None,:] - X2[None,:,:] - t2 = (tmp3[:,:,None,None]*(dist[:,:,:,None]*dist[:,:,None,:]))/l2[None,None,:,None] + if X2 is None: + #tmp3 = tmp3+tmp3.T + dist = X[:,None,:] - X[None,:,:] + #dist = dist+dist.swapaxes(0,1) + else: + dist = X[:,None,:] - X2[None,:,:] + dist = (dist[:,:,:,None]*dist[:,:,None,:]) + + t2 = (tmp3[:,:,None,None]*dist)/l2[None,None,:,None] t2.T[np.diag_indices(self.input_dim)] -= tmp1.T[None,:,:] grad = t2/l2[None,None,None,:] + #grad_old = np.empty((X.shape[0], X2.shape[0], X2.shape[1], X.shape[1]), dtype=np.float64) - # + #for q in range(self.input_dim): # tmpdist = (X[:,[q]]-X2[:,[q]].T) # for r in range(self.input_dim): @@ -267,14 +275,18 @@ class Stationary(Kern): # if r==q: # grad_old[:, :, q, r] = ((tmp3 * tmpdist2)/l2[r] - tmp1)/l2[q] # else: - # grad_old[:, :, q, r] = (((tmp3 * tmpdist2)/l2[r])/l2[q]) + # grad_old[:, :, q, r] = (((tmp3 * tmpdist2)/l2[r])/l2[q]) + #import ipdb;ipdb.set_trace() + + if X2 is None: + grad += tmp1[:,:,None,None] else: # Diagonal covariance, old code grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64) #grad = np.empty(X.shape, dtype=np.float64) for q in range(self.input_dim): tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2 - grad[:, :, q] = np.multiply(dL_dK,(np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[q] - tmp1)/l2[q]) + grad[:, :, q] = ((np.multiply((tmp1*invdist2 - tmp2),tmpdist2)/l2[q] - tmp1)/l2[q]) #grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q] #grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q] #np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q]) @@ -293,7 +305,7 @@ class Stationary(Kern): """ if cov: tmp = np.ones(X.shape+(X.shape[1],)) - return tmp * d2L_dK * self.variance/self.lengthscale**2# np.zeros(X.shape+(X.shape[1],)) + return tmp * (d2L_dK * self.variance/self.lengthscale**2)[:,None,None]# np.zeros(X.shape+(X.shape[1],)) return np.ones(X.shape) * d2L_dK * self.variance/self.lengthscale**2 # np.zeros(X.shape) def _gradients_X_pure(self, dL_dK, X, X2=None): diff --git a/GPy/plotting/__init__.py b/GPy/plotting/__init__.py index 067f5580..359a841a 100644 --- a/GPy/plotting/__init__.py +++ b/GPy/plotting/__init__.py @@ -50,6 +50,8 @@ def inject_plotting(): GP.plot_samples = gpy_plot.gp_plots.plot_samples GP.plot = gpy_plot.gp_plots.plot GP.plot_f = gpy_plot.gp_plots.plot_f + GP.plot_latent = gpy_plot.gp_plots.plot_f + GP.plot_noiseless = gpy_plot.gp_plots.plot_f GP.plot_magnification = gpy_plot.latent_plots.plot_magnification from ..models import StateSpace @@ -62,7 +64,9 @@ def inject_plotting(): StateSpace.plot_samples = gpy_plot.gp_plots.plot_samples StateSpace.plot = gpy_plot.gp_plots.plot StateSpace.plot_f = gpy_plot.gp_plots.plot_f - + StateSpace.plot_latent = gpy_plot.gp_plots.plot_f + StateSpace.plot_noiseless = gpy_plot.gp_plots.plot_f + from ..core import SparseGP SparseGP.plot_inducing = gpy_plot.data_plots.plot_inducing diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index d89f35e8..b3019de0 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -112,13 +112,15 @@ class Kern_check_d2K_dXdX_cov(Kern_check_model): self.link_parameter(self.X) def log_likelihood(self): - return np.sum(self.kernel.gradients_X(self.dL_dK,self.X, self.X2)) + return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).sum() def parameters_changed(self): #if self.kernel.name == 'rbf': - # import ipdb;ipdb.set_trace() - grads = self.kernel.gradients_XX(self.dL_dK, self.X, self.X2, cov=True) - self.X.gradient[:] = grads.sum(-1).sum(1) + # import ipdb;ipdb.set_trace() + if self.X2 is None: X2 = self.X + else: X2 = self.X2 + grads = self.kernel.gradients_XX(self.dL_dK.T, X2, self.X, cov=True) + self.X.gradient[:] = grads.sum(-1).sum(0) class Kern_check_d2Kdiag_dXdX_cov(Kern_check_model): """This class allows gradient checks for the second derivative of a kernel with respect to X. """