From 12d6f5056bf5f5e590fcf1c293fabc3c87f24ebf Mon Sep 17 00:00:00 2001 From: Alan Saul Date: Mon, 11 Mar 2013 12:15:59 +0000 Subject: [PATCH] removed keyname partial --- GPy/kern/Matern32.py | 18 ++++----- GPy/kern/Matern52.py | 18 ++++----- GPy/kern/bias.py | 38 +++++++++--------- GPy/kern/coregionalise.py | 26 ++++++------ GPy/kern/exponential.py | 18 ++++----- GPy/kern/kern.py | 68 ++++++++++++++++---------------- GPy/kern/kernpart.py | 18 ++++----- GPy/kern/linear.py | 50 +++++++++++------------ GPy/kern/periodic_Matern32.py | 16 ++++---- GPy/kern/periodic_Matern52.py | 18 ++++----- GPy/kern/periodic_exponential.py | 18 ++++----- GPy/kern/product.py | 24 +++++------ GPy/kern/product_orthogonal.py | 24 +++++------ GPy/kern/rbf.py | 60 ++++++++++++++-------------- GPy/kern/symmetric.py | 24 +++++------ GPy/kern/white.py | 30 +++++++------- GPy/models/GP.py | 2 +- 17 files changed, 235 insertions(+), 235 deletions(-) diff --git a/GPy/kern/Matern32.py b/GPy/kern/Matern32.py index c175009d..9503361d 100644 --- a/GPy/kern/Matern32.py +++ b/GPy/kern/Matern32.py @@ -76,7 +76,7 @@ class Matern32(kernpart): """Compute the diagonal of the covariance matrix associated to X.""" np.add(target,self.variance,target) - def dK_dtheta(self,partial,X,X2,target): + def dK_dtheta(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to the parameters.""" if X2 is None: X2 = X dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) @@ -84,29 +84,29 @@ class Matern32(kernpart): invdist = 1./np.where(dist!=0.,dist,np.inf) dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3 #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - target[0] += np.sum(dvar*partial) + target[0] += np.sum(dvar*dL_dK) if self.ARD == True: dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] #dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None] - target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) + target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0) else: dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist #dl = self.variance*dvar*dist2M.sum(-1)*invdist - target[1] += np.sum(dl*partial) + target[1] += np.sum(dl*dL_dK) - def dKdiag_dtheta(self,partial,X,target): + def dKdiag_dtheta(self,dL_dKdiag,X,target): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" - target[0] += np.sum(partial) + target[0] += np.sum(dL_dKdiag) - def dK_dX(self,partial,X,X2,target): + def dK_dX(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" if X2 is None: X2 = X dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) dK_dX = - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2)) - target += np.sum(dK_dX*partial.T[:,:,None],0) + target += np.sum(dK_dX*dL_dK.T[:,:,None],0) - def dKdiag_dX(self,partial,X,target): + def dKdiag_dX(self,dL_dKdiag,X,target): pass def Gram_matrix(self,F,F1,F2,lower,upper): diff --git a/GPy/kern/Matern52.py b/GPy/kern/Matern52.py index 26caad1c..377526d5 100644 --- a/GPy/kern/Matern52.py +++ b/GPy/kern/Matern52.py @@ -74,7 +74,7 @@ class Matern52(kernpart): """Compute the diagonal of the covariance matrix associated to X.""" np.add(target,self.variance,target) - def dK_dtheta(self,partial,X,X2,target): + def dK_dtheta(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to the parameters.""" if X2 is None: X2 = X dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) @@ -82,29 +82,29 @@ class Matern52(kernpart): dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3 dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist) dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - target[0] += np.sum(dvar*partial) + target[0] += np.sum(dvar*dL_dK) if self.ARD: dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) + target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0) else: dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist - target[1] += np.sum(dl*partial) + target[1] += np.sum(dl*dL_dKdiag) - def dKdiag_dtheta(self,X,target): + def dKdiag_dtheta(self,dL_dKdiag,X,target): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" - target[0] += np.sum(partial) + target[0] += np.sum(dL_dKdiag) - def dK_dX(self,partial,X,X2,target): + def dK_dX(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" if X2 is None: X2 = X dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2)) - target += np.sum(dK_dX*partial.T[:,:,None],0) + target += np.sum(dK_dX*dL_dK.T[:,:,None],0) - def dKdiag_dX(self,partial,X,target): + def dKdiag_dX(self,dL_dKdiag,X,target): pass def Gram_matrix(self,F,F1,F2,F3,lower,upper): diff --git a/GPy/kern/bias.py b/GPy/kern/bias.py index 91594e4c..07679abd 100644 --- a/GPy/kern/bias.py +++ b/GPy/kern/bias.py @@ -35,16 +35,17 @@ class bias(kernpart): def Kdiag(self,X,target): target += self.variance - def dK_dtheta(self,partial,X,X2,target): - target += partial.sum() + def dK_dtheta(self,dL_dKdiag,X,X2,target): + target += dL_dKdiag.sum() - def dKdiag_dtheta(self,partial,X,target): - target += partial.sum() - def dK_dX(self, partial,X, X2, target): + def dKdiag_dtheta(self,dL_dKdiag,X,target): + target += dL_dKdiag.sum() + + def dK_dX(self, dL_dK,X, X2, target): pass - def dKdiag_dX(self,partial,X,target): + def dKdiag_dX(self,dL_dKdiag,X,target): pass #---------------------------------------# @@ -60,30 +61,29 @@ class bias(kernpart): def psi2(self, Z, mu, S, target): target += self.variance**2 - def dpsi0_dtheta(self, partial, Z, mu, S, target): - target += partial.sum() + def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target): + target += dL_dpsi0.sum() - def dpsi1_dtheta(self, partial, Z, mu, S, target): - target += partial.sum() + def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target): + target += dL_dpsi1.sum() - def dpsi2_dtheta(self, partial, Z, mu, S, target): - target += 2.*self.variance*partial.sum() + def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target): + target += 2.*self.variance*dL_dpsi2.sum() - - def dpsi0_dZ(self, partial, Z, mu, S, target): + def dpsi0_dZ(self, dL_dpsi0, Z, mu, S, target): pass - def dpsi0_dmuS(self, partial, Z, mu, S, target_mu, target_S): + def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S): pass - def dpsi1_dZ(self, partial, Z, mu, S, target): + def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target): pass - def dpsi1_dmuS(self, partial, Z, mu, S, target_mu, target_S): + def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S): pass - def dpsi2_dZ(self, partial, Z, mu, S, target): + def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): pass - def dpsi2_dmuS(self, partial, Z, mu, S, target_mu, target_S): + def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): pass diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py index 2a9177d5..a76bb31e 100644 --- a/GPy/kern/coregionalise.py +++ b/GPy/kern/coregionalise.py @@ -53,7 +53,7 @@ class coregionalise(kernpart): def Kdiag(self,index,target): target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()] - def dK_dtheta(self,partial,index,index2,target): + def dK_dtheta(self,dL_dK,index,index2,target): index = np.asarray(index,dtype=np.int) if index2 is None: index2 = index @@ -62,28 +62,28 @@ class coregionalise(kernpart): ii,jj = np.meshgrid(index,index2) ii,jj = ii.T, jj.T - partial_small = np.zeros_like(self.B) + dL_dK_small = np.zeros_like(self.B) for i in range(self.Nout): for j in range(self.Nout): - tmp = np.sum(partial[(ii==i)*(jj==j)]) - partial_small[i,j] = tmp + tmp = np.sum(dL_dK[(ii==i)*(jj==j)]) + dL_dK_small[i,j] = tmp - dkappa = np.diag(partial_small) - partial_small += partial_small.T - dW = (self.W[:,None,:]*partial_small[:,:,None]).sum(0) + dkappa = np.diag(dL_dK_small) + dL_dK_small += dL_dK_small.T + dW = (self.W[:,None,:]*dL_dK_small[:,:,None]).sum(0) target += np.hstack([dW.flatten(),dkappa]) - def dKdiag_dtheta(self,partial,index,target): + def dKdiag_dtheta(self,dL_dKdiag,index,target): index = np.asarray(index,dtype=np.int).flatten() - partial_small = np.zeros(self.Nout) + dL_dKdiag_small = np.zeros(self.Nout) for i in range(self.Nout): - partial_small[i] += np.sum(partial[index==i]) - dW = 2.*self.W*partial_small[:,None] - dkappa = partial_small + dL_dKdiag_small[i] += np.sum(dL_dKdiag[index==i]) + dW = 2.*self.W*dL_dKdiag_small[:,None] + dkappa = dL_dKdiag_small target += np.hstack([dW.flatten(),dkappa]) - def dK_dX(self,partial,X,X2,target): + def dK_dX(self,dL_dK,X,X2,target): pass diff --git a/GPy/kern/exponential.py b/GPy/kern/exponential.py index 366ddf3b..9e50712b 100644 --- a/GPy/kern/exponential.py +++ b/GPy/kern/exponential.py @@ -74,35 +74,35 @@ class exponential(kernpart): """Compute the diagonal of the covariance matrix associated to X.""" np.add(target,self.variance,target) - def dK_dtheta(self,partial,X,X2,target): + def dK_dtheta(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to the parameters.""" if X2 is None: X2 = X dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) invdist = 1./np.where(dist!=0.,dist,np.inf) dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3 dvar = np.exp(-dist) - target[0] += np.sum(dvar*partial) + target[0] += np.sum(dvar*dL_dK) if self.ARD == True: dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None] - target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) + target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0) else: dl = self.variance*dvar*dist2M.sum(-1)*invdist - target[1] += np.sum(dl*partial) + target[1] += np.sum(dl*dL_dK) - def dKdiag_dtheta(self,partial,X,target): + def dKdiag_dtheta(self,dL_dKdiag,X,target): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" #NB: derivative of diagonal elements wrt lengthscale is 0 - target[0] += np.sum(partial) + target[0] += np.sum(dL_dKdiag) - def dK_dX(self,partial,X,X2,target): + def dK_dX(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" if X2 is None: X2 = X dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) dK_dX = - np.transpose(self.variance*np.exp(-dist)*ddist_dX,(1,0,2)) - target += np.sum(dK_dX*partial.T[:,:,None],0) + target += np.sum(dK_dX*dL_dK.T[:,:,None],0) - def dKdiag_dX(self,partial,X,target): + def dKdiag_dX(self,dL_dKdiag,X,target): pass def Gram_matrix(self,F,F1,lower,upper): diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 99ad46ea..c1f5eca9 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -271,10 +271,10 @@ class kern(parameterised): [p.K(X[s1,i_s],X2[s2,i_s],target=target[s1,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] return target - def dK_dtheta(self,partial,X,X2=None,slices1=None,slices2=None): + def dK_dtheta(self,dL_dK,X,X2=None,slices1=None,slices2=None): """ - :param partial: An array of partial derivaties, dL_dK - :type partial: Np.ndarray (N x M) + :param dL_dK: An array of dL_dK derivaties, dL_dK + :type dL_dK: Np.ndarray (N x M) :param X: Observed data inputs :type X: np.ndarray (N x D) :param X2: Observed dara inputs (optional, defaults to X) @@ -288,16 +288,16 @@ class kern(parameterised): if X2 is None: X2 = X target = np.zeros(self.Nparam) - [p.dK_dtheta(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)] + [p.dK_dtheta(dL_dK[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)] return self._transform_gradients(target) - def dK_dX(self,partial,X,X2=None,slices1=None,slices2=None): + def dK_dX(self,dL_dK,X,X2=None,slices1=None,slices2=None): if X2 is None: X2 = X slices1, slices2 = self._process_slices(slices1,slices2) target = np.zeros_like(X) - [p.dK_dX(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[s1,i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)] + [p.dK_dX(dL_dK[s1,s2],X[s1,i_s],X2[s2,i_s],target[s1,i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)] return target def Kdiag(self,X,slices=None): @@ -307,20 +307,20 @@ class kern(parameterised): [p.Kdiag(X[s,i_s],target=target[s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)] return target - def dKdiag_dtheta(self,partial,X,slices=None): + def dKdiag_dtheta(self,dL_dKdiag,X,slices=None): assert X.shape[1]==self.D - assert len(partial.shape)==1 - assert partial.size==X.shape[0] + assert len(dL_dKdiag.shape)==1 + assert dL_dKdiag.size==X.shape[0] slices = self._process_slices(slices,False) target = np.zeros(self.Nparam) - [p.dKdiag_dtheta(partial[s],X[s,i_s],target[ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)] + [p.dKdiag_dtheta(dL_dKdiag[s],X[s,i_s],target[ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)] return self._transform_gradients(target) - def dKdiag_dX(self, partial, X, slices=None): + def dKdiag_dX(self, dL_dKdiag, X, slices=None): assert X.shape[1]==self.D slices = self._process_slices(slices,False) target = np.zeros_like(X) - [p.dKdiag_dX(partial[s],X[s,i_s],target[s,i_s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)] + [p.dKdiag_dX(dL_dKdiag[s],X[s,i_s],target[s,i_s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)] return target def psi0(self,Z,mu,S,slices=None): @@ -329,16 +329,16 @@ class kern(parameterised): [p.psi0(Z,mu[s],S[s],target[s]) for p,s in zip(self.parts,slices)] return target - def dpsi0_dtheta(self,partial,Z,mu,S,slices=None): + def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,slices=None): slices = self._process_slices(slices,False) target = np.zeros(self.Nparam) - [p.dpsi0_dtheta(partial[s],Z,mu[s],S[s],target[ps]) for p,ps,s in zip(self.parts, self.param_slices,slices)] + [p.dpsi0_dtheta(dL_dpsi0[s],Z,mu[s],S[s],target[ps]) for p,ps,s in zip(self.parts, self.param_slices,slices)] return self._transform_gradients(target) - def dpsi0_dmuS(self,partial,Z,mu,S,slices=None): + def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,slices=None): slices = self._process_slices(slices,False) target_mu,target_S = np.zeros_like(mu),np.zeros_like(S) - [p.dpsi0_dmuS(partial,Z,mu[s],S[s],target_mu[s],target_S[s]) for p,s in zip(self.parts,slices)] + [p.dpsi0_dmuS(dL_dpsi0,Z,mu[s],S[s],target_mu[s],target_S[s]) for p,s in zip(self.parts,slices)] return target_mu,target_S def psi1(self,Z,mu,S,slices1=None,slices2=None): @@ -348,25 +348,25 @@ class kern(parameterised): [p.psi1(Z[s2],mu[s1],S[s1],target[s1,s2]) for p,s1,s2 in zip(self.parts,slices1,slices2)] return target - def dpsi1_dtheta(self,partial,Z,mu,S,slices1=None,slices2=None): + def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None): """N,M,(Ntheta)""" slices1, slices2 = self._process_slices(slices1,slices2) target = np.zeros((self.Nparam)) - [p.dpsi1_dtheta(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,ps,s1,s2,i_s in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices)] + [p.dpsi1_dtheta(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,ps,s1,s2,i_s in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices)] return self._transform_gradients(target) - def dpsi1_dZ(self,partial,Z,mu,S,slices1=None,slices2=None): + def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None): """N,M,Q""" slices1, slices2 = self._process_slices(slices1,slices2) target = np.zeros_like(Z) - [p.dpsi1_dZ(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] + [p.dpsi1_dZ(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] return target - def dpsi1_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None): + def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None): """return shapes are N,M,Q""" slices1, slices2 = self._process_slices(slices1,slices2) target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1])) - [p.dpsi1_dmuS(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] + [p.dpsi1_dmuS(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] return target_mu, target_S def psi2(self,Z,mu,S,slices1=None,slices2=None): @@ -416,11 +416,11 @@ class kern(parameterised): return target + crossterms - def dpsi2_dtheta(self,partial,partial1,Z,mu,S,slices1=None,slices2=None): + def dpsi2_dtheta(self,dL_dpsi2,partial1,Z,mu,S,slices1=None,slices2=None): """Returns shape (N,M,M,Ntheta)""" slices1, slices2 = self._process_slices(slices1,slices2) target = np.zeros(self.Nparam) - [p.dpsi2_dtheta(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)] + [p.dpsi2_dtheta(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)] #compute the "cross" terms #TODO: better looping @@ -434,11 +434,11 @@ class kern(parameterised): pass #rbf X bias elif p1.name=='bias' and p2.name=='rbf': - p2.dpsi1_dtheta(partial.sum(1)*p1.variance,Z,mu,S,target[ps2]) - p1.dpsi1_dtheta(partial.sum(1)*p2._psi1,Z,mu,S,target[ps1]) + p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance,Z,mu,S,target[ps2]) + p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2._psi1,Z,mu,S,target[ps1]) elif p2.name=='bias' and p1.name=='rbf': - p1.dpsi1_dtheta(partial.sum(1)*p2.variance,Z,mu,S,target[ps1]) - p2.dpsi1_dtheta(partial.sum(1)*p1._psi1,Z,mu,S,target[ps2]) + p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance,Z,mu,S,target[ps1]) + p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1._psi1,Z,mu,S,target[ps2]) #rbf X linear elif p1.name=='linear' and p2.name=='rbf': raise NotImplementedError #TODO @@ -469,10 +469,10 @@ class kern(parameterised): # target += (partial.sum(0)[:,:,None] * (tmp[:, None] + tmp[:,:,None]).sum(0)).sum(0).sum(0) return self._transform_gradients(target) - def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None): + def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None): slices1, slices2 = self._process_slices(slices1,slices2) target = np.zeros_like(Z) - [p.dpsi2_dZ(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] + [p.dpsi2_dZ(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] #compute the "cross" terms #TODO: slices (need to iterate around the input slices also...) @@ -482,9 +482,9 @@ class kern(parameterised): pass #rbf X bias elif p1.name=='bias' and p2.name=='rbf': - target += p2.dpsi1_dX(partial.sum(1)*p1.variance,Z,mu,S) + target += p2.dpsi1_dX(dL_dpsi2.sum(1)*p1.variance,Z,mu,S) elif p2.name=='bias' and p1.name=='rbf': - target += p1.dpsi1_dZ(partial.sum(2)*p2.variance,Z,mu,S) + target += p1.dpsi1_dZ(dL_dpsi2.sum(2)*p2.variance,Z,mu,S) #rbf X linear elif p1.name=='linear' and p2.name=='rbf': raise NotImplementedError #TODO @@ -496,11 +496,11 @@ class kern(parameterised): return target - def dpsi2_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None): + def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None): """return shapes are N,M,M,Q""" slices1, slices2 = self._process_slices(slices1,slices2) target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1])) - [p.dpsi2_dmuS(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] + [p.dpsi2_dmuS(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] #TODO: there are some extra terms to compute here! return target_mu, target_S diff --git a/GPy/kern/kernpart.py b/GPy/kern/kernpart.py index 3a5486de..30a1cc3d 100644 --- a/GPy/kern/kernpart.py +++ b/GPy/kern/kernpart.py @@ -26,31 +26,31 @@ class kernpart(object): raise NotImplementedError def Kdiag(self,X,target): raise NotImplementedError - def dK_dtheta(self,partial,X,X2,target): + def dK_dtheta(self,dL_dK,X,X2,target): raise NotImplementedError - def dKdiag_dtheta(self,partial,X,target): + def dKdiag_dtheta(self,dL_dKdiag,X,target): raise NotImplementedError def psi0(self,Z,mu,S,target): raise NotImplementedError - def dpsi0_dtheta(self,partial,Z,mu,S,target): + def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): raise NotImplementedError - def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S): + def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S): raise NotImplementedError def psi1(self,Z,mu,S,target): raise NotImplementedError def dpsi1_dtheta(self,Z,mu,S,target): raise NotImplementedError - def dpsi1_dZ(self,partial,Z,mu,S,target): + def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target): raise NotImplementedError - def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): + def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S): raise NotImplementedError def psi2(self,Z,mu,S,target): raise NotImplementedError - def dpsi2_dZ(self,partial,Z,mu,S,target): + def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target): raise NotImplementedError - def dpsi2_dtheta(self,partial,Z,mu,S,target): + def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): raise NotImplementedError - def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S): + def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): raise NotImplementedError def dK_dX(self,X,X2,target): raise NotImplementedError diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py index df2fed46..7d817f62 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/linear.py @@ -73,16 +73,16 @@ class linear(kernpart): def Kdiag(self,X,target): np.add(target,np.sum(self.variances*np.square(X),-1),target) - def dK_dtheta(self,partial,X,X2,target): + def dK_dtheta(self,dL_dK,X,X2,target): if self.ARD: product = X[:,None,:]*X2[None,:,:] - target += (partial[:,:,None]*product).sum(0).sum(0) + target += (dL_dK[:,:,None]*product).sum(0).sum(0) else: self._K_computations(X, X2) - target += np.sum(self._dot_product*partial) + target += np.sum(self._dot_product*dL_dK) - def dK_dX(self,partial,X,X2,target): - target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0) + def dK_dX(self,dL_dK,X,X2,target): + target += (((X2[:, None, :] * self.variances)) * dL_dK[:,:, None]).sum(0) #---------------------------------------# # PSI statistics # @@ -92,40 +92,40 @@ class linear(kernpart): self._psi_computations(Z,mu,S) target += np.sum(self.variances*self.mu2_S,1) - def dKdiag_dtheta(self,partial, X, target): - tmp = partial[:,None]*X**2 + def dKdiag_dtheta(self,dL_dKdiag, X, target): + tmp = dL_dKdiag[:,None]*X**2 if self.ARD: target += tmp.sum(0) else: target += tmp.sum() - def dpsi0_dtheta(self,partial,Z,mu,S,target): + def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): self._psi_computations(Z,mu,S) - tmp = partial[:, None] * self.mu2_S + tmp = dL_dpsi0[:, None] * self.mu2_S if self.ARD: target += tmp.sum(0) else: target += tmp.sum() - def dpsi0_dmuS(self,partial, Z,mu,S,target_mu,target_S): - target_mu += partial[:, None] * (2.0*mu*self.variances) - target_S += partial[:, None] * self.variances + def dpsi0_dmuS(self,dL_dpsi0, Z,mu,S,target_mu,target_S): + target_mu += dL_dpsi0[:, None] * (2.0*mu*self.variances) + target_S += dL_dpsi0[:, None] * self.variances def psi1(self,Z,mu,S,target): """the variance, it does nothing""" self.K(mu,Z,target) - def dpsi1_dtheta(self,partial,Z,mu,S,target): + def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target): """the variance, it does nothing""" - self.dK_dtheta(partial,mu,Z,target) + self.dK_dtheta(dL_dpsi1,mu,Z,target) - def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): + def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S): """Do nothing for S, it does not affect psi1""" self._psi_computations(Z,mu,S) - target_mu += (partial.T[:,:, None]*(Z*self.variances)).sum(1) + target_mu += (dL_dpsi1.T[:,:, None]*(Z*self.variances)).sum(1) - def dpsi1_dZ(self,partial,Z,mu,S,target): - self.dK_dX(partial.T,Z,mu,target) + def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target): + self.dK_dX(dL_dpsi1.T,Z,mu,target) def psi2(self,Z,mu,S,target): """ @@ -135,25 +135,25 @@ class linear(kernpart): psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :] target += psi2.sum(-1) - def dpsi2_dtheta(self,partial,Z,mu,S,target): + def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): self._psi_computations(Z,mu,S) - tmp = (partial[:,:,:,None]*(2.*self.ZZ*self.mu2_S[:,None,None,:]*self.variances)) + tmp = (dL_dpsi2[:,:,:,None]*(2.*self.ZZ*self.mu2_S[:,None,None,:]*self.variances)) if self.ARD: target += tmp.sum(0).sum(0).sum(0) else: target += tmp.sum() - def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S): + def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): """Think N,M,M,Q """ self._psi_computations(Z,mu,S) tmp = self.ZZ*np.square(self.variances) # M,M,Q - target_mu += (partial[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1) - target_S += (partial[:,:,:,None]*tmp).sum(1).sum(1) + target_mu += (dL_dpsi2[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1) + target_S += (dL_dpsi2[:,:,:,None]*tmp).sum(1).sum(1) - def dpsi2_dZ(self,partial,Z,mu,S,target): + def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target): self._psi_computations(Z,mu,S) mu2_S = np.sum(self.mu2_S,0)# Q, - target += (partial[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1) + target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1) #---------------------------------------# # Precomputations # diff --git a/GPy/kern/periodic_Matern32.py b/GPy/kern/periodic_Matern32.py index be1148c4..898dff7b 100644 --- a/GPy/kern/periodic_Matern32.py +++ b/GPy/kern/periodic_Matern32.py @@ -101,7 +101,7 @@ class periodic_Matern32(kernpart): FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) - def dK_dtheta(self,partial,X,X2,target): + def dK_dtheta(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" if X2 is None: X2 = X FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) @@ -166,13 +166,13 @@ class periodic_Matern32(kernpart): dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T) # np.add(target[:,:,0],dK_dvar, target[:,:,0]) - target[0] += np.sum(dK_dvar*partial) + target[0] += np.sum(dK_dvar*dL_dK) #np.add(target[:,:,1],dK_dlen, target[:,:,1]) - target[1] += np.sum(dK_dlen*partial) + target[1] += np.sum(dK_dlen*dL_dK) #np.add(target[:,:,2],dK_dper, target[:,:,2]) - target[2] += np.sum(dK_dper*partial) + target[2] += np.sum(dK_dper*dL_dK) - def dKdiag_dtheta(self,partial,X,target): + def dKdiag_dtheta(self,dL_dKdiag,X,target): """derivative of the diagonal covariance matrix with respect to the parameters""" FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) @@ -231,6 +231,6 @@ class periodic_Matern32(kernpart): dK_dper = 2* mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T) - target[0] += np.sum(np.diag(dK_dvar)*partial) - target[1] += np.sum(np.diag(dK_dlen)*partial) - target[2] += np.sum(np.diag(dK_dper)*partial) + target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag) + target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag) + target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag) diff --git a/GPy/kern/periodic_Matern52.py b/GPy/kern/periodic_Matern52.py index 8d1da8b1..c533961f 100644 --- a/GPy/kern/periodic_Matern52.py +++ b/GPy/kern/periodic_Matern52.py @@ -46,7 +46,7 @@ class periodic_Matern52(kernpart): r = np.sqrt(r1**2 + r2**2) psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2)) return r,omega[:,0:1], psi - + def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2): Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) ) Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower) @@ -105,7 +105,7 @@ class periodic_Matern52(kernpart): FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) - def dK_dtheta(self,partial,X,X2,target): + def dK_dtheta(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" if X2 is None: X2 = X FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) @@ -178,13 +178,13 @@ class periodic_Matern52(kernpart): dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T) # np.add(target[:,:,0],dK_dvar, target[:,:,0]) - target[0] += np.sum(dK_dvar*partial) + target[0] += np.sum(dK_dvar*dL_dK) #np.add(target[:,:,1],dK_dlen, target[:,:,1]) - target[1] += np.sum(dK_dlen*partial) + target[1] += np.sum(dK_dlen*dL_dK) #np.add(target[:,:,2],dK_dper, target[:,:,2]) - target[2] += np.sum(dK_dper*partial) + target[2] += np.sum(dK_dper*dL_dK) - def dKdiag_dtheta(self,partial,X,target): + def dKdiag_dtheta(self,dL_dKdiag,X,target): """derivative of the diagonal of the covariance matrix with respect to the parameters""" FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) @@ -251,6 +251,6 @@ class periodic_Matern52(kernpart): dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper) dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T) - target[0] += np.sum(np.diag(dK_dvar)*partial) - target[1] += np.sum(np.diag(dK_dlen)*partial) - target[2] += np.sum(np.diag(dK_dper)*partial) + target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag) + target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag) + target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag) diff --git a/GPy/kern/periodic_exponential.py b/GPy/kern/periodic_exponential.py index 7f566f25..b966bbef 100644 --- a/GPy/kern/periodic_exponential.py +++ b/GPy/kern/periodic_exponential.py @@ -101,7 +101,7 @@ class periodic_exponential(kernpart): FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) - def dK_dtheta(self,partial,X,X2,target): + def dK_dtheta(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" if X2 is None: X2 = X FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) @@ -162,11 +162,11 @@ class periodic_exponential(kernpart): dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T) - target[0] += np.sum(dK_dvar*partial) - target[1] += np.sum(dK_dlen*partial) - target[2] += np.sum(dK_dper*partial) + target[0] += np.sum(dK_dvar*dL_dK) + target[1] += np.sum(dK_dlen*dL_dK) + target[2] += np.sum(dK_dper*dL_dK) - def dKdiag_dtheta(self,partial,X,target): + def dKdiag_dtheta(self,dL_dKdiag,X,target): """derivative of the diagonal of the covariance matrix with respect to the parameters""" FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) @@ -222,7 +222,7 @@ class periodic_exponential(kernpart): dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T) - target[0] += np.sum(np.diag(dK_dvar)*partial) - target[1] += np.sum(np.diag(dK_dlen)*partial) - target[2] += np.sum(np.diag(dK_dper)*partial) - + target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag) + target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag) + target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag) + diff --git a/GPy/kern/product.py b/GPy/kern/product.py index 92522418..3bad51c1 100644 --- a/GPy/kern/product.py +++ b/GPy/kern/product.py @@ -55,7 +55,7 @@ class product(kernpart): self.k2.Kdiag(X,target2) target += target1 * target2 - def dK_dtheta(self,partial,X,X2,target): + def dK_dtheta(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to the parameters.""" if X2 is None: X2 = X K1 = np.zeros((X.shape[0],X2.shape[0])) @@ -65,13 +65,13 @@ class product(kernpart): k1_target = np.zeros(self.k1.Nparam) k2_target = np.zeros(self.k2.Nparam) - self.k1.dK_dtheta(partial*K2, X, X2, k1_target) - self.k2.dK_dtheta(partial*K1, X, X2, k2_target) + self.k1.dK_dtheta(dL_dK*K2, X, X2, k1_target) + self.k2.dK_dtheta(dL_dK*K1, X, X2, k2_target) target[:self.k1.Nparam] += k1_target target[self.k1.Nparam:] += k2_target - def dK_dX(self,partial,X,X2,target): + def dK_dX(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" if X2 is None: X2 = X K1 = np.zeros((X.shape[0],X2.shape[0])) @@ -79,19 +79,19 @@ class product(kernpart): self.k1.K(X,X2,K1) self.k2.K(X,X2,K2) - self.k1.dK_dX(partial*K2, X, X2, target) - self.k2.dK_dX(partial*K1, X, X2, target) + self.k1.dK_dX(dL_dK*K2, X, X2, target) + self.k2.dK_dX(dL_dK*K1, X, X2, target) - def dKdiag_dX(self,partial,X,target): + def dKdiag_dX(self,dL_dKdiag,X,target): target1 = np.zeros((X.shape[0],)) target2 = np.zeros((X.shape[0],)) self.k1.Kdiag(X,target1) self.k2.Kdiag(X,target2) - self.k1.dKdiag_dX(partial*target2, X, target) - self.k2.dKdiag_dX(partial*target1, X, target) + self.k1.dKdiag_dX(dL_dKdiag*target2, X, target) + self.k2.dKdiag_dX(dL_dKdiag*target1, X, target) - def dKdiag_dtheta(self,partial,X,target): + def dKdiag_dtheta(self,dL_dKdiag,X,target): """Compute the diagonal of the covariance matrix associated to X.""" target1 = np.zeros((X.shape[0],)) target2 = np.zeros((X.shape[0],)) @@ -100,8 +100,8 @@ class product(kernpart): k1_target = np.zeros(self.k1.Nparam) k2_target = np.zeros(self.k2.Nparam) - self.k1.dKdiag_dtheta(partial*target2, X, k1_target) - self.k2.dKdiag_dtheta(partial*target1, X, k2_target) + self.k1.dKdiag_dtheta(dL_dKdiag*target2, X, k1_target) + self.k2.dKdiag_dtheta(dL_dKdiag*target1, X, k2_target) target[:self.k1.Nparam] += k1_target target[self.k1.Nparam:] += k2_target diff --git a/GPy/kern/product_orthogonal.py b/GPy/kern/product_orthogonal.py index a231cf8b..b0112199 100644 --- a/GPy/kern/product_orthogonal.py +++ b/GPy/kern/product_orthogonal.py @@ -46,7 +46,7 @@ class product_orthogonal(kernpart): self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2) target += target1 * target2 - def dK_dtheta(self,partial,X,X2,target): + def dK_dtheta(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to the parameters.""" if X2 is None: X2 = X K1 = np.zeros((X.shape[0],X2.shape[0])) @@ -54,8 +54,8 @@ class product_orthogonal(kernpart): self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1) self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2) - self.k1.dK_dtheta(partial*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam]) - self.k2.dK_dtheta(partial*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:]) + self.k1.dK_dtheta(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam]) + self.k2.dK_dtheta(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:]) def Kdiag(self,X,target): """Compute the diagonal of the covariance matrix associated to X.""" @@ -65,15 +65,15 @@ class product_orthogonal(kernpart): self.k2.Kdiag(X[:,self.k1.D:],target2) target += target1 * target2 - def dKdiag_dtheta(self,partial,X,target): + def dKdiag_dtheta(self,dL_dKdiag,X,target): K1 = np.zeros(X.shape[0]) K2 = np.zeros(X.shape[0]) self.k1.Kdiag(X[:,:self.k1.D],K1) self.k2.Kdiag(X[:,self.k1.D:],K2) - self.k1.dKdiag_dtheta(partial*K2,X[:,:self.k1.D],target[:self.k1.Nparam]) - self.k2.dKdiag_dtheta(partial*K1,X[:,self.k1.D:],target[self.k1.Nparam:]) + self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.D],target[:self.k1.Nparam]) + self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.D:],target[self.k1.Nparam:]) - def dK_dX(self,partial,X,X2,target): + def dK_dX(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" if X2 is None: X2 = X K1 = np.zeros((X.shape[0],X2.shape[0])) @@ -81,15 +81,15 @@ class product_orthogonal(kernpart): self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1) self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2) - self.k1.dK_dX(partial*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target) - self.k2.dK_dX(partial*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target) + self.k1.dK_dX(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target) + self.k2.dK_dX(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target) - def dKdiag_dX(self, partial, X, target): + def dKdiag_dX(self, dL_dKdiag, X, target): K1 = np.zeros(X.shape[0]) K2 = np.zeros(X.shape[0]) self.k1.Kdiag(X[:,0:self.k1.D],K1) self.k2.Kdiag(X[:,self.k1.D:],K2) - self.k1.dK_dX(partial*K2, X[:,:self.k1.D], target) - self.k2.dK_dX(partial*K1, X[:,self.k1.D:], target) + self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.D], target) + self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.D:], target) diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index 16eda459..3c3d59e6 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -82,27 +82,27 @@ class rbf(kernpart): def Kdiag(self,X,target): np.add(target,self.variance,target) - def dK_dtheta(self,partial,X,X2,target): + def dK_dtheta(self,dL_dK,X,X2,target): self._K_computations(X,X2) - target[0] += np.sum(self._K_dvar*partial) + target[0] += np.sum(self._K_dvar*dL_dK) if self.ARD == True: dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscale - target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) + target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0) else: - target[1] += np.sum(self._K_dvar*self.variance*(self._K_dist2.sum(-1))/self.lengthscale*partial) - #np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial) + target[1] += np.sum(self._K_dvar*self.variance*(self._K_dist2.sum(-1))/self.lengthscale*dL_dK) + #np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*dL_dK) - def dKdiag_dtheta(self,partial,X,target): + def dKdiag_dtheta(self,dL_dKdiag,X,target): #NB: derivative of diagonal elements wrt lengthscale is 0 - target[0] += np.sum(partial) + target[0] += np.sum(dL_dKdiag) - def dK_dX(self,partial,X,X2,target): + def dK_dX(self,dL_dK,X,X2,target): self._K_computations(X,X2) _K_dist = X[:,None,:]-X2[None,:,:] dK_dX = np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2)) - target += np.sum(dK_dX*partial.T[:,:,None],0) + target += np.sum(dK_dX*dL_dK.T[:,:,None],0) - def dKdiag_dX(self,partial,X,target): + def dKdiag_dX(self,dL_dKdiag,X,target): pass @@ -113,69 +113,69 @@ class rbf(kernpart): def psi0(self,Z,mu,S,target): target += self.variance - def dpsi0_dtheta(self,partial,Z,mu,S,target): - target[0] += np.sum(partial) + def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): + target[0] += np.sum(dL_dpsi0) - def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S): + def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S): pass def psi1(self,Z,mu,S,target): self._psi_computations(Z,mu,S) target += self._psi1 - def dpsi1_dtheta(self,partial,Z,mu,S,target): + def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target): self._psi_computations(Z,mu,S) denom_deriv = S[:,None,:]/(self.lengthscale**3+self.lengthscale*S[:,None,:]) d_length = self._psi1[:,:,None]*(self.lengthscale*np.square(self._psi1_dist/(self.lengthscale2+S[:,None,:])) + denom_deriv) - target[0] += np.sum(partial*self._psi1/self.variance) - dpsi1_dlength = d_length*partial[:,:,None] + target[0] += np.sum(dL_dpsi1*self._psi1/self.variance) + dpsi1_dlength = d_length*dL_dpsi1[:,:,None] if not self.ARD: target[1] += dpsi1_dlength.sum() else: target[1:] += dpsi1_dlength.sum(0).sum(0) - def dpsi1_dZ(self,partial,Z,mu,S,target): + def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target): self._psi_computations(Z,mu,S) denominator = (self.lengthscale2*(self._psi1_denom)) dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator)) - target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0) + target += np.sum(dL_dpsi1.T[:,:,None] * dpsi1_dZ, 0) - def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): + def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S): self._psi_computations(Z,mu,S) tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom - target_mu += np.sum(partial.T[:, :, None]*tmp*self._psi1_dist,1) - target_S += np.sum(partial.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1) + target_mu += np.sum(dL_dpsi1.T[:, :, None]*tmp*self._psi1_dist,1) + target_S += np.sum(dL_dpsi1.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1) def psi2(self,Z,mu,S,target): self._psi_computations(Z,mu,S) target += self._psi2 - def dpsi2_dtheta(self,partial,Z,mu,S,target): + def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): """Shape N,M,M,Ntheta""" self._psi_computations(Z,mu,S) d_var = 2.*self._psi2/self.variance d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom) - target[0] += np.sum(partial*d_var) - dpsi2_dlength = d_length*partial[:,:,:,None] + target[0] += np.sum(dL_dpsi2*d_var) + dpsi2_dlength = d_length*dL_dpsi2[:,:,:,None] if not self.ARD: target[1] += dpsi2_dlength.sum() else: target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0) - - def dpsi2_dZ(self,partial,Z,mu,S,target): + + def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target): self._psi_computations(Z,mu,S) term1 = 0.5*self._psi2_Zdist/self.lengthscale2 # M, M, Q term2 = self._psi2_mudist/self._psi2_denom/self.lengthscale2 # N, M, M, Q dZ = self._psi2[:,:,:,None] * (term1[None] + term2) - target += (partial[:,:,:,None]*dZ).sum(0).sum(0) + target += (dL_dpsi2[:,:,:,None]*dZ).sum(0).sum(0) - def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S): + def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): """Think N,M,M,Q """ self._psi_computations(Z,mu,S) tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom - target_mu += (partial[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1) - target_S += (partial[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) + target_mu += (dL_dpsi2[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1) + target_S += (dL_dpsi2[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) #---------------------------------------# diff --git a/GPy/kern/symmetric.py b/GPy/kern/symmetric.py index d493bfb1..c3b046c7 100644 --- a/GPy/kern/symmetric.py +++ b/GPy/kern/symmetric.py @@ -51,7 +51,7 @@ class symmetric(kernpart): self.k.K(X,AX2,target) self.k.K(AX,AX2,target) - def dK_dtheta(self,partial,X,X2,target): + def dK_dtheta(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to the parameters.""" AX = np.dot(X,self.transform) if X2 is None: @@ -59,13 +59,13 @@ class symmetric(kernpart): ZX2 = AX else: AX2 = np.dot(X2, self.transform) - self.k.dK_dtheta(partial,X,X2,target) - self.k.dK_dtheta(partial,AX,X2,target) - self.k.dK_dtheta(partial,X,AX2,target) - self.k.dK_dtheta(partial,AX,AX2,target) + self.k.dK_dtheta(dL_dK,X,X2,target) + self.k.dK_dtheta(dL_dK,AX,X2,target) + self.k.dK_dtheta(dL_dK,X,AX2,target) + self.k.dK_dtheta(dL_dK,AX,AX2,target) - def dK_dX(self,partial,X,X2,target): + def dK_dX(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" AX = np.dot(X,self.transform) if X2 is None: @@ -73,10 +73,10 @@ class symmetric(kernpart): ZX2 = AX else: AX2 = np.dot(X2, self.transform) - self.k.dK_dX(partial, X, X2, target) - self.k.dK_dX(partial, AX, X2, target) - self.k.dK_dX(partial, X, AX2, target) - self.k.dK_dX(partial, AX ,AX2, target) + self.k.dK_dX(dL_dK, X, X2, target) + self.k.dK_dX(dL_dK, AX, X2, target) + self.k.dK_dX(dL_dK, X, AX2, target) + self.k.dK_dX(dL_dK, AX ,AX2, target) def Kdiag(self,X,target): """Compute the diagonal of the covariance matrix associated to X.""" @@ -84,9 +84,9 @@ class symmetric(kernpart): self.K(X,X,foo) target += np.diag(foo) - def dKdiag_dX(self,partial,X,target): + def dKdiag_dX(self,dL_dKdiag,X,target): raise NotImplementedError - def dKdiag_dtheta(self,partial,X,target): + def dKdiag_dtheta(self,dL_dKdiag,X,target): """Compute the diagonal of the covariance matrix associated to X.""" raise NotImplementedError diff --git a/GPy/kern/white.py b/GPy/kern/white.py index b3b00c48..f5d6894a 100644 --- a/GPy/kern/white.py +++ b/GPy/kern/white.py @@ -37,50 +37,50 @@ class white(kernpart): def Kdiag(self,X,target): target += self.variance - def dK_dtheta(self,partial,X,X2,target): + def dK_dtheta(self,dL_dK,X,X2,target): if X.shape==X2.shape: if np.all(X==X2): - target += np.trace(partial) + target += np.trace(dL_dK) - def dKdiag_dtheta(self,partial,X,target): - target += np.sum(partial) + def dKdiag_dtheta(self,dL_dKdiag,X,target): + target += np.sum(dL_dKdiag) - def dK_dX(self,partial,X,X2,target): + def dK_dX(self,dL_dK,X,X2,target): pass - def dKdiag_dX(self,partial,X,target): + def dKdiag_dX(self,dL_dKdiag,X,target): pass def psi0(self,Z,mu,S,target): target += self.variance - def dpsi0_dtheta(self,partial,Z,mu,S,target): - target += partial.sum() + def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target): + target += dL_dpsi0.sum() - def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S): + def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S): pass def psi1(self,Z,mu,S,target): pass - def dpsi1_dtheta(self,partial,Z,mu,S,target): + def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target): pass - def dpsi1_dZ(self,partial,Z,mu,S,target): + def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target): pass - def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): + def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S): pass def psi2(self,Z,mu,S,target): pass - def dpsi2_dZ(self,partial,Z,mu,S,target): + def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target): pass - def dpsi2_dtheta(self,partial,Z,mu,S,target): + def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target): pass - def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S): + def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S): pass diff --git a/GPy/models/GP.py b/GPy/models/GP.py index 08ac1bb1..1d985c33 100644 --- a/GPy/models/GP.py +++ b/GPy/models/GP.py @@ -129,7 +129,7 @@ class GP(model): For the likelihood parameters, pass in alpha = K^-1 y """ - return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X,slices1=self.Xslices,slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) + return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK,X=self.X,slices1=self.Xslices,slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) def _raw_predict(self,_Xnew,slices=None, full_cov=False): """