decent gradients for most parameters

This commit is contained in:
Nicolo Fusi 2013-01-28 15:55:40 +00:00
parent eb3061a9f0
commit 8b6e244cf1
5 changed files with 20 additions and 20 deletions

View file

@@ -277,7 +277,7 @@ class kern(parameterised):
[p.dpsi2_dZ(partial[s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] [p.dpsi2_dZ(partial[s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
return target return target
def dpsi2_dmuS(self,Z,mu,S,slices1=None,slices2=None): def dpsi2_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None):
"""return shapes are N,M,M,Q""" """return shapes are N,M,M,Q"""
slices1, slices2 = self._process_slices(slices1,slices2) slices1, slices2 = self._process_slices(slices1,slices2)
target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1])) target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))

View file

@@ -97,7 +97,7 @@ class rbf(kernpart):
def dpsi0_dtheta(self,partial,Z,mu,S,target): def dpsi0_dtheta(self,partial,Z,mu,S,target):
target[0] += 1. target[0] += 1.
def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
pass pass
def psi1(self,Z,mu,S,target): def psi1(self,Z,mu,S,target):
@@ -118,8 +118,8 @@ class rbf(kernpart):
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom
target_mu += np.sum(partial*tmp*self._psi1_dist,1) target_mu += np.sum(partial.T[:, :, None]*tmp*self._psi1_dist,1)
target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1) target_S += np.sum(partial.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1)
def psi2(self,Z,mu,S,target): def psi2(self,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
@@ -140,12 +140,12 @@ class rbf(kernpart):
dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom) dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
target += np.sum(partial[None,:,:,None]*dZ,0).sum(1) target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
"""Think N,M,M,Q """ """Think N,M,M,Q """
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1) target_mu += (partial[None,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) target_S += (partial[None,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
def _psi_computations(self,Z,mu,S): def _psi_computations(self,Z,mu,S):
#here are the "statistics" for psi1 and psi2 #here are the "statistics" for psi1 and psi2

View file

@@ -76,7 +76,7 @@ class rbf_ARD(kernpart):
def dpsi0_dtheta(self,partial,Z,mu,S,target): def dpsi0_dtheta(self,partial,Z,mu,S,target):
target[0] += 1. target[0] += 1.
def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S): def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
pass pass
def psi1(self,Z,mu,S,target): def psi1(self,Z,mu,S,target):
@@ -92,21 +92,21 @@ class rbf_ARD(kernpart):
def dpsi1_dZ(self,partial,Z,mu,S,target): def dpsi1_dZ(self,partial,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target) # np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target)
target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,0) target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,0)
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
"""return shapes are N,M,Q""" """return shapes are N,M,Q"""
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = self._psi1[:,:,None]/self.lengthscales2/self._psi1_denom tmp = self._psi1[:,:,None]/self.lengthscales2/self._psi1_denom
target_mu += np.sum(partial*tmp*self._psi1_dist,1) target_mu += np.sum(partial.T[:, :, None]*tmp*self._psi1_dist,1)
target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1) target_S += np.sum(partial.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1)
def psi2(self,Z,mu,S,target): def psi2(self,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
target += self._psi2.sum(0) #TODO: psi2 should be NxMxM (for het. noise) target += self._psi2.sum(0) #TODO: psi2 should be NxMxM (for het. noise)
def dpsi2_dtheta(self,Z,mu,S,target): def dpsi2_dtheta(self,partial,Z,mu,S,target):
"""Shape N,M,M,Ntheta""" """Shape N,M,M,Ntheta"""
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
d_var = np.sum(2.*self._psi2/self.variance,0) d_var = np.sum(2.*self._psi2/self.variance,0)
@@ -115,18 +115,18 @@ class rbf_ARD(kernpart):
target[0] += np.sum(partial*d_var) target[0] += np.sum(partial*d_var)
target[1:] += (d_length*partial[:,:,None]).sum(0).sum(0) target[1:] += (d_length*partial[:,:,None]).sum(0).sum(0)
def dpsi2_dZ(self,Z,mu,S,target): def dpsi2_dZ(self,partial,Z,mu,S,target):
"""Returns shape N,M,M,Q""" """Returns shape N,M,M,Q"""
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom) dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
target += np.sum(partial[None,:,:,None]*dZ,0).sum(1) target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S): def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
"""Think N,M,M,Q """ """Think N,M,M,Q """
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = self._psi2[:,:,:,None]/self.lengthscales2/self._psi2_denom tmp = self._psi2[:,:,:,None]/self.lengthscales2/self._psi2_denom
target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1) target_mu += (partial[None,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) target_S += (partial[None,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
def _K_computations(self,X,X2): def _K_computations(self,X,X2):
if not (np.all(X==self._X) and np.all(X2==self._X2)): if not (np.all(X==self._X) and np.all(X2==self._X2)):

View file

@@ -10,3 +10,4 @@ from GP_EP import GP_EP
from generalized_FITC import generalized_FITC from generalized_FITC import generalized_FITC
from sparse_GPLVM import sparse_GPLVM from sparse_GPLVM import sparse_GPLVM
from uncollapsed_sparse_GP import uncollapsed_sparse_GP from uncollapsed_sparse_GP import uncollapsed_sparse_GP
from BGPLVM import Bayesian_GPLVM

View file

@@ -37,7 +37,7 @@ class sparse_GP_regression(GP_regression):
""" """
def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False): def __init__(self,X,Y,kernel=None, X_uncertainty=None, beta=100., Z=None,Zslices=None,M=10,normalize_X=False,normalize_Y=False):
self.scale_factor = 1e1 self.scale_factor = 10.0
self.beta = beta self.beta = beta
if Z is None: if Z is None:
self.Z = np.random.permutation(X.copy())[:M] self.Z = np.random.permutation(X.copy())[:M]
@@ -70,7 +70,8 @@ class sparse_GP_regression(GP_regression):
self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum() self.psi0 = self.kern.psi0(self.Z,self.X, self.X_uncertainty).sum()
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T self.psi1 = self.kern.psi1(self.Z,self.X, self.X_uncertainty).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty) self.psi2 = self.kern.psi2(self.Z,self.X, self.X_uncertainty)
raise NotImplementedError, "scale psi2 (in kern?)" # raise NotImplementedError, "scale psi2 (in kern?)"
self.psi2_beta_scaled = self.psi2*(self.beta/self.scale_factor**2)
else: else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum() self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices).sum()
self.psi1 = self.kern.K(self.Z,self.X) self.psi1 = self.kern.K(self.Z,self.X)
@ -292,5 +293,3 @@ class sgp_debugE(sparse_GP_regression):
tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V) tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
return np.squeeze(dE_dbeta) return np.squeeze(dE_dbeta)