[huge merge] trying to merge old master and master

This commit is contained in:
Max Zwiessele 2014-11-21 16:17:03 +00:00
commit 180650ec85
308 changed files with 27071 additions and 24550 deletions

282
GPy/kern/_src/ODE_UY.py Normal file
View file

@ -0,0 +1,282 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
from independent_outputs import index_to_slices
class ODE_UY(Kern):
def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., active_dims=None, name='ode_uy'):
assert input_dim ==2, "only defined for 2 input dims"
super(ODE_UY, self).__init__(input_dim, active_dims, name)
self.variance_Y = Param('variance_Y', variance_Y, Logexp())
self.variance_U = Param('variance_U', variance_Y, Logexp())
self.lengthscale_Y = Param('lengthscale_Y', lengthscale_Y, Logexp())
self.lengthscale_U = Param('lengthscale_U', lengthscale_Y, Logexp())
self.link_parameters(self.variance_Y, self.variance_U, self.lengthscale_Y, self.lengthscale_U)
def K(self, X, X2=None):
# model : a * dy/dt + b * y = U
#lu=sqrt(3)/theta1 ly=1/theta2 theta2= a/b :thetay sigma2=1/(2ab) :sigmay
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
K = np.zeros((X.shape[0], X.shape[0]))
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
K = np.zeros((X.shape[0], X2.shape[0]))
#rdist = X[:,0][:,None] - X2[:,0][:,None].T
rdist = X - X2.T
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
#iu=self.input_lengthU #dimention of U
Vu=self.variance_U
Vy=self.variance_Y
#Vy=ly/2
#stop
# kernel for kuu matern3/2
kuu = lambda dist:Vu * (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))
# kernel for kyy
k1 = lambda dist:np.exp(-ly*np.abs(dist))*(2*lu+ly)/(lu+ly)**2
k2 = lambda dist:(np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = lambda dist:np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
kyy = lambda dist:Vu*Vy*(k1(dist) + k2(dist) + k3(dist))
# cross covariance function
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
#kyu3 = lambda dist: 0
k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) )
#k1cros = lambda dist:0
k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 )
#k2cros = lambda dist:0
Vyu=np.sqrt(Vy*ly*2)
# cross covariance kuy
kuyp = lambda dist:Vu*Vyu*(kyu3(dist)) #t>0 kuy
kuyn = lambda dist:Vu*Vyu*(k1cros(dist)+k2cros(dist)) #t<0 kuy
# cross covariance kyu
kyup = lambda dist:Vu*Vyu*(k1cros(-dist)+k2cros(-dist)) #t>0 kyu
kyun = lambda dist:Vu*Vyu*(kyu3(-dist)) #t<0 kyu
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
K[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
elif i==0 and j==1:
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[ss1,ss2]) ) )
K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(rdist[ss1,ss2]), kuyn(rdist[ss1,ss2] ) )
elif i==1 and j==1:
K[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
else:
#K[ss1,ss2]= 0
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[ss1,ss2]) ) )
K[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(rdist[ss1,ss2]), kyun(rdist[ss1,ss2] ) )
return K
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix associated to X."""
Kdiag = np.zeros(X.shape[0])
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
Vu = self.variance_U
Vy=self.variance_Y
k1 = (2*lu+ly)/(lu+ly)**2
k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2
k3 = 1/(lu+ly) + (lu)/(lu+ly)**2
slices = index_to_slices(X[:,-1])
for i, ss1 in enumerate(slices):
for s1 in ss1:
if i==0:
Kdiag[s1]+= self.variance_U
elif i==1:
Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
else:
raise ValueError, "invalid input/output index"
#Kdiag[slices[0][0]]+= self.variance_U #matern32 diag
#Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag
return Kdiag
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
#rdist = X[:,0][:,None] - X2[:,0][:,None].T
rdist = X - X2.T
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
Vu=self.variance_U
Vy=self.variance_Y
Vyu = np.sqrt(Vy*ly*2)
dVdly = 0.5/np.sqrt(ly)*np.sqrt(2*Vy)
dVdVy = 0.5/np.sqrt(Vy)*np.sqrt(2*ly)
rd=rdist.shape
dktheta1 = np.zeros(rd)
dktheta2 = np.zeros(rd)
dkUdvar = np.zeros(rd)
dkYdvar = np.zeros(rd)
# dk dtheta for UU
UUdtheta1 = lambda dist: np.exp(-lu* dist)*dist + (-dist)*np.exp(-lu* dist)*(1+lu*dist)
UUdtheta2 = lambda dist: 0
#UUdvar = lambda dist: (1 + lu*dist)*np.exp(-lu*dist)
UUdvar = lambda dist: (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))
# dk dtheta for YY
dk1theta1 = lambda dist: np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
dk2theta1 = lambda dist: (1.0)*(
np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
+np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
+np.exp(-dist*ly)*2*(ly-lu)**(-2)
+np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
)
dk3theta1 = lambda dist: np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
#dktheta1 = lambda dist: self.variance_U*self.variance_Y*(dk1theta1+dk2theta1+dk3theta1)
dk1theta2 = lambda dist: np.exp(-ly*dist) * ((lu+ly)**(-2)) * ( (-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly) )
dk2theta2 =lambda dist: 1*(
np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
+np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
)
dk3theta2 = lambda dist: np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
#dktheta2 = lambda dist: self.variance_U*self.variance_Y*(dk1theta2 + dk2theta2 +dk3theta2)
# kyy kernel
k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
#dkdvar = k1+k2+k3
# cross covariance function
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) )
k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 )
# cross covariance kuy
kuyp = lambda dist:(kyu3(dist)) #t>0 kuy
kuyn = lambda dist:(k1cros(dist)+k2cros(dist)) #t<0 kuy
# cross covariance kyu
kyup = lambda dist:(k1cros(-dist)+k2cros(-dist)) #t>0 kyu
kyun = lambda dist:(kyu3(-dist)) #t<0 kyu
# dk dtheta for UY
dkyu3dtheta2 = lambda dist: np.exp(-lu*dist) * ( (-1)*(lu+ly)**(-2)*(1+lu*dist+lu*(lu+ly)**(-1)) + (lu+ly)**(-1)*(-lu)*(lu+ly)**(-2) )
dkyu3dtheta1 = lambda dist: np.exp(-lu*dist)*(lu+ly)**(-1)* ( (-dist)*(1+dist*lu+lu*(lu+ly)**(-1)) -\
(lu+ly)**(-1)*(1+dist*lu+lu*(lu+ly)**(-1)) +dist+(lu+ly)**(-1)-lu*(lu+ly)**(-2) )
dkcros2dtheta1 = lambda dist: np.exp(ly*dist)* ( -(ly+lu)**(-2) + (ly+lu)**(-2) + (-2)*lu*(lu+ly)**(-3) )
dkcros2dtheta2 = lambda dist: np.exp(ly*dist)*dist* ( (ly+lu)**(-1) + lu*(lu+ly)**(-2) ) + \
np.exp(ly*dist)*( -(lu+ly)**(-2) + lu*(-2)*(lu+ly)**(-3) )
dkcros1dtheta1 = lambda dist: np.exp(ly*dist)*( -(lu-ly)**(-2)*( 1-np.exp((lu-ly)*dist) + lu*dist*np.exp((lu-ly)*dist)+ \
lu*(1-np.exp((lu-ly)*dist))/(lu-ly) ) + (lu-ly)**(-1)*( -np.exp( (lu-ly)*dist )*dist + dist*np.exp( (lu-ly)*dist)+\
lu*dist**2*np.exp((lu-ly)*dist)+(1-np.exp((lu-ly)*dist))/(lu-ly) - lu*np.exp((lu-ly)*dist)*dist/(lu-ly) -\
lu*(1-np.exp((lu-ly)*dist))/(lu-ly)**2 ) )
dkcros1dtheta2 = lambda t: np.exp(ly*t)*t/(lu-ly)*( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)+\
lu*(1-np.exp((lu-ly)*t))/(lu-ly) )+\
np.exp(ly*t)/(lu-ly)**2* ( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t) + lu*( 1-np.exp((lu-ly)*t) )/(lu-ly) )+\
np.exp(ly*t)/(lu-ly)*( np.exp((lu-ly)*t)*t -lu*t*t*np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)/(lu-ly)+\
lu*( 1-np.exp((lu-ly)*t) )/(lu-ly)**2 )
dkuypdtheta1 = lambda dist:(dkyu3dtheta1(dist)) #t>0 kuy
dkuyndtheta1 = lambda dist:(dkcros1dtheta1(dist)+dkcros2dtheta1(dist)) #t<0 kuy
# cross covariance kyu
dkyupdtheta1 = lambda dist:(dkcros1dtheta1(-dist)+dkcros2dtheta1(-dist)) #t>0 kyu
dkyundtheta1 = lambda dist:(dkyu3dtheta1(-dist)) #t<0 kyu
dkuypdtheta2 = lambda dist:(dkyu3dtheta2(dist)) #t>0 kuy
dkuyndtheta2 = lambda dist:(dkcros1dtheta2(dist)+dkcros2dtheta2(dist)) #t<0 kuy
# cross covariance kyu
dkyupdtheta2 = lambda dist:(dkcros1dtheta2(-dist)+dkcros2dtheta2(-dist)) #t>0 kyu
dkyundtheta2 = lambda dist:(dkyu3dtheta2(-dist)) #t<0 kyu
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
#target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
dktheta1[ss1,ss2] = Vu*UUdtheta1(np.abs(rdist[ss1,ss2]))
dktheta2[ss1,ss2] = 0
dkUdvar[ss1,ss2] = UUdvar(np.abs(rdist[ss1,ss2]))
dkYdvar[ss1,ss2] = 0
elif i==0 and j==1:
########target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
#np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
#dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , self.variance_U*self.variance_Y*dkcrtheta1(np.abs(rdist[ss1,ss2])) ,self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) )
#dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , self.variance_U*self.variance_Y*dkcrtheta2(np.abs(rdist[ss1,ss2])) ,self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) )
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta1(rdist[ss1,ss2]) )
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kuyp(rdist[ss1,ss2]), Vyu* kuyn(rdist[ss1,ss2]) )
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyp(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyn(rdist[ss1,ss2]) )
dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kuyp(rdist[ss1,ss2]), Vu*dVdVy* kuyn(rdist[ss1,ss2]) )
elif i==1 and j==1:
#target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
dktheta1[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2])))
dktheta2[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2])))
dkUdvar[ss1,ss2] = self.variance_Y*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
dkYdvar[ss1,ss2] = self.variance_U*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
else:
#######target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) )
#dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) , self.variance_U*self.variance_Y*dkcrtheta1(np.abs(rdist[ss1,ss2])) )
#dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) , self.variance_U*self.variance_Y*dkcrtheta2(np.abs(rdist[ss1,ss2])) )
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkyupdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta1(rdist[ss1,ss2]) )
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kyup(rdist[ss1,ss2]),Vyu*kyun(rdist[ss1,ss2]))
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkyupdtheta2(rdist[ss1,ss2])+Vu*dVdly*kyup(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta2(rdist[ss1,ss2])+Vu*dVdly*kyun(rdist[ss1,ss2]) )
dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kyup(rdist[ss1,ss2]), Vu*dVdVy*kyun(rdist[ss1,ss2]))
#stop
self.variance_U.gradient = np.sum(dkUdvar * dL_dK) # Vu
self.variance_Y.gradient = np.sum(dkYdvar * dL_dK) # Vy
self.lengthscale_U.gradient = np.sum(dktheta1*(-np.sqrt(3)*self.lengthscale_U**(-2))* dL_dK) #lu
self.lengthscale_Y.gradient = np.sum(dktheta2*(-self.lengthscale_Y**(-2)) * dL_dK) #ly

290
GPy/kern/_src/ODE_UYC.py Normal file
View file

@ -0,0 +1,290 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
from independent_outputs import index_to_slices
class ODE_UYC(Kern):
def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., ubias =1. ,active_dims=None, name='ode_uyc'):
assert input_dim ==2, "only defined for 2 input dims"
super(ODE_UYC, self).__init__(input_dim, active_dims, name)
self.variance_Y = Param('variance_Y', variance_Y, Logexp())
self.variance_U = Param('variance_U', variance_U, Logexp())
self.lengthscale_Y = Param('lengthscale_Y', lengthscale_Y, Logexp())
self.lengthscale_U = Param('lengthscale_U', lengthscale_U, Logexp())
self.ubias = Param('ubias', ubias, Logexp())
self.add_parameters(self.variance_Y, self.variance_U, self.lengthscale_Y, self.lengthscale_U, self.ubias)
def K(self, X, X2=None):
# model : a * dy/dt + b * y = U
#lu=sqrt(3)/theta1 ly=1/theta2 theta2= a/b :thetay sigma2=1/(2ab) :sigmay
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
K = np.zeros((X.shape[0], X.shape[0]))
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
K = np.zeros((X.shape[0], X2.shape[0]))
#stop
#rdist = X[:,0][:,None] - X2[:,0][:,None].T
rdist = X - X2.T
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
#iu=self.input_lengthU #dimention of U
Vu=self.variance_U
Vy=self.variance_Y
#Vy=ly/2
#stop
# kernel for kuu matern3/2
kuu = lambda dist:Vu * (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist)) +self.ubias
# kernel for kyy
k1 = lambda dist:np.exp(-ly*np.abs(dist))*(2*lu+ly)/(lu+ly)**2
k2 = lambda dist:(np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = lambda dist:np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
kyy = lambda dist:Vu*Vy*(k1(dist) + k2(dist) + k3(dist))
# cross covariance function
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
#kyu3 = lambda dist: 0
k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) )
#k1cros = lambda dist:0
k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 )
#k2cros = lambda dist:0
Vyu=np.sqrt(Vy*ly*2)
# cross covariance kuy
kuyp = lambda dist:Vu*Vyu*(kyu3(dist)) #t>0 kuy
kuyn = lambda dist:Vu*Vyu*(k1cros(dist)+k2cros(dist)) #t<0 kuy
# cross covariance kyu
kyup = lambda dist:Vu*Vyu*(k1cros(-dist)+k2cros(-dist)) #t>0 kyu
kyun = lambda dist:Vu*Vyu*(kyu3(-dist)) #t<0 kyu
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
K[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
elif i==0 and j==1:
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[ss1,ss2]) ) )
K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(rdist[ss1,ss2]), kuyn(rdist[ss1,ss2] ) )
elif i==1 and j==1:
K[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
else:
#K[ss1,ss2]= 0
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[ss1,ss2]) ) )
K[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(rdist[ss1,ss2]), kyun(rdist[ss1,ss2] ) )
return K
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix associated to X."""
Kdiag = np.zeros(X.shape[0])
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
Vu = self.variance_U
Vy=self.variance_Y
k1 = (2*lu+ly)/(lu+ly)**2
k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2
k3 = 1/(lu+ly) + (lu)/(lu+ly)**2
slices = index_to_slices(X[:,-1])
for i, ss1 in enumerate(slices):
for s1 in ss1:
if i==0:
Kdiag[s1]+= self.variance_U + self.ubias
elif i==1:
Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
else:
raise ValueError, "invalid input/output index"
#Kdiag[slices[0][0]]+= self.variance_U #matern32 diag
#Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag
return Kdiag
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
#rdist = X[:,0][:,None] - X2[:,0][:,None].T
rdist = X - X2.T
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
Vu=self.variance_U
Vy=self.variance_Y
Vyu = np.sqrt(Vy*ly*2)
dVdly = 0.5/np.sqrt(ly)*np.sqrt(2*Vy)
dVdVy = 0.5/np.sqrt(Vy)*np.sqrt(2*ly)
rd=rdist.shape[0]
dktheta1 = np.zeros([rd,rd])
dktheta2 = np.zeros([rd,rd])
dkUdvar = np.zeros([rd,rd])
dkYdvar = np.zeros([rd,rd])
dkdubias = np.zeros([rd,rd])
# dk dtheta for UU
UUdtheta1 = lambda dist: np.exp(-lu* dist)*dist + (-dist)*np.exp(-lu* dist)*(1+lu*dist)
UUdtheta2 = lambda dist: 0
#UUdvar = lambda dist: (1 + lu*dist)*np.exp(-lu*dist)
UUdvar = lambda dist: (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))
# dk dtheta for YY
dk1theta1 = lambda dist: np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
dk2theta1 = lambda dist: (1.0)*(
np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
+np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
+np.exp(-dist*ly)*2*(ly-lu)**(-2)
+np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
)
dk3theta1 = lambda dist: np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
#dktheta1 = lambda dist: self.variance_U*self.variance_Y*(dk1theta1+dk2theta1+dk3theta1)
dk1theta2 = lambda dist: np.exp(-ly*dist) * ((lu+ly)**(-2)) * ( (-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly) )
dk2theta2 =lambda dist: 1*(
np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
+np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
)
dk3theta2 = lambda dist: np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
#dktheta2 = lambda dist: self.variance_U*self.variance_Y*(dk1theta2 + dk2theta2 +dk3theta2)
# kyy kernel
k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
#dkdvar = k1+k2+k3
# cross covariance function
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) )
k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 )
# cross covariance kuy
kuyp = lambda dist:(kyu3(dist)) #t>0 kuy
kuyn = lambda dist:(k1cros(dist)+k2cros(dist)) #t<0 kuy
# cross covariance kyu
kyup = lambda dist:(k1cros(-dist)+k2cros(-dist)) #t>0 kyu
kyun = lambda dist:(kyu3(-dist)) #t<0 kyu
# dk dtheta for UY
dkyu3dtheta2 = lambda dist: np.exp(-lu*dist) * ( (-1)*(lu+ly)**(-2)*(1+lu*dist+lu*(lu+ly)**(-1)) + (lu+ly)**(-1)*(-lu)*(lu+ly)**(-2) )
dkyu3dtheta1 = lambda dist: np.exp(-lu*dist)*(lu+ly)**(-1)* ( (-dist)*(1+dist*lu+lu*(lu+ly)**(-1)) -\
(lu+ly)**(-1)*(1+dist*lu+lu*(lu+ly)**(-1)) +dist+(lu+ly)**(-1)-lu*(lu+ly)**(-2) )
dkcros2dtheta1 = lambda dist: np.exp(ly*dist)* ( -(ly+lu)**(-2) + (ly+lu)**(-2) + (-2)*lu*(lu+ly)**(-3) )
dkcros2dtheta2 = lambda dist: np.exp(ly*dist)*dist* ( (ly+lu)**(-1) + lu*(lu+ly)**(-2) ) + \
np.exp(ly*dist)*( -(lu+ly)**(-2) + lu*(-2)*(lu+ly)**(-3) )
dkcros1dtheta1 = lambda dist: np.exp(ly*dist)*( -(lu-ly)**(-2)*( 1-np.exp((lu-ly)*dist) + lu*dist*np.exp((lu-ly)*dist)+ \
lu*(1-np.exp((lu-ly)*dist))/(lu-ly) ) + (lu-ly)**(-1)*( -np.exp( (lu-ly)*dist )*dist + dist*np.exp( (lu-ly)*dist)+\
lu*dist**2*np.exp((lu-ly)*dist)+(1-np.exp((lu-ly)*dist))/(lu-ly) - lu*np.exp((lu-ly)*dist)*dist/(lu-ly) -\
lu*(1-np.exp((lu-ly)*dist))/(lu-ly)**2 ) )
dkcros1dtheta2 = lambda t: np.exp(ly*t)*t/(lu-ly)*( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)+\
lu*(1-np.exp((lu-ly)*t))/(lu-ly) )+\
np.exp(ly*t)/(lu-ly)**2* ( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t) + lu*( 1-np.exp((lu-ly)*t) )/(lu-ly) )+\
np.exp(ly*t)/(lu-ly)*( np.exp((lu-ly)*t)*t -lu*t*t*np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)/(lu-ly)+\
lu*( 1-np.exp((lu-ly)*t) )/(lu-ly)**2 )
dkuypdtheta1 = lambda dist:(dkyu3dtheta1(dist)) #t>0 kuy
dkuyndtheta1 = lambda dist:(dkcros1dtheta1(dist)+dkcros2dtheta1(dist)) #t<0 kuy
# cross covariance kyu
dkyupdtheta1 = lambda dist:(dkcros1dtheta1(-dist)+dkcros2dtheta1(-dist)) #t>0 kyu
dkyundtheta1 = lambda dist:(dkyu3dtheta1(-dist)) #t<0 kyu
dkuypdtheta2 = lambda dist:(dkyu3dtheta2(dist)) #t>0 kuy
dkuyndtheta2 = lambda dist:(dkcros1dtheta2(dist)+dkcros2dtheta2(dist)) #t<0 kuy
# cross covariance kyu
dkyupdtheta2 = lambda dist:(dkcros1dtheta2(-dist)+dkcros2dtheta2(-dist)) #t>0 kyu
dkyundtheta2 = lambda dist:(dkyu3dtheta2(-dist)) #t<0 kyu
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
#target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
dktheta1[ss1,ss2] = Vu*UUdtheta1(np.abs(rdist[ss1,ss2]))
dktheta2[ss1,ss2] = 0
dkUdvar[ss1,ss2] = UUdvar(np.abs(rdist[ss1,ss2]))
dkYdvar[ss1,ss2] = 0
dkdubias[ss1,ss2] = 1
elif i==0 and j==1:
########target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
#np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
#dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , self.variance_U*self.variance_Y*dkcrtheta1(np.abs(rdist[ss1,ss2])) ,self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) )
#dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , self.variance_U*self.variance_Y*dkcrtheta2(np.abs(rdist[ss1,ss2])) ,self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) )
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta1(rdist[ss1,ss2]) )
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kuyp(rdist[ss1,ss2]), Vyu* kuyn(rdist[ss1,ss2]) )
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyp(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyn(rdist[ss1,ss2]) )
dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kuyp(rdist[ss1,ss2]), Vu*dVdVy* kuyn(rdist[ss1,ss2]) )
dkdubias[ss1,ss2] = 0
elif i==1 and j==1:
#target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
dktheta1[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2])))
dktheta2[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2])))
dkUdvar[ss1,ss2] = self.variance_Y*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
dkYdvar[ss1,ss2] = self.variance_U*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
dkdubias[ss1,ss2] = 0
else:
#######target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) )
#dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) , self.variance_U*self.variance_Y*dkcrtheta1(np.abs(rdist[ss1,ss2])) )
#dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) , self.variance_U*self.variance_Y*dkcrtheta2(np.abs(rdist[ss1,ss2])) )
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkyupdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta1(rdist[ss1,ss2]) )
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kyup(rdist[ss1,ss2]),Vyu*kyun(rdist[ss1,ss2]))
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkyupdtheta2(rdist[ss1,ss2])+Vu*dVdly*kyup(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta2(rdist[ss1,ss2])+Vu*dVdly*kyun(rdist[ss1,ss2]) )
dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kyup(rdist[ss1,ss2]), Vu*dVdVy*kyun(rdist[ss1,ss2]))
dkdubias[ss1,ss2] = 0
#stop
self.variance_U.gradient = np.sum(dkUdvar * dL_dK) # Vu
self.variance_Y.gradient = np.sum(dkYdvar * dL_dK) # Vy
self.lengthscale_U.gradient = np.sum(dktheta1*(-np.sqrt(3)*self.lengthscale_U**(-2))* dL_dK) #lu
self.lengthscale_Y.gradient = np.sum(dktheta2*(-self.lengthscale_Y**(-2)) * dL_dK) #ly
self.ubias.gradient = np.sum(dkdubias * dL_dK)

267
GPy/kern/_src/ODE_st.py Normal file
View file

@ -0,0 +1,267 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
from independent_outputs import index_to_slices
class ODE_st(Kern):
"""
kernel resultiong from a first order ODE with OU driving GP
:param input_dim: the number of input dimension, has to be equal to one
:type input_dim: int
:param varianceU: variance of the driving GP
:type varianceU: float
:param lengthscaleU: lengthscale of the driving GP (sqrt(3)/lengthscaleU)
:type lengthscaleU: float
:param varianceY: 'variance' of the transfer function
:type varianceY: float
:param lengthscaleY: 'lengthscale' of the transfer function (1/lengthscaleY)
:type lengthscaleY: float
:rtype: kernel object
"""
def __init__(self, input_dim, a=1.,b=1., c=1.,variance_Yx=3.,variance_Yt=1.5, lengthscale_Yx=1.5, lengthscale_Yt=1.5, active_dims=None, name='ode_st'):
assert input_dim ==3, "only defined for 3 input dims"
super(ODE_st, self).__init__(input_dim, active_dims, name)
self.variance_Yt = Param('variance_Yt', variance_Yt, Logexp())
self.variance_Yx = Param('variance_Yx', variance_Yx, Logexp())
self.lengthscale_Yt = Param('lengthscale_Yt', lengthscale_Yt, Logexp())
self.lengthscale_Yx = Param('lengthscale_Yx', lengthscale_Yx, Logexp())
self.a= Param('a', a, Logexp())
self.b = Param('b', b, Logexp())
self.c = Param('c', c, Logexp())
self.add_parameters(self.a, self.b, self.c, self.variance_Yt, self.variance_Yx, self.lengthscale_Yt,self.lengthscale_Yx)
def K(self, X, X2=None):
# model : -a d^2y/dx^2 + b dy/dt + c * y = U
# kernel Kyy rbf spatiol temporal
# vyt Y temporal variance vyx Y spatiol variance lyt Y temporal lengthscale lyx Y spatiol lengthscale
# kernel Kuu doper( doper(Kyy))
# a b c lyt lyx vyx*vyt
"""Compute the covariance matrix between X and X2."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
K = np.zeros((X.shape[0], X.shape[0]))
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
K = np.zeros((X.shape[0], X2.shape[0]))
tdist = (X[:,0][:,None] - X2[:,0][None,:])**2
xdist = (X[:,1][:,None] - X2[:,1][None,:])**2
ttdist = (X[:,0][:,None] - X2[:,0][None,:])
#rdist = [tdist,xdist]
#dist = np.abs(X - X2.T)
vyt = self.variance_Yt
vyx = self.variance_Yx
lyt=1/(2*self.lengthscale_Yt)
lyx=1/(2*self.lengthscale_Yx)
a = self.a ## -a is used in the model, negtive diffusion
b = self.b
c = self.c
kyy = lambda tdist,xdist: np.exp(-lyt*(tdist) -lyx*(xdist))
k1 = lambda tdist: (2*lyt - 4*lyt**2 * (tdist) )
k2 = lambda xdist: ( 4*lyx**2 * (xdist) - 2*lyx )
k3 = lambda xdist: ( 3*4*lyx**2 - 6*8*xdist*lyx**3 + 16*xdist**2*lyx**4 )
k4 = lambda ttdist: 2*lyt*(ttdist)
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
K[ss1,ss2] = vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
elif i==0 and j==1:
K[ss1,ss2] = (-a*k2(xdist[ss1,ss2]) + b*k4(ttdist[ss1,ss2]) + c)*vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[ss1,ss2]) ) )
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(rdist[ss1,ss2]), kuyn(rdist[ss1,ss2] ) )
elif i==1 and j==1:
K[ss1,ss2] = ( b**2*k1(tdist[ss1,ss2]) - 2*a*c*k2(xdist[ss1,ss2]) + a**2*k3(xdist[ss1,ss2]) + c**2 )* vyt*vyx* kyy(tdist[ss1,ss2],xdist[ss1,ss2])
else:
K[ss1,ss2] = (-a*k2(xdist[ss1,ss2]) - b*k4(ttdist[ss1,ss2]) + c)*vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[ss1,ss2]) ) )
#K[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(rdist[ss1,ss2]), kyun(rdist[ss1,ss2] ) )
#stop
return K
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix associated to X."""
vyt = self.variance_Yt
vyx = self.variance_Yx
lyt = 1./(2*self.lengthscale_Yt)
lyx = 1./(2*self.lengthscale_Yx)
a = self.a
b = self.b
c = self.c
## dk^2/dtdt'
k1 = (2*lyt )*vyt*vyx
## dk^2/dx^2
k2 = ( - 2*lyx )*vyt*vyx
## dk^4/dx^2dx'^2
k3 = ( 4*3*lyx**2 )*vyt*vyx
Kdiag = np.zeros(X.shape[0])
slices = index_to_slices(X[:,-1])
for i, ss1 in enumerate(slices):
for s1 in ss1:
if i==0:
Kdiag[s1]+= vyt*vyx
elif i==1:
#i=1
Kdiag[s1]+= b**2*k1 - 2*a*c*k2 + a**2*k3 + c**2*vyt*vyx
#Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
else:
raise ValueError, "invalid input/output index"
return Kdiag
def update_gradients_full(self, dL_dK, X, X2=None):
#def dK_dtheta(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
K = np.zeros((X.shape[0], X.shape[0]))
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
vyt = self.variance_Yt
vyx = self.variance_Yx
lyt = 1./(2*self.lengthscale_Yt)
lyx = 1./(2*self.lengthscale_Yx)
a = self.a
b = self.b
c = self.c
tdist = (X[:,0][:,None] - X2[:,0][None,:])**2
xdist = (X[:,1][:,None] - X2[:,1][None,:])**2
#rdist = [tdist,xdist]
ttdist = (X[:,0][:,None] - X2[:,0][None,:])
rd=tdist.shape[0]
dka = np.zeros([rd,rd])
dkb = np.zeros([rd,rd])
dkc = np.zeros([rd,rd])
dkYdvart = np.zeros([rd,rd])
dkYdvarx = np.zeros([rd,rd])
dkYdlent = np.zeros([rd,rd])
dkYdlenx = np.zeros([rd,rd])
kyy = lambda tdist,xdist: np.exp(-lyt*(tdist) -lyx*(xdist))
#k1 = lambda tdist: (lyt - lyt**2 * (tdist) )
#k2 = lambda xdist: ( lyx**2 * (xdist) - lyx )
#k3 = lambda xdist: ( 3*lyx**2 - 6*xdist*lyx**3 + xdist**2*lyx**4 )
#k4 = lambda tdist: -lyt*np.sqrt(tdist)
k1 = lambda tdist: (2*lyt - 4*lyt**2 * (tdist) )
k2 = lambda xdist: ( 4*lyx**2 * (xdist) - 2*lyx )
k3 = lambda xdist: ( 3*4*lyx**2 - 6*8*xdist*lyx**3 + 16*xdist**2*lyx**4 )
k4 = lambda ttdist: 2*lyt*(ttdist)
dkyydlyx = lambda tdist,xdist: kyy(tdist,xdist)*(-xdist)
dkyydlyt = lambda tdist,xdist: kyy(tdist,xdist)*(-tdist)
dk1dlyt = lambda tdist: 2. - 4*2.*lyt*tdist
dk2dlyx = lambda xdist: (4.*2.*lyx*xdist -2.)
dk3dlyx = lambda xdist: (6.*4.*lyx - 18.*8*xdist*lyx**2 + 4*16*xdist**2*lyx**3)
dk4dlyt = lambda ttdist: 2*(ttdist)
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
dka[ss1,ss2] = 0
dkb[ss1,ss2] = 0
dkc[ss1,ss2] = 0
dkYdvart[ss1,ss2] = vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdvarx[ss1,ss2] = vyt*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdlenx[ss1,ss2] = vyt*vyx*dkyydlyx(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*vyx*dkyydlyt(tdist[ss1,ss2],xdist[ss1,ss2])
elif i==0 and j==1:
dka[ss1,ss2] = -k2(xdist[ss1,ss2])*vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkb[ss1,ss2] = k4(ttdist[ss1,ss2])*vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkc[ss1,ss2] = vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
#dkYdvart[ss1,ss2] = 0
#dkYdvarx[ss1,ss2] = 0
#dkYdlent[ss1,ss2] = 0
#dkYdlenx[ss1,ss2] = 0
dkYdvart[ss1,ss2] = (-a*k2(xdist[ss1,ss2])+b*k4(ttdist[ss1,ss2])+c)*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdvarx[ss1,ss2] = (-a*k2(xdist[ss1,ss2])+b*k4(ttdist[ss1,ss2])+c)*vyt*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*vyx*dkyydlyt(tdist[ss1,ss2],xdist[ss1,ss2])* (-a*k2(xdist[ss1,ss2])+b*k4(ttdist[ss1,ss2])+c)+\
vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])*b*dk4dlyt(ttdist[ss1,ss2])
dkYdlenx[ss1,ss2] = vyt*vyx*dkyydlyx(tdist[ss1,ss2],xdist[ss1,ss2])*(-a*k2(xdist[ss1,ss2])+b*k4(ttdist[ss1,ss2])+c)+\
vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])*(-a*dk2dlyx(xdist[ss1,ss2]))
elif i==1 and j==1:
dka[ss1,ss2] = (2*a*k3(xdist[ss1,ss2]) - 2*c*k2(xdist[ss1,ss2]))*vyt*vyx* kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkb[ss1,ss2] = 2*b*k1(tdist[ss1,ss2])*vyt*vyx* kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkc[ss1,ss2] = (-2*a*k2(xdist[ss1,ss2]) + 2*c )*vyt*vyx* kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdvart[ss1,ss2] = ( b**2*k1(tdist[ss1,ss2]) - 2*a*c*k2(xdist[ss1,ss2]) + a**2*k3(xdist[ss1,ss2]) + c**2 )*vyx* kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdvarx[ss1,ss2] = ( b**2*k1(tdist[ss1,ss2]) - 2*a*c*k2(xdist[ss1,ss2]) + a**2*k3(xdist[ss1,ss2]) + c**2 )*vyt* kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*vyx*dkyydlyt(tdist[ss1,ss2],xdist[ss1,ss2])*( b**2*k1(tdist[ss1,ss2]) - 2*a*c*k2(xdist[ss1,ss2]) + a**2*k3(xdist[ss1,ss2]) + c**2 ) +\
vyx*vyt*kyy(tdist[ss1,ss2],xdist[ss1,ss2])*b**2*dk1dlyt(tdist[ss1,ss2])
dkYdlenx[ss1,ss2] = vyt*vyx*dkyydlyx(tdist[ss1,ss2],xdist[ss1,ss2])*( b**2*k1(tdist[ss1,ss2]) - 2*a*c*k2(xdist[ss1,ss2]) + a**2*k3(xdist[ss1,ss2]) + c**2 ) +\
vyx*vyt*kyy(tdist[ss1,ss2],xdist[ss1,ss2])* (-2*a*c*dk2dlyx(xdist[ss1,ss2]) + a**2*dk3dlyx(xdist[ss1,ss2]) )
else:
dka[ss1,ss2] = -k2(xdist[ss1,ss2])*vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkb[ss1,ss2] = -k4(ttdist[ss1,ss2])*vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkc[ss1,ss2] = vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
#dkYdvart[ss1,ss2] = 0
#dkYdvarx[ss1,ss2] = 0
#dkYdlent[ss1,ss2] = 0
#dkYdlenx[ss1,ss2] = 0
dkYdvart[ss1,ss2] = (-a*k2(xdist[ss1,ss2])-b*k4(ttdist[ss1,ss2])+c)*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdvarx[ss1,ss2] = (-a*k2(xdist[ss1,ss2])-b*k4(ttdist[ss1,ss2])+c)*vyt*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*vyx*dkyydlyt(tdist[ss1,ss2],xdist[ss1,ss2])* (-a*k2(xdist[ss1,ss2])-b*k4(ttdist[ss1,ss2])+c)+\
vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])*(-1)*b*dk4dlyt(ttdist[ss1,ss2])
dkYdlenx[ss1,ss2] = vyt*vyx*dkyydlyx(tdist[ss1,ss2],xdist[ss1,ss2])*(-a*k2(xdist[ss1,ss2])-b*k4(ttdist[ss1,ss2])+c)+\
vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])*(-a*dk2dlyx(xdist[ss1,ss2]))
self.a.gradient = np.sum(dka * dL_dK)
self.b.gradient = np.sum(dkb * dL_dK)
self.c.gradient = np.sum(dkc * dL_dK)
self.variance_Yt.gradient = np.sum(dkYdvart * dL_dK) # Vy
self.variance_Yx.gradient = np.sum(dkYdvarx * dL_dK)
self.lengthscale_Yt.gradient = np.sum(dkYdlent*(-0.5*self.lengthscale_Yt**(-2)) * dL_dK) #ly np.sum(dktheta2*(-self.lengthscale_Y**(-2)) * dL_dK)
self.lengthscale_Yx.gradient = np.sum(dkYdlenx*(-0.5*self.lengthscale_Yx**(-2)) * dL_dK)

165
GPy/kern/_src/ODE_t.py Normal file
View file

@ -0,0 +1,165 @@
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
from independent_outputs import index_to_slices
class ODE_t(Kern):
def __init__(self, input_dim, a=1., c=1.,variance_Yt=3., lengthscale_Yt=1.5,ubias =1., active_dims=None, name='ode_st'):
assert input_dim ==2, "only defined for 2 input dims"
super(ODE_t, self).__init__(input_dim, active_dims, name)
self.variance_Yt = Param('variance_Yt', variance_Yt, Logexp())
self.lengthscale_Yt = Param('lengthscale_Yt', lengthscale_Yt, Logexp())
self.a= Param('a', a, Logexp())
self.c = Param('c', c, Logexp())
self.ubias = Param('ubias', ubias, Logexp())
self.add_parameters(self.a, self.c, self.variance_Yt, self.lengthscale_Yt,self.ubias)
def K(self, X, X2=None):
"""Compute the covariance matrix between X and X2."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
K = np.zeros((X.shape[0], X.shape[0]))
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
K = np.zeros((X.shape[0], X2.shape[0]))
tdist = (X[:,0][:,None] - X2[:,0][None,:])**2
ttdist = (X[:,0][:,None] - X2[:,0][None,:])
vyt = self.variance_Yt
lyt=1/(2*self.lengthscale_Yt)
a = -self.a
c = self.c
kyy = lambda tdist: np.exp(-lyt*(tdist))
k1 = lambda tdist: (2*lyt - 4*lyt**2 *(tdist) )
k4 = lambda tdist: 2*lyt*(tdist)
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
K[ss1,ss2] = vyt*kyy(tdist[ss1,ss2])
elif i==0 and j==1:
K[ss1,ss2] = (k4(ttdist[ss1,ss2])+1)*vyt*kyy(tdist[ss1,ss2])
#K[ss1,ss2] = (2*lyt*(ttdist[ss1,ss2])+1)*vyt*kyy(tdist[ss1,ss2])
elif i==1 and j==1:
K[ss1,ss2] = ( k1(tdist[ss1,ss2]) + 1. )*vyt* kyy(tdist[ss1,ss2])+self.ubias
else:
K[ss1,ss2] = (-k4(ttdist[ss1,ss2])+1)*vyt*kyy(tdist[ss1,ss2])
#K[ss1,ss2] = (-2*lyt*(ttdist[ss1,ss2])+1)*vyt*kyy(tdist[ss1,ss2])
#stop
return K
def Kdiag(self, X):
vyt = self.variance_Yt
lyt = 1./(2*self.lengthscale_Yt)
a = -self.a
c = self.c
k1 = (2*lyt )*vyt
Kdiag = np.zeros(X.shape[0])
slices = index_to_slices(X[:,-1])
for i, ss1 in enumerate(slices):
for s1 in ss1:
if i==0:
Kdiag[s1]+= vyt
elif i==1:
#i=1
Kdiag[s1]+= k1 + vyt+self.ubias
#Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
else:
raise ValueError, "invalid input/output index"
return Kdiag
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
K = np.zeros((X.shape[0], X.shape[0]))
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
vyt = self.variance_Yt
lyt = 1./(2*self.lengthscale_Yt)
tdist = (X[:,0][:,None] - X2[:,0][None,:])**2
ttdist = (X[:,0][:,None] - X2[:,0][None,:])
#rdist = [tdist,xdist]
rd=tdist.shape[0]
dka = np.zeros([rd,rd])
dkc = np.zeros([rd,rd])
dkYdvart = np.zeros([rd,rd])
dkYdlent = np.zeros([rd,rd])
dkdubias = np.zeros([rd,rd])
kyy = lambda tdist: np.exp(-lyt*(tdist))
dkyydlyt = lambda tdist: kyy(tdist)*(-tdist)
k1 = lambda tdist: (2*lyt - 4*lyt**2 * (tdist) )
k4 = lambda ttdist: 2*lyt*(ttdist)
dk1dlyt = lambda tdist: 2. - 4*2.*lyt*tdist
dk4dlyt = lambda ttdist: 2*(ttdist)
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
dkYdvart[ss1,ss2] = kyy(tdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*dkyydlyt(tdist[ss1,ss2])
dkdubias[ss1,ss2] = 0
elif i==0 and j==1:
dkYdvart[ss1,ss2] = (k4(ttdist[ss1,ss2])+1)*kyy(tdist[ss1,ss2])
#dkYdvart[ss1,ss2] = ((2*lyt*ttdist[ss1,ss2])+1)*kyy(tdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*dkyydlyt(tdist[ss1,ss2])* (k4(ttdist[ss1,ss2])+1.)+\
vyt*kyy(tdist[ss1,ss2])*(dk4dlyt(ttdist[ss1,ss2]))
#dkYdlent[ss1,ss2] = vyt*dkyydlyt(tdist[ss1,ss2])* (2*lyt*(ttdist[ss1,ss2])+1.)+\
#vyt*kyy(tdist[ss1,ss2])*(2*ttdist[ss1,ss2])
dkdubias[ss1,ss2] = 0
elif i==1 and j==1:
dkYdvart[ss1,ss2] = (k1(tdist[ss1,ss2]) + 1. )* kyy(tdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*dkyydlyt(tdist[ss1,ss2])*( k1(tdist[ss1,ss2]) + 1. ) +\
vyt*kyy(tdist[ss1,ss2])*dk1dlyt(tdist[ss1,ss2])
dkdubias[ss1,ss2] = 1
else:
dkYdvart[ss1,ss2] = (-k4(ttdist[ss1,ss2])+1)*kyy(tdist[ss1,ss2])
#dkYdvart[ss1,ss2] = (-2*lyt*(ttdist[ss1,ss2])+1)*kyy(tdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*dkyydlyt(tdist[ss1,ss2])* (-k4(ttdist[ss1,ss2])+1.)+\
vyt*kyy(tdist[ss1,ss2])*(-dk4dlyt(ttdist[ss1,ss2]) )
dkdubias[ss1,ss2] = 0
#dkYdlent[ss1,ss2] = vyt*dkyydlyt(tdist[ss1,ss2])* (-2*lyt*(ttdist[ss1,ss2])+1.)+\
#vyt*kyy(tdist[ss1,ss2])*(-2)*(ttdist[ss1,ss2])
self.variance_Yt.gradient = np.sum(dkYdvart * dL_dK)
self.lengthscale_Yt.gradient = np.sum(dkYdlent*(-0.5*self.lengthscale_Yt**(-2)) * dL_dK)
self.ubias.gradient = np.sum(dkdubias * dL_dK)

View file

188
GPy/kern/_src/add.py Normal file
View file

@ -0,0 +1,188 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import itertools
from ...util.caching import Cache_this
from kern import CombinationKernel
class Add(CombinationKernel):
"""
Add given list of kernels together.
propagates gradients through.
This kernel will take over the active dims of it's subkernels passed in.
"""
def __init__(self, subkerns, name='add'):
for i, kern in enumerate(subkerns[:]):
if isinstance(kern, Add):
del subkerns[i]
for part in kern.parts[::-1]:
kern.unlink_parameter(part)
subkerns.insert(i, part)
super(Add, self).__init__(subkerns, name)
@Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
"""
Add all kernels together.
If a list of parts (of this kernel!) `which_parts` is given, only
the parts of the list are taken to compute the covariance.
"""
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.K(X, X2) for p in which_parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None):
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts if not p.is_fixed]
def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target
@Cache_this(limit=2, force_kwargs=['which_parts'])
def psi0(self, Z, variational_posterior):
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def psi1(self, Z, variational_posterior):
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def psi2(self, Z, variational_posterior):
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2
# compute the "cross" terms
from static import White, Bias
from rbf import RBF
#from rbf_inv import RBFInv
from linear import Linear
#ffrom fixed import Fixed
for p1, p2 in itertools.combinations(self.parts, 2):
# i1, i2 = p1.active_dims, p2.active_dims
# white doesn;t combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z, variational_posterior).sum(axis=0)
psi2 += p1.variance * (tmp[:,None]+tmp[None,:]) #(tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z, variational_posterior).sum(axis=0)
psi2 += p2.variance * (tmp[:,None]+tmp[None,:]) #(tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
tmp1 = p1.psi1(Z, variational_posterior)
tmp2 = p2.psi1(Z, variational_posterior)
psi2 += np.einsum('nm,no->mo',tmp1,tmp2)+np.einsum('nm,no->mo',tmp2,tmp1)
#(tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
for p1 in self.parts:
#compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target = np.zeros(Z.shape)
for p1 in self.parts:
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
return target
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters]
for p1 in self.parameters:
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parameters:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
[np.add(target_grads[i],grads[i],target_grads[i]) for i in xrange(len(grads))]
return target_grads
def add(self, other):
if isinstance(other, Add):
other_params = other.parameters[:]
for p in other_params:
other.unlink_parameter(p)
self.link_parameters(*other_params)
else:
self.link_parameter(other)
self.input_dim, self.active_dims = self.get_input_dim_active_dims(self.parts)
return self
def input_sensitivity(self, summarize=True):
if summarize:
return reduce(np.add, [k.input_sensitivity(summarize) for k in self.parts])
else:
i_s = np.zeros((len(self.parts), self.input_dim))
from operator import setitem
[setitem(i_s, (i, Ellipsis), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)]
return i_s

50
GPy/kern/_src/brownian.py Normal file
View file

@ -0,0 +1,50 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
class Brownian(Kern):
"""
Brownian motion in 1D only.
Negative times are treated as a separate (backwards!) Brownian motion.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance:
:type variance: float
"""
def __init__(self, input_dim=1, variance=1., active_dims=None, name='Brownian'):
assert input_dim==1, "Brownian motion in 1D only"
super(Brownian, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp())
self.link_parameters(self.variance)
def K(self,X,X2=None):
if X2 is None:
X2 = X
return self.variance*np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.)
def Kdiag(self,X):
return self.variance*np.abs(X.flatten())
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
self.variance.gradient = np.sum(dL_dK * np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.))
#def update_gradients_diag(self, dL_dKdiag, X):
#self.variance.gradient = np.dot(np.abs(X.flatten()), dL_dKdiag)
#def gradients_X(self, dL_dK, X, X2=None):
#if X2 is None:
#return np.sum(self.variance*dL_dK*np.abs(X),1)[:,None]
#else:
#return np.sum(np.where(np.logical_and(np.abs(X)<np.abs(X2.T), np.sign(X)==np.sign(X2)), self.variance*dL_dK,0.),1)[:,None]

View file

@ -0,0 +1,174 @@
# Copyright (c) 2012, James Hensman and Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
import numpy as np
from scipy import weave
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.config import config # for assesing whether to use weave
class Coregionalize(Kern):
"""
Covariance function for intrinsic/linear coregionalization models
This covariance has the form:
.. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(kappa)
An intrinsic/linear coregionalization covariance function of the form:
.. math::
k_2(x, y)=\mathbf{B} k(x, y)
it is obtained as the tensor product between a covariance function
k(x, y) and B.
:param output_dim: number of outputs to coregionalize
:type output_dim: int
:param rank: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
:type rank: int
:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
:type W: numpy array of dimensionality (num_outpus, W_columns)
:param kappa: a vector which allows the outputs to behave independently
:type kappa: numpy array of dimensionality (output_dim, )
.. note: see coregionalization examples in GPy.examples.regression for some usage.
"""
def __init__(self, input_dim, output_dim, rank=1, W=None, kappa=None, active_dims=None, name='coregion'):
super(Coregionalize, self).__init__(input_dim, active_dims, name=name)
self.output_dim = output_dim
self.rank = rank
if self.rank>output_dim:
print("Warning: Unusual choice of rank, it should normally be less than the output_dim.")
if W is None:
W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
else:
assert W.shape==(self.output_dim, self.rank)
self.W = Param('W', W)
if kappa is None:
kappa = 0.5*np.ones(self.output_dim)
else:
assert kappa.shape==(self.output_dim, )
self.kappa = Param('kappa', kappa, Logexp())
self.link_parameters(self.W, self.kappa)
def parameters_changed(self):
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
def K(self, X, X2=None):
if config.getboolean('weave', 'working'):
try:
return self._K_weave(X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
return self._K_numpy(X, X2)
else:
return self._K_numpy(X, X2)
def _K_numpy(self, X, X2=None):
index = np.asarray(X, dtype=np.int)
if X2 is None:
return self.B[index,index.T]
else:
index2 = np.asarray(X2, dtype=np.int)
return self.B[index,index2.T]
def _K_weave(self, X, X2=None):
"""compute the kernel function using scipy.weave"""
index = np.asarray(X, dtype=np.int)
if X2 is None:
target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64)
code="""
for(int i=0;i<N; i++){
target[i+i*N] = B[index[i]+output_dim*index[i]];
for(int j=0; j<i; j++){
target[j+i*N] = B[index[i]+output_dim*index[j]];
target[i+j*N] = target[j+i*N];
}
}
"""
N, B, output_dim = index.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'N', 'B', 'output_dim'])
else:
index2 = np.asarray(X2, dtype=np.int)
target = np.empty((X.shape[0], X2.shape[0]), dtype=np.float64)
code="""
for(int i=0;i<num_inducing; i++){
for(int j=0; j<N; j++){
target[i+j*num_inducing] = B[output_dim*index[j]+index2[i]];
}
}
"""
N, num_inducing, B, output_dim = index.size, index2.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'index2', 'N', 'num_inducing', 'B', 'output_dim'])
return target
def Kdiag(self, X):
return np.diag(self.B)[np.asarray(X, dtype=np.int).flatten()]
def update_gradients_full(self, dL_dK, X, X2=None):
index = np.asarray(X, dtype=np.int)
if X2 is None:
index2 = index
else:
index2 = np.asarray(X2, dtype=np.int)
#attempt to use weave for a nasty double indexing loop: fall back to numpy
if config.getboolean('weave', 'working'):
try:
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
else:
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
self.W.gradient = dW
self.kappa.gradient = dkappa
def _gradient_reduce_weave(self, dL_dK, index, index2):
dL_dK_small = np.zeros_like(self.B)
code="""
for(int i=0; i<num_inducing; i++){
for(int j=0; j<N; j++){
dL_dK_small[index[j] + output_dim*index2[i]] += dL_dK[i+j*num_inducing];
}
}
"""
N, num_inducing, output_dim = index.size, index2.size, self.output_dim
weave.inline(code, ['N', 'num_inducing', 'output_dim', 'dL_dK', 'dL_dK_small', 'index', 'index2'])
return dL_dK_small
def _gradient_reduce_numpy(self, dL_dK, index, index2):
index, index2 = index[:,0], index2[:,0]
dL_dK_small = np.zeros_like(self.B)
for i in range(k.output_dim):
tmp1 = dL_dK[index==i]
for j in range(k.output_dim):
dL_dK_small[j,i] = tmp1[:,index2==j].sum()
return dL_dK_small
def update_gradients_diag(self, dL_dKdiag, X):
index = np.asarray(X, dtype=np.int).flatten()
dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in xrange(self.output_dim)])
self.W.gradient = 2.*self.W*dL_dKdiag_small[:, None]
self.kappa.gradient = dL_dKdiag_small
def gradients_X(self, dL_dK, X, X2=None):
return np.zeros(X.shape)
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)

View file

@ -0,0 +1,202 @@
# Copyright (c) 2012, James Hesnsman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern, CombinationKernel
import numpy as np
import itertools
def index_to_slices(index):
"""
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
e.g.
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
returns
>>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
or, a more complicated example
>>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
returns
>>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
"""
if len(index)==0:
return[]
#contruct the return structure
ind = np.asarray(index,dtype=np.int)
ret = [[] for i in range(ind.max()+1)]
#find the switchpoints
ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret
class IndependentOutputs(CombinationKernel):
"""
A kernel which can represent several independent functions. this kernel
'switches off' parts of the matrix where the output indexes are different.
The index of the functions is given by the last column in the input X the
rest of the columns of X are passed to the underlying kernel for
computation (in blocks).
:param kernels: either a kernel, or list of kernels to work with. If it is
a list of kernels the indices in the index_dim, index the kernels you gave!
"""
def __init__(self, kernels, index_dim=-1, name='independ'):
assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the index"
if not isinstance(kernels, list):
self.single_kern = True
self.kern = kernels
kernels = [kernels]
else:
self.single_kern = False
self.kern = kernels
super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
self.index_dim = index_dim
def K(self,X ,X2=None):
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(kerns, slices)]
else:
slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0]))
[[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)]
return target
def Kdiag(self,X):
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
target = np.zeros(X.shape[0])
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
return target
def update_gradients_full(self,dL_dK,X,X2=None):
slices = index_to_slices(X[:,self.index_dim])
if self.single_kern:
target = np.zeros(self.kern.size)
kerns = itertools.repeat(self.kern)
else:
kerns = self.kern
target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
def collate_grads(kern, i, dL, X, X2):
kern.update_gradients_full(dL,X,X2)
if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient
if X2 is None:
[[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(kerns,slices))]
else:
slices2 = index_to_slices(X2[:,self.index_dim])
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))]
if self.single_kern: kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
def gradients_X(self,dL_dK, X, X2=None):
target = np.zeros(X.shape)
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if X2 is None:
# TODO: make use of index_to_slices
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
for kern, s in zip(kerns, slices)]
#slices = index_to_slices(X[:,self.index_dim])
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
# for s in slices_i] for kern, slices_i in zip(kerns, slices)]
#import ipdb;ipdb.set_trace()
#[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
# np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss]))
# for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(kerns, slices)]
else:
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
slices2 = [X2[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2]))
for kern, s, s2 in zip(kerns, slices, slices2)]
# TODO: make work with index_to_slices
#slices = index_to_slices(X[:,self.index_dim])
#slices2 = index_to_slices(X2[:,self.index_dim])
#[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)]
return target
def gradients_X_diag(self, dL_dKdiag, X):
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
target = np.zeros(X.shape)
[[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
return target
def update_gradients_diag(self, dL_dKdiag, X):
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if self.single_kern: target = np.zeros(self.kern.size)
else: target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
def collate_grads(kern, i, dL, X):
kern.update_gradients_diag(dL,X)
if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(kerns, slices))]
if self.single_kern: kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
class Hierarchical(CombinationKernel):
"""
A kernel which can represent a simple hierarchical model.
See Hensman et al 2013, "Hierarchical Bayesian modelling of gene expression time
series across irregularly sampled replicates and clusters"
http://www.biomedcentral.com/1471-2105/14/252
To construct this kernel, you must pass a list of kernels. the first kernel
will be assumed to be the 'base' kernel, and will be computed everywhere.
For every additional kernel, we assume another layer in the hierachy, with
a corresponding column of the input matrix which indexes which function the
data are in at that level.
For more, see the ipython notebook documentation on Hierarchical
covariances.
"""
def __init__(self, kernels, name='hierarchy'):
assert all([k.input_dim==kernels[0].input_dim for k in kernels])
assert len(kernels) > 1
self.levels = len(kernels) -1
input_max = max([k.input_dim for k in kernels])
super(Hierarchical, self).__init__(kernels=kernels, extra_dims = range(input_max, input_max + len(kernels)-1), name=name)
def K(self,X ,X2=None):
K = self.parts[0].K(X, X2) # compute 'base' kern everywhere
slices = [index_to_slices(X[:,i]) for i in self.extra_dims]
if X2 is None:
[[[np.add(K[s,s], k.K(X[s], None), K[s, s]) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.parts[1:], slices)]
else:
slices2 = [index_to_slices(X2[:,i]) for i in self.extra_dims]
[[[np.add(K[s,ss], k.K(X[s], X2[ss]), K[s, ss]) for s,ss in zip(slices_i, slices_j)] for slices_i, slices_j in zip(slices_k1, slices_k2)] for k, slices_k1, slices_k2 in zip(self.parts[1:], slices, slices2)]
return K
def Kdiag(self,X):
return np.diag(self.K(X))
def gradients_X(self, dL_dK, X, X2=None):
raise NotImplementedError
def update_gradients_full(self,dL_dK,X,X2=None):
slices = [index_to_slices(X[:,i]) for i in self.extra_dims]
if X2 is None:
self.parts[0].update_gradients_full(dL_dK, X, None)
for k, slices_k in zip(self.parts[1:], slices):
target = np.zeros(k.size)
def collate_grads(dL, X, X2, target):
k.update_gradients_full(dL,X,X2)
target += k.gradient
[[collate_grads(dL_dK[s,s], X[s], None, target) for s in slices_i] for slices_i in slices_k]
k.gradient[:] = target
else:
raise NotImplementedError

280
GPy/kern/_src/kern.py Normal file
View file

@ -0,0 +1,280 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np
from ...core.parameterization.parameterized import Parameterized
from kernel_slice_operations import KernCallsViaSlicerMeta
from ...util.caching import Cache_this
from GPy.core.parameterization.observable_array import ObsAr
class Kern(Parameterized):
#===========================================================================
# This adds input slice support. The rather ugly code for slicing can be
# found in kernel_slice_operations
__metaclass__ = KernCallsViaSlicerMeta
#===========================================================================
_support_GPU=False
def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw):
"""
The base class for a kernel: a positive definite function
which forms of a covariance function (kernel).
input_dim:
is the number of dimensions to work on. Make sure to give the
tight dimensionality of inputs.
You most likely want this to be the integer telling the number of
input dimensions of the kernel.
If this is not an integer (!) we will work on the whole input matrix X,
and not check whether dimensions match or not (!).
active_dims:
is the active_dimensions of inputs X we will work on.
All kernels will get sliced Xes as inputs, if active_dims is not None
Only positive integers are allowed in active_dims!
if active_dims is None, slicing is switched off and all X will be passed through as given.
:param int input_dim: the number of input dimensions to the function
:param array-like|None active_dims: list of indices on which dimensions this kernel works on, or none if no slicing
Do not instantiate.
"""
super(Kern, self).__init__(name=name, *a, **kw)
self.input_dim = int(input_dim)
if active_dims is None:
active_dims = np.arange(input_dim)
self.active_dims = np.atleast_1d(active_dims).astype(int)
assert self.active_dims.size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, self.active_dims.size, self.active_dims)
self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU
self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool)
@property
def return_psi2_n(self):
"""
Flag whether to pass back psi2 as NxMxM or MxM, by summing out N.
"""
return self._return_psi2_n_flag[0]
@return_psi2_n.setter
def return_psi2_n(self, val):
def visit(self):
if isinstance(self, Kern):
self._return_psi2_n_flag[0]=val
self.traverse(visit)
@Cache_this(limit=20)
def _slice_X(self, X):
return X[:, self.active_dims]
def K(self, X, X2):
"""
Compute the kernel function.
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed throgh to the 'part' object, which
handLes this as X2 == X.
"""
raise NotImplementedError
def Kdiag(self, X):
raise NotImplementedError
def psi0(self, Z, variational_posterior):
raise NotImplementedError
def psi1(self, Z, variational_posterior):
raise NotImplementedError
def psi2(self, Z, variational_posterior):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2):
raise NotImplementedError
def gradients_X_diag(self, dL_dKdiag, X):
raise NotImplementedError
def update_gradients_diag(self, dL_dKdiag, X):
""" update the gradients of all parameters when using only the diagonal elements of the covariance matrix"""
raise NotImplementedError
def update_gradients_full(self, dL_dK, X, X2):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Set the gradients of all parameters when doing inference with
uncertain inputs, using expectations of the kernel.
The esential maths is
dL_d{theta_i} = dL_dpsi0 * dpsi0_d{theta_i} +
dL_dpsi1 * dpsi1_d{theta_i} +
dL_dpsi2 * dpsi2_d{theta_i}
"""
raise NotImplementedError
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Returns the derivative of the objective wrt Z, using the chain rule
through the expectation variables.
"""
raise NotImplementedError
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Compute the gradients wrt the parameters of the variational
distruibution q(X), chain-ruling via the expectations of the kernel
"""
raise NotImplementedError
def plot(self, x=None, fignum=None, ax=None, title=None, plot_limits=None, resolution=None, **mpl_kwargs):
"""
plot this kernel.
:param x: the value to use for the other kernel argument (kernels are a function of two variables!)
:param fignum: figure number of the plot
:param ax: matplotlib axis to plot on
:param title: the matplotlib title
:param plot_limits: the range over which to plot the kernel
:resolution: the resolution of the lines used in plotting
:mpl_kwargs avalid keyword arguments to pass through to matplotlib (e.g. lw=7)
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import kernel_plots
kernel_plots.plot(self, x, fignum, ax, title, plot_limits, resolution, **mpl_kwargs)
def plot_ARD(self, *args, **kw):
"""
See :class:`~GPy.plotting.matplot_dep.kernel_plots`
"""
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import kernel_plots
return kernel_plots.plot_ARD(self,*args,**kw)
def input_sensitivity(self, summarize=True):
"""
Returns the sensitivity for each dimension of this kernel.
"""
return np.zeros(self.input_dim)
def __add__(self, other):
""" Overloading of the '+' operator. for more control, see self.add """
return self.add(other)
def __iadd__(self, other):
return self.add(other)
def add(self, other, name='add'):
"""
Add another kernel to this one.
:param other: the other kernel to be added
:type other: GPy.kern
"""
assert isinstance(other, Kern), "only kernels can be added to kernels..."
from add import Add
return Add([self, other], name=name)
def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
def __imul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
def __pow__(self, other):
"""
Shortcut for tensor `prod`.
"""
assert np.all(self.active_dims == range(self.input_dim)), "Can only use kernels, which have their input_dims defined from 0"
assert np.all(other.active_dims == range(other.input_dim)), "Can only use kernels, which have their input_dims defined from 0"
other.active_dims += self.input_dim
return self.prod(other)
def prod(self, other, name='mul'):
"""
Multiply two kernels (either on the same space, or on the tensor
product of the input space).
:param other: the other kernel to be added
:type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool
"""
assert isinstance(other, Kern), "only kernels can be multiplied to kernels..."
from prod import Prod
#kernels = []
#if isinstance(self, Prod): kernels.extend(self.parameters)
#else: kernels.append(self)
#if isinstance(other, Prod): kernels.extend(other.parameters)
#else: kernels.append(other)
return Prod([self, other], name)
def _check_input_dim(self, X):
assert X.shape[1] == self.input_dim, "{} did not specify active_dims and X has wrong shape: X_dim={}, whereas input_dim={}".format(self.name, X.shape[1], self.input_dim)
def _check_active_dims(self, X):
assert X.shape[1] >= len(self.active_dims), "At least {} dimensional X needed, X.shape={!s}".format(len(self.active_dims), X.shape)
class CombinationKernel(Kern):
"""
Abstract super class for combination kernels.
A combination kernel combines (a list of) kernels and works on those.
Examples are the HierarchicalKernel or Add and Prod kernels.
"""
def __init__(self, kernels, name, extra_dims=[]):
"""
Abstract super class for combination kernels.
A combination kernel combines (a list of) kernels and works on those.
Examples are the HierarchicalKernel or Add and Prod kernels.
:param list kernels: List of kernels to combine (can be only one element)
:param str name: name of the combination kernel
:param array-like extra_dims: if needed extra dimensions for the combination kernel to work on
"""
assert all([isinstance(k, Kern) for k in kernels])
extra_dims = np.array(extra_dims, dtype=int)
input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims)
# initialize the kernel with the full input_dim
super(CombinationKernel, self).__init__(input_dim, active_dims, name)
self.extra_dims = extra_dims
self.link_parameters(*kernels)
@property
def parts(self):
return self.parameters
def get_input_dim_active_dims(self, kernels, extra_dims = None):
#active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
#active_dims = np.array(np.concatenate((active_dims, extra_dims if extra_dims is not None else [])), dtype=int)
input_dim = reduce(max, (k.active_dims.max() for k in kernels)) + 1
if extra_dims is not None:
input_dim += extra_dims.size
active_dims = np.arange(input_dim)
return input_dim, active_dims
def input_sensitivity(self, summarize=True):
"""
If summize is true, we want to get the summerized view of the sensitivities,
otherwise put everything into an array with shape (#kernels, input_dim)
in the order of appearance of the kernels in the parameterized object.
"""
raise NotImplementedError("Choose the kernel you want to get the sensitivity for. You need to override the default behaviour for getting the input sensitivity to be able to get the input sensitivity. For sum kernel it is the sum of all sensitivities, TODO: product kernel? Other kernels?, also TODO: shall we return all the sensitivities here in the combination kernel? So we can combine them however we want? This could lead to just plot all the sensitivities here...")
def _check_active_dims(self, X):
return
def _check_input_dim(self, X):
# As combination kernels cannot always know, what their inner kernels have as input dims, the check will be done inside them, respectively
return

View file

@ -0,0 +1,143 @@
'''
Created on 11 Mar 2014
@author: maxz
'''
from ...core.parameterization.parameterized import ParametersChangedMeta
import numpy as np
from functools import wraps
def put_clean(dct, name, func):
if name in dct:
dct['_clean_{}'.format(name)] = dct[name]
dct[name] = func(dct[name])
class KernCallsViaSlicerMeta(ParametersChangedMeta):
def __new__(cls, name, bases, dct):
put_clean(dct, 'K', _slice_K)
put_clean(dct, 'Kdiag', _slice_Kdiag)
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
put_clean(dct, 'gradients_X', _slice_gradients_X)
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
put_clean(dct, 'psi0', _slice_psi)
put_clean(dct, 'psi1', _slice_psi)
put_clean(dct, 'psi2', _slice_psi)
put_clean(dct, 'update_gradients_expectations', _slice_update_gradients_expectations)
put_clean(dct, 'gradients_Z_expectations', _slice_gradients_Z_expectations)
put_clean(dct, 'gradients_qX_expectations', _slice_gradients_qX_expectations)
return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct)
class _Slice_wrap(object):
def __init__(self, k, X, X2=None):
self.k = k
self.shape = X.shape
assert X.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X.shape={!s}".format(X.shape)
if X2 is not None:
assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape)
if (self.k.active_dims is not None) and (self.k._sliced_X == 0):
self.k._check_active_dims(X)
self.X = self.k._slice_X(X)
self.X2 = self.k._slice_X(X2) if X2 is not None else X2
self.ret = True
else:
self.k._check_input_dim(X)
self.X = X
self.X2 = X2
self.ret = False
def __enter__(self):
self.k._sliced_X += 1
return self
def __exit__(self, *a):
self.k._sliced_X -= 1
def handle_return_array(self, return_val):
if self.ret:
ret = np.zeros(self.shape)
ret[:, self.k.active_dims] = return_val
return ret
return return_val
def _slice_K(f):
@wraps(f)
def wrap(self, X, X2 = None, *a, **kw):
with _Slice_wrap(self, X, X2) as s:
ret = f(self, s.X, s.X2, *a, **kw)
return ret
return wrap
def _slice_Kdiag(f):
@wraps(f)
def wrap(self, X, *a, **kw):
with _Slice_wrap(self, X, None) as s:
ret = f(self, s.X, *a, **kw)
return ret
return wrap
def _slice_update_gradients_full(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None):
with _Slice_wrap(self, X, X2) as s:
ret = f(self, dL_dK, s.X, s.X2)
return ret
return wrap
def _slice_update_gradients_diag(f):
@wraps(f)
def wrap(self, dL_dKdiag, X):
with _Slice_wrap(self, X, None) as s:
ret = f(self, dL_dKdiag, s.X)
return ret
return wrap
def _slice_gradients_X(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None):
with _Slice_wrap(self, X, X2) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
return ret
return wrap
def _slice_gradients_X_diag(f):
@wraps(f)
def wrap(self, dL_dKdiag, X):
with _Slice_wrap(self, X, None) as s:
ret = s.handle_return_array(f(self, dL_dKdiag, s.X))
return ret
return wrap
def _slice_psi(f):
@wraps(f)
def wrap(self, Z, variational_posterior):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = f(self, s.X, s.X2)
return ret
return wrap
def _slice_update_gradients_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2)
return ret
return wrap
def _slice_gradients_Z_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2))
return ret
return wrap
def _slice_gradients_qX_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
with _Slice_wrap(self, variational_posterior, Z) as s:
ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X))
r2 = ret[:2]
ret[0] = s.handle_return_array(r2[0])
ret[1] = s.handle_return_array(r2[1])
del r2
return ret
return wrap

177
GPy/kern/_src/linear.py Normal file
View file

@ -0,0 +1,177 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kern import Kern
from ...util.linalg import tdot
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.caching import Cache_this
from ...util.config import *
from .psi_comp import PSICOMP_Linear
class Linear(Kern):
"""
Linear kernel
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there
is only one variance parameter)
:param ARD: Auto Relevance Determination. If False, the kernel has only one
variance parameter \sigma^2, otherwise there is one variance
parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, variances=None, ARD=False, active_dims=None, name='linear'):
super(Linear, self).__init__(input_dim, active_dims, name)
self.ARD = ARD
if not ARD:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else:
variances = np.ones(1)
else:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
else:
variances = np.ones(self.input_dim)
self.variances = Param('variances', variances, Logexp())
self.link_parameter(self.variances)
self.psicomp = PSICOMP_Linear()
@Cache_this(limit=2)
def K(self, X, X2=None):
if self.ARD:
if X2 is None:
return tdot(X*np.sqrt(self.variances))
else:
rv = np.sqrt(self.variances)
return np.dot(X*rv, (X2*rv).T)
else:
return self._dot_product(X, X2) * self.variances
@Cache_this(limit=1, ignore_args=(0,))
def _dot_product(self, X, X2=None):
if X2 is None:
return tdot(X)
else:
return np.dot(X, X2.T)
def Kdiag(self, X):
return np.sum(self.variances * np.square(X), -1)
def update_gradients_full(self, dL_dK, X, X2=None):
if self.ARD:
if X2 is None:
#self.variances.gradient = np.array([np.sum(dL_dK * tdot(X[:, i:i + 1])) for i in range(self.input_dim)])
self.variances.gradient = np.einsum('ij,iq,jq->q', dL_dK, X, X)
else:
#product = X[:, None, :] * X2[None, :, :]
#self.variances.gradient = (dL_dK[:, :, None] * product).sum(0).sum(0)
self.variances.gradient = np.einsum('ij,iq,jq->q', dL_dK, X, X2)
else:
self.variances.gradient = np.sum(self._dot_product(X, X2) * dL_dK)
def update_gradients_diag(self, dL_dKdiag, X):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
self.variances.gradient = tmp.sum(0)
else:
self.variances.gradient = np.atleast_1d(tmp.sum())
def gradients_X(self, dL_dK, X, X2=None):
if X2 is None:
return np.einsum('jq,q,ij->iq', X, 2*self.variances, dL_dK)
else:
#return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
return np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)
def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*X
def input_sensitivity(self, summarize=True):
return np.ones(self.input_dim) * self.variances
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[0]
def psi1(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[1]
def psi2(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[2]
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
dL_dvar = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[0]
if self.ARD:
self.variances.gradient = dL_dvar
else:
self.variances.gradient = dL_dvar.sum()
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[1]
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[2:]
class LinearFull(Kern):
def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'):
super(LinearFull, self).__init__(input_dim, active_dims, name)
if W is None:
W = np.ones((input_dim, rank))
if kappa is None:
kappa = np.ones(input_dim)
assert W.shape == (input_dim, rank)
assert kappa.shape == (input_dim,)
self.W = Param('W', W)
self.kappa = Param('kappa', kappa, Logexp())
self.link_parameters(self.W, self.kappa)
def K(self, X, X2=None):
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
return np.einsum('ij,jk,lk->il', X, P, X if X2 is None else X2)
def update_gradients_full(self, dL_dK, X, X2=None):
self.kappa.gradient = np.einsum('ij,ik,kj->j', X, dL_dK, X if X2 is None else X2)
self.W.gradient = np.einsum('ij,kl,ik,lm->jm', X, X if X2 is None else X2, dL_dK, self.W)
self.W.gradient += np.einsum('ij,kl,ik,jm->lm', X, X if X2 is None else X2, dL_dK, self.W)
def Kdiag(self, X):
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
return np.einsum('ij,jk,ik->i', X, P, X)
def update_gradients_diag(self, dL_dKdiag, X):
self.kappa.gradient = np.einsum('ij,i->j', np.square(X), dL_dKdiag)
self.W.gradient = 2.*np.einsum('ij,ik,jl,i->kl', X, X, self.W, dL_dKdiag)
def gradients_X(self, dL_dK, X, X2=None):
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
if X2 is None:
return 2.*np.einsum('ij,jk,kl->il', dL_dK, X, P)
else:
return np.einsum('ij,jk,kl->il', dL_dK, X2, P)
def gradients_X_diag(self, dL_dKdiag, X):
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
return 2.*np.einsum('jk,i,ij->ik', P, dL_dKdiag, X)

129
GPy/kern/_src/mlp.py Normal file
View file

@ -0,0 +1,129 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
four_over_tau = 2./np.pi
class MLP(Kern):
"""
Multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
.. math::
k(x,y) = \\sigma^{2}\\frac{2}{\\pi } \\text{asin} \\left ( \\frac{ \\sigma_w^2 x^\\top y+\\sigma_b^2}{\\sqrt{\\sigma_w^2x^\\top x + \\sigma_b^2 + 1}\\sqrt{\\sigma_w^2 y^\\top y \\sigma_b^2 +1}} \\right )
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
:type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
"""
def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=100., active_dims=None, name='mlp'):
super(MLP, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp())
self.weight_variance = Param('weight_variance', weight_variance, Logexp())
self.bias_variance = Param('bias_variance', bias_variance, Logexp())
self.link_parameters(self.variance, self.weight_variance, self.bias_variance)
def K(self, X, X2=None):
self._K_computations(X, X2)
return self.variance*self._K_dvar
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix for X."""
self._K_diag_computations(X)
return self.variance*self._K_diag_dvar
def update_gradients_full(self, dL_dK, X, X2=None):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
self.variance.gradient = np.sum(self._K_dvar*dL_dK)
denom3 = self._K_denom**3
base = four_over_tau*self.variance/np.sqrt(1-self._K_asin_arg*self._K_asin_arg)
base_cov_grad = base*dL_dK
if X2 is None:
vec = np.diag(self._K_inner_prod)
self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
-.5*self._K_numer/denom3
*(np.outer((self.weight_variance*vec+self.bias_variance+1.), vec)
+np.outer(vec,(self.weight_variance*vec+self.bias_variance+1.))))*base_cov_grad).sum()
self.bias_variance.gradient = ((1./self._K_denom
-.5*self._K_numer/denom3
*((vec[None, :]+vec[:, None])*self.weight_variance
+2.*self.bias_variance + 2.))*base_cov_grad).sum()
else:
vec1 = (X*X).sum(1)
vec2 = (X2*X2).sum(1)
self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
-.5*self._K_numer/denom3
*(np.outer((self.weight_variance*vec1+self.bias_variance+1.), vec2) + np.outer(vec1, self.weight_variance*vec2 + self.bias_variance+1.)))*base_cov_grad).sum()
self.bias_variance.gradient = ((1./self._K_denom
-.5*self._K_numer/denom3
*((vec1[:, None]+vec2[None, :])*self.weight_variance
+ 2*self.bias_variance + 2.))*base_cov_grad).sum()
def update_gradients_diag(self, X):
raise NotImplementedError, "TODO"
def gradients_X(self, dL_dK, X, X2):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_asin_arg
numer = self._K_numer
denom = self._K_denom
denom3 = denom*denom*denom
if X2 is not None:
vec2 = (X2*X2).sum(1)*self.weight_variance+self.bias_variance + 1.
return four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
else:
vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
return 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
def gradients_X_diag(self, dL_dKdiag, X):
"""Gradient of diagonal of covariance with respect to X"""
self._K_diag_computations(X)
arg = self._K_diag_asin_arg
denom = self._K_diag_denom
#numer = self._K_diag_numer
return four_over_tau*2.*self.weight_variance*self.variance*X*(1./denom*(1. - arg)*dL_dKdiag/(np.sqrt(1-arg*arg)))[:, None]
def _K_computations(self, X, X2):
"""Pre-computations for the covariance matrix (used for computing the covariance and its gradients."""
if X2 is None:
self._K_inner_prod = np.dot(X,X.T)
self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
vec = np.diag(self._K_numer) + 1.
self._K_denom = np.sqrt(np.outer(vec,vec))
else:
self._K_inner_prod = np.dot(X,X2.T)
self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
vec1 = (X*X).sum(1)*self.weight_variance + self.bias_variance + 1.
vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
self._K_denom = np.sqrt(np.outer(vec1,vec2))
self._K_asin_arg = self._K_numer/self._K_denom
self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg)
def _K_diag_computations(self, X):
"""Pre-computations concerning the diagonal terms (used for computation of diagonal and its gradients)."""
self._K_diag_numer = (X*X).sum(1)*self.weight_variance + self.bias_variance
self._K_diag_denom = self._K_diag_numer+1.
self._K_diag_asin_arg = self._K_diag_numer/self._K_diag_denom
self._K_diag_dvar = four_over_tau*np.arcsin(self._K_diag_asin_arg)

405
GPy/kern/_src/periodic.py Normal file
View file

@ -0,0 +1,405 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kern import Kern
from ...util.linalg import mdot
from ...util.decorators import silence_errors
from ...core.parameterization.param import Param
from ...core.parameterization.transformations import Logexp
class Periodic(Kern):
def __init__(self, input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name):
"""
:type input_dim: int
:param variance: the variance of the Matern kernel
:type variance: float
:param lengthscale: the lengthscale of the Matern kernel
:type lengthscale: np.ndarray of size (input_dim,)
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
:rtype: kernel object
"""
assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
super(Periodic, self).__init__(input_dim, active_dims, name)
self.input_dim = input_dim
self.lower,self.upper = lower, upper
self.n_freq = n_freq
self.n_basis = 2*n_freq
self.variance = Param('variance', np.float64(variance), Logexp())
self.lengthscale = Param('lengthscale', np.float64(lengthscale), Logexp())
self.period = Param('period', np.float64(period), Logexp())
self.link_parameters(self.variance, self.lengthscale, self.period)
def _cos(self, alpha, omega, phase):
def f(x):
return alpha*np.cos(omega*x + phase)
return f
@silence_errors
def _cos_factorization(self, alpha, omega, phase):
r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
r = np.sqrt(r1**2 + r2**2)
psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
return r,omega[:,0:1], psi
@silence_errors
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
return Gint
def K(self, X, X2=None):
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
if X2 is None:
FX2 = FX
else:
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
return mdot(FX,self.Gi,FX2.T)
def Kdiag(self,X):
return np.diag(self.K(X))
class PeriodicExponential(Periodic):
"""
Kernel of the periodic subspace (up to a given frequency) of a exponential
(Matern 1/2) RKHS.
Only defined for input_dim=1.
"""
def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_exponential'):
super(PeriodicExponential, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name)
def parameters_changed(self):
self.a = [1./self.lengthscale, 1.]
self.b = [1]
self.basis_alpha = np.ones((self.n_basis,))
self.basis_omega = (2*np.pi*np.arange(1,self.n_freq+1)/self.period).repeat(2)
self.basis_phi = np.zeros(self.n_freq * 2)
self.basis_phi[::2] = -np.pi/2
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
def Gram_matrix(self):
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
Lo = np.column_stack((self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T))
@silence_errors
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
Lo = np.column_stack((self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
#dK_dlen
da_dlen = [-1./self.lengthscale**2,0.]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dG_dlen = 1./2*Gint + self.lengthscale/2*dGint_dlen
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
# SIMPLIFY!!! IPPprim1 = (self.upper - self.lower)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
IPPprim = np.where(np.logical_or(np.isnan(IPPprim1), np.isinf(IPPprim1)), IPPprim2, IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
#IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2))
r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)))
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
self.variance.gradient = np.sum(dK_dvar*dL_dK)
self.lengthscale.gradient = np.sum(dK_dlen*dL_dK)
self.period.gradient = np.sum(dK_dper*dL_dK)
class PeriodicMatern32(Periodic):
"""
Kernel of the periodic subspace (up to a given frequency) of a Matern 3/2 RKHS. Only defined for input_dim=1.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the Matern kernel
:type variance: float
:param lengthscale: the lengthscale of the Matern kernel
:type lengthscale: np.ndarray of size (input_dim,)
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
:rtype: kernel object
"""
def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_Matern32'):
super(PeriodicMatern32, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name)
def parameters_changed(self):
self.a = [3./self.lengthscale**2, 2*np.sqrt(3)/self.lengthscale, 1.]
self.b = [1,self.lengthscale**2/3]
self.basis_alpha = np.ones((self.n_basis,))
self.basis_omega = (2*np.pi*np.arange(1,self.n_freq+1)/self.period).repeat(2)
self.basis_phi = np.zeros(self.n_freq * 2)
self.basis_phi[::2] = -np.pi/2
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
def Gram_matrix(self):
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
@silence_errors
def update_gradients_full(self,dL_dK,X,X2):
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
#dK_dlen
da_dlen = [-6/self.lengthscale**3,-2*np.sqrt(3)/self.lengthscale**2,0.]
db_dlen = [0.,2*self.lengthscale/3.]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega,da_dlen[2]*self.basis_omega**2))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dG_dlen = self.lengthscale**2/(4*np.sqrt(3))*Gint + self.lengthscale**3/(12*np.sqrt(3))*dGint_dlen + db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F1lower,F1lower.T)
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dG_dper = 1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T)))
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
self.variance.gradient = np.sum(dK_dvar*dL_dK)
self.lengthscale.gradient = np.sum(dK_dlen*dL_dK)
self.period.gradient = np.sum(dK_dper*dL_dK)
class PeriodicMatern52(Periodic):
"""
Kernel of the periodic subspace (up to a given frequency) of a Matern 5/2 RKHS. Only defined for input_dim=1.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the Matern kernel
:type variance: float
:param lengthscale: the lengthscale of the Matern kernel
:type lengthscale: np.ndarray of size (input_dim,)
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
:rtype: kernel object
"""
def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_Matern52'):
super(PeriodicMatern52, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name)
def parameters_changed(self):
self.a = [5*np.sqrt(5)/self.lengthscale**3, 15./self.lengthscale**2,3*np.sqrt(5)/self.lengthscale, 1.]
self.b = [9./8, 9*self.lengthscale**4/200., 3*self.lengthscale**2/5., 3*self.lengthscale**2/(5*8.), 3*self.lengthscale**2/(5*8.)]
self.basis_alpha = np.ones((2*self.n_freq,))
self.basis_omega = (2*np.pi*np.arange(1,self.n_freq+1)/self.period).repeat(2)
self.basis_phi = np.zeros(self.n_freq * 2)
self.basis_phi[::2] = -np.pi/2
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
def Gram_matrix(self):
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
lower_terms = self.b[0]*np.dot(Flower,Flower.T) + self.b[1]*np.dot(F2lower,F2lower.T) + self.b[2]*np.dot(F1lower,F1lower.T) + self.b[3]*np.dot(F2lower,Flower.T) + self.b[4]*np.dot(Flower,F2lower.T)
return(3*self.lengthscale**5/(400*np.sqrt(5)*self.variance) * Gint + 1./self.variance*lower_terms)
@silence_errors
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
#dK_dlen
da_dlen = [-3*self.a[0]/self.lengthscale, -2*self.a[1]/self.lengthscale, -self.a[2]/self.lengthscale, 0.]
db_dlen = [0., 4*self.b[1]/self.lengthscale, 2*self.b[2]/self.lengthscale, 2*self.b[3]/self.lengthscale, 2*self.b[4]/self.lengthscale]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)), da_dlen[1]*self.basis_omega, da_dlen[2]*self.basis_omega**2, da_dlen[3]*self.basis_omega**3))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dlower_terms_dlen = db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F2lower,F2lower.T) + db_dlen[2]*np.dot(F1lower,F1lower.T) + db_dlen[3]*np.dot(F2lower,Flower.T) + db_dlen[4]*np.dot(Flower,F2lower.T)
dG_dlen = 15*self.lengthscale**4/(400*np.sqrt(5))*Gint + 3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dlen + dlower_terms_dlen
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period, -self.a[3]*self.basis_omega**4/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2,self.basis_phi))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF2lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**3/self.period,self.basis_omega,self.basis_phi+np.pi*3/2)(self.lower) + self._cos(-2*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
dlower_terms_dper = self.b[0] * (np.dot(dFlower_dper,Flower.T) + np.dot(Flower.T,dFlower_dper))
dlower_terms_dper += self.b[1] * (np.dot(dF2lower_dper,F2lower.T) + np.dot(F2lower,dF2lower_dper.T)) - 4*self.b[1]/self.period*np.dot(F2lower,F2lower.T)
dlower_terms_dper += self.b[2] * (np.dot(dF1lower_dper,F1lower.T) + np.dot(F1lower,dF1lower_dper.T)) - 2*self.b[2]/self.period*np.dot(F1lower,F1lower.T)
dlower_terms_dper += self.b[3] * (np.dot(dF2lower_dper,Flower.T) + np.dot(F2lower,dFlower_dper.T)) - 2*self.b[3]/self.period*np.dot(F2lower,Flower.T)
dlower_terms_dper += self.b[4] * (np.dot(dFlower_dper,F2lower.T) + np.dot(Flower,dF2lower_dper.T)) - 2*self.b[4]/self.period*np.dot(Flower,F2lower.T)
dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
self.variance.gradient = np.sum(dK_dvar*dL_dK)
self.lengthscale.gradient = np.sum(dK_dlen*dL_dK)
self.period.gradient = np.sum(dK_dper*dL_dK)

41
GPy/kern/_src/poly.py Normal file
View file

@ -0,0 +1,41 @@
# Copyright (c) 2014, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Poly(Kern):
"""
Polynomial kernel
"""
def __init__(self, input_dim, variance=1., order=3., active_dims=None, name='poly'):
super(Poly, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp())
self.link_parameter(self.variance)
self.order=order
def K(self, X, X2=None):
return (self._dot_product(X, X2) + 1.)**self.order * self.variance
def _dot_product(self, X, X2=None):
if X2 is None:
return np.dot(X, X.T)
else:
return np.dot(X, X2.T)
def Kdiag(self, X):
return self.variance*(np.square(X).sum(1) + 1.)**self.order
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.sum(dL_dK * (self._dot_product(X, X2) + 1.)**self.order)
def update_gradients_diag(self, dL_dKdiag, X):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2=None):
raise NotImplementedError
def gradients_X_diag(self, dL_dKdiag, X):
raise NotImplementedError

66
GPy/kern/_src/prod.py Normal file
View file

@ -0,0 +1,66 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kern import CombinationKernel
from ...util.caching import Cache_this
import itertools
class Prod(CombinationKernel):
"""
Computes the product of 2 kernels
:param k1, k2: the kernels to multiply
:type k1, k2: Kern
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object
"""
def __init__(self, kernels, name='mul'):
for i, kern in enumerate(kernels[:]):
if isinstance(kern, Prod):
del kernels[i]
for part in kern.parts[::-1]:
kern.unlink_parameter(part)
kernels.insert(i, part)
super(Prod, self).__init__(kernels, name)
@Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.multiply, (p.K(X, X2) for p in which_parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
if which_parts is None:
which_parts = self.parts
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None):
k = self.K(X,X2)*dL_dK
for p in self.parts:
p.update_gradients_full(k/p.K(X,X2),X,X2)
def update_gradients_diag(self, dL_dKdiag, X):
k = self.Kdiag(X)*dL_dKdiag
for p in self.parts:
p.update_gradients_diag(k/p.Kdiag(X),X)
def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape)
k = self.K(X,X2)*dL_dK
for p in self.parts:
target += p.gradients_X(k/p.K(X,X2),X,X2)
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
k = self.Kdiag(X)*dL_dKdiag
for p in self.parts:
target += p.gradients_X_diag(k/p.Kdiag(X),X)
return target

View file

@ -0,0 +1,55 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from ....core.parameterization.parameter_core import Pickleable
from GPy.util.caching import Cache_this
from ....core.parameterization import variational
import rbf_psi_comp
import ssrbf_psi_comp
import sslinear_psi_comp
import linear_psi_comp
class PSICOMP_RBF(Pickleable):
@Cache_this(limit=2, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
else:
raise ValueError, "unknown distriubtion received for psi-statistics"
@Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
else:
raise ValueError, "unknown distriubtion received for psi-statistics"
def _setup_observers(self):
pass
class PSICOMP_Linear(Pickleable):
@Cache_this(limit=2, ignore_args=(0,))
def psicomputations(self, variance, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return linear_psi_comp.psicomputations(variance, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return sslinear_psi_comp.psicomputations(variance, Z, variational_posterior)
else:
raise ValueError, "unknown distriubtion received for psi-statistics"
@Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return linear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
else:
raise ValueError, "unknown distriubtion received for psi-statistics"
def _setup_observers(self):
pass

View file

@ -0,0 +1,77 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The package for the Psi statistics computation of the linear kernel for Bayesian GPLVM
"""
import numpy as np
from ....util.linalg import tdot
def psicomputations(variance, Z, variational_posterior):
"""
Compute psi-statistics for ss-linear kernel
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# psi0 N
# psi1 NxM
# psi2 MxM
mu = variational_posterior.mean
S = variational_posterior.variance
psi0 = (variance*(np.square(mu)+S)).sum(axis=1)
psi1 = np.dot(mu,(variance*Z).T)
psi2 = np.dot(S.sum(axis=0)*np.square(variance)*Z,Z.T)+ tdot(psi1.T)
return psi0, psi1, psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
dL_dvar, dL_dmu, dL_dS, dL_dZ = _psi2computations(dL_dpsi2, variance, Z, mu, S)
# Compute for psi0 and psi1
mu2S = np.square(mu)+S
dL_dpsi0_var = dL_dpsi0[:,None]*variance[None,:]
dL_dpsi1_mu = np.dot(dL_dpsi1.T,mu)
dL_dvar += (dL_dpsi0[:,None]*mu2S).sum(axis=0)+ (dL_dpsi1_mu*Z).sum(axis=0)
dL_dmu += 2.*dL_dpsi0_var*mu+np.dot(dL_dpsi1,Z)*variance
dL_dS += dL_dpsi0_var
dL_dZ += dL_dpsi1_mu*variance
return dL_dvar, dL_dZ, dL_dmu, dL_dS
def _psi2computations(dL_dpsi2, variance, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 and psi2
# Produced intermediate results:
# _psi2_dvariance Q
# _psi2_dZ MxQ
# _psi2_dmu NxQ
# _psi2_dS NxQ
variance2 = np.square(variance)
common_sum = np.dot(mu,(variance*Z).T)
Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)
dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
common_expect = np.dot(common_sum,np.dot(dL_dpsi2T,Z))
Z2_expect = np.inner(common_sum,dL_dpsi2T)
Z1_expect = np.dot(dL_dpsi2T,Z)
dL_dvar = 2.*S.sum(axis=0)*variance*Z_expect+(common_expect*mu).sum(axis=0)
dL_dmu = common_expect*variance
dL_dS = np.empty(S.shape)
dL_dS[:] = Z_expect*variance2
dL_dZ = variance2*S.sum(axis=0)*Z1_expect+np.dot(Z2_expect.T,variance*mu)
return dL_dvar, dL_dmu, dL_dS, dL_dZ

View file

@ -0,0 +1,161 @@
"""
The module for psi-statistics for RBF kernel
"""
import numpy as np
from GPy.util.caching import Cacher
def psicomputations(variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0)
return psi0, psi1, psi2
def __psi1computations(variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results:
# _psi1 NxM
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N
_psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.)
_psi1 = variance*np.exp(_psi1_log)
return _psi1
def __psi2computations(variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi2
# Produced intermediate results:
# _psi2 MxM
lengthscale2 = np.square(lengthscale)
_psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
_psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
denom = 1./(2.*S+lengthscale2)
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom)
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
dL_dlengscale = dl_psi1 + dl_psi2
if not ARD:
dL_dlengscale = dL_dlengscale.sum()
dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
"""
dL_dpsi1 - NxM
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results: dL_dparams w.r.t. psi1
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
_psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
Lpsi1 = dL_dpsi1*_psi1
Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
denom = 1./(S+lengthscale2)
Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ
_dL_dvar = Lpsi1.sum()/variance
_dL_dmu = np.einsum('nm,nmq,nq->nq',Lpsi1,Zmu,denom)
_dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2.
_dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom)
_dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale)
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
dL_dpsi2 - MxM
"""
# here are the "statistics" for psi2
# Produced the derivatives w.r.t. psi2:
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
denom = 1./(2*S+lengthscale2)
denom2 = np.square(denom)
_psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
Lpsi2 = dL_dpsi2*_psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out
Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ
Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ
Lpsi2Zhat = Lpsi2Z
Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2
_dL_dvar = Lpsi2sum.sum()*2/variance
_dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
_dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
_dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
_dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))-
(2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
_psi1computations = Cacher(__psi1computations, limit=1)
_psi2computations = Cacher(__psi2computations, limit=1)

View file

@ -0,0 +1,411 @@
"""
The module for psi-statistics for RBF kernel
"""
import numpy as np
from ....util.caching import Cache_this
from . import PSICOMP_RBF
from ....util import gpu_init
try:
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
from ....util.linalg_gpu import sum_axis
except:
pass
gpu_code = """
// define THREADNUM
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NMM(n,m1,m2) ((m2*M+m1)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_NM(n,m) (m*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define IDX_MM(m1,m2) (m2*M+m1)
#define IDX_NQB(n,q,b) ((b*Q+q)*N+n)
#define IDX_QB(q,b) (b*Q+q)
// Divide data evenly
__device__ void divide_data(int total_data, int psize, int pidx, int *start, int *end) {
int residue = (total_data)%psize;
if(pidx<residue) {
int size = total_data/psize+1;
*start = size*pidx;
*end = *start+size;
} else {
int size = total_data/psize;
*start = size*pidx+residue;
*end = *start+size;
}
}
__device__ void reduce_sum(double* array, int array_size) {
int s;
if(array_size >= blockDim.x) {
for(int i=blockDim.x+threadIdx.x; i<array_size; i+= blockDim.x) {
array[threadIdx.x] += array[i];
}
array_size = blockDim.x;
}
__syncthreads();
for(int i=1; i<=array_size;i*=2) {s=i;}
if(threadIdx.x < array_size-s) {array[threadIdx.x] += array[s+threadIdx.x];}
__syncthreads();
for(s=s/2;s>=1;s=s/2) {
if(threadIdx.x < s) {array[threadIdx.x] += array[s+threadIdx.x];}
__syncthreads();
}
}
__global__ void compDenom(double *log_denom1, double *log_denom2, double *l, double *S, int N, int Q)
{
int n_start, n_end;
divide_data(N, gridDim.x, blockIdx.x, &n_start, &n_end);
for(int i=n_start*Q+threadIdx.x; i<n_end*Q; i+=blockDim.x) {
int n=i/Q;
int q=i%Q;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
log_denom1[IDX_NQ(n,q)] = log(Snq/lq+1.);
log_denom2[IDX_NQ(n,q)] = log(2.*Snq/lq+1.);
}
}
__global__ void psi1computations(double *psi1, double *log_denom1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int m_start, m_end;
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
for(int m=m_start; m<m_end; m++) {
for(int n=threadIdx.x; n<N; n+= blockDim.x) {
double log_psi1 = 0;
for(int q=0;q<Q;q++) {
double muZ = mu[IDX_NQ(n,q)]-Z[IDX_MQ(m,q)];
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
log_psi1 += (muZ*muZ/(Snq+lq)+log_denom1[IDX_NQ(n,q)])/(-2.);
}
psi1[IDX_NM(n,m)] = var*exp(log_psi1);
}
}
}
__global__ void psi2computations(double *psi2, double *psi2n, double *log_denom2, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int psi2_idx_start, psi2_idx_end;
__shared__ double psi2_local[THREADNUM];
divide_data((M+1)*M/2, gridDim.x, blockIdx.x, &psi2_idx_start, &psi2_idx_end);
for(int psi2_idx=psi2_idx_start; psi2_idx<psi2_idx_end; psi2_idx++) {
int m1 = int((sqrt(8.*psi2_idx+1.)-1.)/2.);
int m2 = psi2_idx - (m1+1)*m1/2;
psi2_local[threadIdx.x] = 0;
for(int n=threadIdx.x;n<N;n+=blockDim.x) {
double log_psi2_n = 0;
for(int q=0;q<Q;q++) {
double dZ = Z[IDX_MQ(m1,q)] - Z[IDX_MQ(m2,q)];
double muZhat = mu[IDX_NQ(n,q)]- (Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)])/2.;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
log_psi2_n += dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) + log_denom2[IDX_NQ(n,q)]/(-2.);
}
double exp_psi2_n = exp(log_psi2_n);
psi2n[IDX_NMM(n,m1,m2)] = var*var*exp_psi2_n;
if(m1!=m2) { psi2n[IDX_NMM(n,m2,m1)] = var*var*exp_psi2_n;}
psi2_local[threadIdx.x] += exp_psi2_n;
}
__syncthreads();
reduce_sum(psi2_local, THREADNUM);
if(threadIdx.x==0) {
psi2[IDX_MM(m1,m2)] = var*var*psi2_local[0];
if(m1!=m2) { psi2[IDX_MM(m2,m1)] = var*var*psi2_local[0]; }
}
__syncthreads();
}
}
__global__ void psi1compDer(double *dvar, double *dl, double *dZ, double *dmu, double *dS, double *dL_dpsi1, double *psi1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int m_start, m_end;
__shared__ double g_local[THREADNUM];
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
int P = int(ceil(double(N)/THREADNUM));
double dvar_local = 0;
for(int q=0;q<Q;q++) {
double lq_sqrt = l[q];
double lq = lq_sqrt*lq_sqrt;
double dl_local = 0;
for(int p=0;p<P;p++) {
int n = p*THREADNUM + threadIdx.x;
double dmu_local = 0;
double dS_local = 0;
double Snq,mu_nq;
if(n<N) {Snq = S[IDX_NQ(n,q)]; mu_nq=mu[IDX_NQ(n,q)];}
for(int m=m_start; m<m_end; m++) {
if(n<N) {
double lpsi1 = psi1[IDX_NM(n,m)]*dL_dpsi1[IDX_NM(n,m)];
if(q==0) {dvar_local += lpsi1;}
double Zmu = Z[IDX_MQ(m,q)] - mu_nq;
double denom = Snq+lq;
double Zmu2_denom = Zmu*Zmu/denom;
dmu_local += lpsi1*Zmu/denom;
dS_local += lpsi1*(Zmu2_denom-1.)/denom;
dl_local += lpsi1*(Zmu2_denom+Snq/lq)/denom;
g_local[threadIdx.x] = -lpsi1*Zmu/denom;
}
__syncthreads();
reduce_sum(g_local, p<P-1?THREADNUM:N-(P-1)*THREADNUM);
if(threadIdx.x==0) {dZ[IDX_MQ(m,q)] += g_local[0];}
}
if(n<N) {
dmu[IDX_NQB(n,q,blockIdx.x)] += dmu_local;
dS[IDX_NQB(n,q,blockIdx.x)] += dS_local/2.;
}
__threadfence_block();
}
g_local[threadIdx.x] = dl_local*lq_sqrt;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dl[IDX_QB(q,blockIdx.x)] += g_local[0];}
}
g_local[threadIdx.x] = dvar_local;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dvar[blockIdx.x] += g_local[0]/var;}
}
__global__ void psi2compDer(double *dvar, double *dl, double *dZ, double *dmu, double *dS, double *dL_dpsi2, double *psi2n, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int m_start, m_end;
__shared__ double g_local[THREADNUM];
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
int P = int(ceil(double(N)/THREADNUM));
double dvar_local = 0;
for(int q=0;q<Q;q++) {
double lq_sqrt = l[q];
double lq = lq_sqrt*lq_sqrt;
double dl_local = 0;
for(int p=0;p<P;p++) {
int n = p*THREADNUM + threadIdx.x;
double dmu_local = 0;
double dS_local = 0;
double Snq,mu_nq;
if(n<N) {Snq = S[IDX_NQ(n,q)]; mu_nq=mu[IDX_NQ(n,q)];}
for(int m1=m_start; m1<m_end; m1++) {
g_local[threadIdx.x] = 0;
for(int m2=0;m2<M;m2++) {
if(n<N) {
double lpsi2 = psi2n[IDX_NMM(n,m1,m2)]*dL_dpsi2[IDX_MM(m1,m2)];
if(q==0) {dvar_local += lpsi2;}
double dZ = Z[IDX_MQ(m1,q)] - Z[IDX_MQ(m2,q)];
double muZhat = mu_nq - (Z[IDX_MQ(m1,q)] + Z[IDX_MQ(m2,q)])/2.;
double denom = 2.*Snq+lq;
double muZhat2_denom = muZhat*muZhat/denom;
dmu_local += lpsi2*muZhat/denom;
dS_local += lpsi2*(2.*muZhat2_denom-1.)/denom;
dl_local += lpsi2*((Snq/lq+muZhat2_denom)/denom+dZ*dZ/(4.*lq*lq));
g_local[threadIdx.x] += 2.*lpsi2*(muZhat/denom-dZ/(2*lq));
}
}
__syncthreads();
reduce_sum(g_local, p<P-1?THREADNUM:N-(P-1)*THREADNUM);
if(threadIdx.x==0) {dZ[IDX_MQ(m1,q)] += g_local[0];}
}
if(n<N) {
dmu[IDX_NQB(n,q,blockIdx.x)] += -2.*dmu_local;
dS[IDX_NQB(n,q,blockIdx.x)] += dS_local;
}
__threadfence_block();
}
g_local[threadIdx.x] = dl_local*2.*lq_sqrt;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dl[IDX_QB(q,blockIdx.x)] += g_local[0];}
}
g_local[threadIdx.x] = dvar_local;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dvar[blockIdx.x] += g_local[0]*2/var;}
}
"""
class PSICOMP_RBF_GPU(PSICOMP_RBF):
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
self.GPU_direct = GPU_direct
self.gpuCache = None
self.threadnum = threadnum
self.blocknum = blocknum
module = SourceModule("#define THREADNUM "+str(self.threadnum)+"\n"+gpu_code)
self.g_psi1computations = module.get_function('psi1computations')
self.g_psi1computations.prepare('PPdPPPPiii')
self.g_psi2computations = module.get_function('psi2computations')
self.g_psi2computations.prepare('PPPdPPPPiii')
self.g_psi1compDer = module.get_function('psi1compDer')
self.g_psi1compDer.prepare('PPPPPPPdPPPPiii')
self.g_psi2compDer = module.get_function('psi2compDer')
self.g_psi2compDer.prepare('PPPPPPPdPPPPiii')
self.g_compDenom = module.get_function('compDenom')
self.g_compDenom.prepare('PPPPii')
def __deepcopy__(self, memo):
s = PSICOMP_RBF_GPU(threadnum=self.threadnum, blocknum=self.blocknum, GPU_direct=self.GPU_direct)
memo[id(self)] = s
return s
def _initGPUCache(self, N, M, Q):
if self.gpuCache == None:
self.gpuCache = {
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'psi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
'psi2n_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'),
'dL_dpsi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'dL_dpsi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
'log_denom1_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'log_denom2_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
# derivatives
'dvar_gpu' :gpuarray.empty((self.blocknum,),np.float64, order='F'),
'dl_gpu' :gpuarray.empty((Q,self.blocknum),np.float64, order='F'),
'dZ_gpu' :gpuarray.empty((M,Q),np.float64, order='F'),
'dmu_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
'dS_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
# grad
'grad_l_gpu' :gpuarray.empty((Q,),np.float64, order='F'),
'grad_mu_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
'grad_S_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
}
else:
assert N==self.gpuCache['mu_gpu'].shape[0]
assert M==self.gpuCache['Z_gpu'].shape[0]
assert Q==self.gpuCache['l_gpu'].shape[0]
def sync_params(self, lengthscale, Z, mu, S):
if len(lengthscale)==1:
self.gpuCache['l_gpu'].fill(lengthscale)
else:
self.gpuCache['l_gpu'].set(np.asfortranarray(lengthscale))
self.gpuCache['Z_gpu'].set(np.asfortranarray(Z))
self.gpuCache['mu_gpu'].set(np.asfortranarray(mu))
self.gpuCache['S_gpu'].set(np.asfortranarray(S))
N,Q = self.gpuCache['S_gpu'].shape
# t=self.g_compDenom(self.gpuCache['log_denom1_gpu'],self.gpuCache['log_denom2_gpu'],self.gpuCache['l_gpu'],self.gpuCache['S_gpu'], np.int32(N), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_compDenom '+str(t)
self.g_compDenom.prepared_call((self.blocknum,1),(self.threadnum,1,1), self.gpuCache['log_denom1_gpu'].gpudata,self.gpuCache['log_denom2_gpu'].gpudata,self.gpuCache['l_gpu'].gpudata,self.gpuCache['S_gpu'].gpudata, np.int32(N), np.int32(Q))
def reset_derivative(self):
self.gpuCache['dvar_gpu'].fill(0.)
self.gpuCache['dl_gpu'].fill(0.)
self.gpuCache['dZ_gpu'].fill(0.)
self.gpuCache['dmu_gpu'].fill(0.)
self.gpuCache['dS_gpu'].fill(0.)
self.gpuCache['grad_l_gpu'].fill(0.)
self.gpuCache['grad_mu_gpu'].fill(0.)
self.gpuCache['grad_S_gpu'].fill(0.)
def get_dimensions(self, Z, variational_posterior):
return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
@Cache_this(limit=1, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
"""
N,M,Q = self.get_dimensions(Z, variational_posterior)
self._initGPUCache(N,M,Q)
self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
psi1_gpu = self.gpuCache['psi1_gpu']
psi2_gpu = self.gpuCache['psi2_gpu']
psi2n_gpu = self.gpuCache['psi2n_gpu']
l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
log_denom1_gpu = self.gpuCache['log_denom1_gpu']
log_denom2_gpu = self.gpuCache['log_denom2_gpu']
psi0 = np.empty((N,))
psi0[:] = variance
self.g_psi1computations.prepared_call((self.blocknum,1),(self.threadnum,1,1),psi1_gpu.gpudata, log_denom1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
self.g_psi2computations.prepared_call((self.blocknum,1),(self.threadnum,1,1),psi2_gpu.gpudata, psi2n_gpu.gpudata, log_denom2_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
# t = self.g_psi1computations(psi1_gpu, log_denom1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi1computations '+str(t)
# t = self.g_psi2computations(psi2_gpu, psi2n_gpu, log_denom2_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi2computations '+str(t)
if self.GPU_direct:
return psi0, psi1_gpu, psi2_gpu
else:
return psi0, psi1_gpu.get(), psi2_gpu.get()
@Cache_this(limit=1, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
N,M,Q = self.get_dimensions(Z, variational_posterior)
psi1_gpu = self.gpuCache['psi1_gpu']
psi2n_gpu = self.gpuCache['psi2n_gpu']
l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
dvar_gpu = self.gpuCache['dvar_gpu']
dl_gpu = self.gpuCache['dl_gpu']
dZ_gpu = self.gpuCache['dZ_gpu']
dmu_gpu = self.gpuCache['dmu_gpu']
dS_gpu = self.gpuCache['dS_gpu']
grad_l_gpu = self.gpuCache['grad_l_gpu']
grad_mu_gpu = self.gpuCache['grad_mu_gpu']
grad_S_gpu = self.gpuCache['grad_S_gpu']
if self.GPU_direct:
dL_dpsi1_gpu = dL_dpsi1
dL_dpsi2_gpu = dL_dpsi2
dL_dpsi0_sum = dL_dpsi0.get().sum() #gpuarray.sum(dL_dpsi0).get()
else:
dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
dL_dpsi1_gpu.set(np.asfortranarray(dL_dpsi1))
dL_dpsi2_gpu.set(np.asfortranarray(dL_dpsi2))
dL_dpsi0_sum = dL_dpsi0.sum()
self.reset_derivative()
# t=self.g_psi1compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi1_gpu,psi1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi1compDer '+str(t)
# t=self.g_psi2compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi2_gpu,psi2n_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi2compDer '+str(t)
self.g_psi1compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dL_dpsi1_gpu.gpudata,psi1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
self.g_psi2compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dL_dpsi2_gpu.gpudata,psi2n_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
dL_dvar = dL_dpsi0_sum + dvar_gpu.get().sum()#gpuarray.sum(dvar_gpu).get()
sum_axis(grad_mu_gpu,dmu_gpu,N*Q,self.blocknum)
dL_dmu = grad_mu_gpu.get()
sum_axis(grad_S_gpu,dS_gpu,N*Q,self.blocknum)
dL_dS = grad_S_gpu.get()
dL_dZ = dZ_gpu.get()
if ARD:
sum_axis(grad_l_gpu,dl_gpu,Q,self.blocknum)
dL_dlengscale = grad_l_gpu.get()
else:
dL_dlengscale = dl_gpu.get().sum() #gpuarray.sum(dl_gpu).get()
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS

View file

@ -0,0 +1,92 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The package for the Psi statistics computation of the linear kernel for SSGPLVM
"""
from ....util.linalg import tdot
import numpy as np
def psicomputations(variance, Z, variational_posterior):
"""
Compute psi-statistics for ss-linear kernel
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# psi0 N
# psi1 NxM
# psi2 MxM
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
psi0 = (gamma*(np.square(mu)+S)*variance).sum(axis=-1)
psi1 = np.inner(variance*gamma*mu,Z)
psi2 = np.inner(np.square(variance)*(gamma*((1-gamma)*np.square(mu)+S)).sum(axis=0)*Z,Z)+tdot(psi1.T)
return psi0, psi1, psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ = _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma)
# Compute for psi0 and psi1
mu2S = np.square(mu)+S
dL_dvar += np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu)
dL_dgamma += np.einsum('n,q,nq->nq',dL_dpsi0,variance,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,variance,Z,mu)
dL_dmu += np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*variance,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,variance,Z)
dL_dS += np.einsum('n,nq,q->nq',dL_dpsi0,gamma,variance)
dL_dZ += np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, variance,mu)
return dL_dvar, dL_dZ, dL_dmu, dL_dS, dL_dgamma
def _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 and psi2
# Produced intermediate results:
# _psi2_dvariance Q
# _psi2_dZ MxQ
# _psi2_dgamma NxQ
# _psi2_dmu NxQ
# _psi2_dS NxQ
mu2 = np.square(mu)
gamma2 = np.square(gamma)
variance2 = np.square(variance)
mu2S = mu2+S # NxQ
gvm = np.einsum('nq,nq,q->nq',gamma,mu,variance)
common_sum = np.einsum('nq,mq->nm',gvm,Z)
# common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM
Z_expect = np.einsum('mo,mq,oq->q',dL_dpsi2,Z,Z)
dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
tmp = np.einsum('mo,oq->mq',dL_dpsi2T,Z)
common_expect = np.einsum('mq,nm->nq',tmp,common_sum)
# common_expect = np.einsum('mo,mq,no->nq',dL_dpsi2+dL_dpsi2.T,Z,common_sum)
Z2_expect = np.einsum('om,nm->no',dL_dpsi2T,common_sum)
Z1_expect = np.einsum('om,mq->oq',dL_dpsi2T,Z)
dL_dvar = np.einsum('nq,q,q->q',2.*(gamma*mu2S-gamma2*mu2),variance,Z_expect)+\
np.einsum('nq,nq,nq->q',common_expect,gamma,mu)
dL_dgamma = np.einsum('q,q,nq->nq',Z_expect,variance2,(mu2S-2.*gamma*mu2))+\
np.einsum('nq,q,nq->nq',common_expect,variance,mu)
dL_dmu = np.einsum('q,q,nq,nq->nq',Z_expect,variance2,mu,2.*(gamma-gamma2))+\
np.einsum('nq,nq,q->nq',common_expect,gamma,variance)
dL_dS = np.einsum('q,nq,q->nq',Z_expect,gamma,variance2)
# dL_dZ = 2.*(np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma,variance2,Z,(mu2S-gamma*mu2))+np.einsum('om,nq,q,nq,nm->oq',dL_dpsi2,gamma,variance,mu,common_sum))
dL_dZ = Z1_expect*np.einsum('nq,q,nq->q',gamma,variance2,(mu2S-gamma*mu2))+np.einsum('nq,q,nq,nm->mq',gamma,variance,mu,Z2_expect)
return dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ

View file

@ -0,0 +1,394 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The package for the psi statistics computation
"""
import numpy as np
try:
from scipy import weave
def _psicomputations(variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1)
log_denom2 = np.log(2*S/l2+1)
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
variance = float(variance)
psi0 = np.empty(N)
psi0[:] = variance
psi1 = np.empty((N,M))
psi2n = np.empty((N,M,M))
from ....util.misc import param_to_array
S = param_to_array(S)
mu = param_to_array(mu)
Z = param_to_array(Z)
support_code = """
#include <math.h>
"""
code = """
for(int n=0; n<N; n++) {
for(int m1=0;m1<M;m1++) {
double log_psi1=0;
for(int m2=0;m2<=m1;m2++) {
double log_psi2_n=0;
for(int q=0;q<Q;q++) {
double Snq = S(n,q);
double lq = l2(q);
double Zm1q = Z(m1,q);
double Zm2q = Z(m2,q);
if(m2==0) {
// Compute Psi_1
double muZ = mu(n,q)-Z(m1,q);
double psi1_exp1 = log_gamma(n,q) - (muZ*muZ/(Snq+lq) +log_denom1(n,q))/2.;
double psi1_exp2 = log_gamma1(n,q) -Zm1q*Zm1q/(2.*lq);
log_psi1 += (psi1_exp1>psi1_exp2)?psi1_exp1+log1p(exp(psi1_exp2-psi1_exp1)):psi1_exp2+log1p(exp(psi1_exp1-psi1_exp2));
}
// Compute Psi_2
double muZhat = mu(n,q) - (Zm1q+Zm2q)/2.;
double Z2 = Zm1q*Zm1q+ Zm2q*Zm2q;
double dZ = Zm1q - Zm2q;
double psi2_exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
double psi2_exp2 = log_gamma1(n,q) - Z2/(2.*lq);
log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2));
}
double exp_psi2_n = exp(log_psi2_n);
psi2n(n,m1,m2) = variance*variance*exp_psi2_n;
if(m1!=m2) { psi2n(n,m2,m1) = variance*variance*exp_psi2_n;}
}
psi1(n,m1) = variance*exp(log_psi1);
}
}
"""
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
psi2 = psi2n.sum(axis=0)
return psi0,psi1,psi2,psi2n
from GPy.util.caching import Cacher
psicomputations = Cacher(_psicomputations, limit=1)
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
_,psi1,_,psi2n = psicomputations(variance, lengthscale, Z, variational_posterior)
mu = variational_posterior.mean
S = variational_posterior.variance
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1)
log_denom2 = np.log(2*S/l2+1)
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
gamma, gamma1 = variational_posterior.gamma_probabilities()
variance = float(variance)
dvar = np.zeros(1)
dmu = np.zeros((N,Q))
dS = np.zeros((N,Q))
dgamma = np.zeros((N,Q))
dl = np.zeros(Q)
dZ = np.zeros((M,Q))
dvar += np.sum(dL_dpsi0)
from ....util.misc import param_to_array
S = param_to_array(S)
mu = param_to_array(mu)
Z = param_to_array(Z)
support_code = """
#include <math.h>
"""
code = """
for(int n=0; n<N; n++) {
for(int m1=0;m1<M;m1++) {
double log_psi1=0;
for(int m2=0;m2<M;m2++) {
double log_psi2_n=0;
for(int q=0;q<Q;q++) {
double Snq = S(n,q);
double lq = l2(q);
double Zm1q = Z(m1,q);
double Zm2q = Z(m2,q);
double gnq = gamma(n,q);
double g1nq = gamma1(n,q);
double mu_nq = mu(n,q);
if(m2==0) {
// Compute Psi_1
double lpsi1 = psi1(n,m1)*dL_dpsi1(n,m1);
if(q==0) {dvar(0) += lpsi1/variance;}
double Zmu = Zm1q - mu_nq;
double denom = Snq+lq;
double Zmu2_denom = Zmu*Zmu/denom;
double exp1 = log_gamma(n,q)-(Zmu*Zmu/(Snq+lq)+log_denom1(n,q))/(2.);
double exp2 = log_gamma1(n,q)-Zm1q*Zm1q/(2.*lq);
double d_exp1,d_exp2;
if(exp1>exp2) {
d_exp1 = 1.;
d_exp2 = exp(exp2-exp1);
} else {
d_exp1 = exp(exp1-exp2);
d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;
dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
dgamma(n,q) += lpsi1*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum);
dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}
// Compute Psi_2
double lpsi2 = psi2n(n,m1,m2)*dL_dpsi2(m1,m2);
if(q==0) {dvar(0) += lpsi2*2/variance;}
double dZm1m2 = Zm1q - Zm2q;
double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
double muZhat = mu_nq - (Zm1q + Zm2q)/2.;
double denom = 2.*Snq+lq;
double muZhat2_denom = muZhat*muZhat/denom;
double exp1 = dZm1m2*dZm1m2/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
double exp2 = log_gamma1(n,q) - Z2/(2.*lq);
double d_exp1,d_exp2;
if(exp1>exp2) {
d_exp1 = 1.;
d_exp2 = exp(exp2-exp1);
} else {
d_exp1 = exp(exp1-exp2);
d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;
dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
dgamma(n,q) += lpsi2*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}
}
}
}
"""
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','gamma1','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
dl *= 2.*lengthscale
if not ARD:
dl = dl.sum()
return dvar, dl, dZ, dmu, dS, dgamma
except:
def psicomputations(variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
return psi0, psi1, psi2
def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results:
# _psi1 NxM
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
_psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
return _psi1
def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi2
# Produced intermediate results:
# _psi2 MxM
lengthscale2 = np.square(lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
_psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
dL_dlengscale = dl_psi1 + dl_psi2
if not ARD:
dL_dlengscale = dL_dlengscale.sum()
dL_dgamma = dgamma_psi1 + dgamma_psi2
dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
"""
dL_dpsi1 - NxM
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results: dL_dparams w.r.t. psi1
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_denom = S / lengthscale2 + 1. # NxQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ
_psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
_dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1
_dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ
_dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ
_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
dL_dpsi2 - MxM
"""
# here are the "statistics" for psi2
# Produced the derivatives w.r.t. psi2:
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2
_psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:])
_psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
_dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance
_dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z))
_dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq)
_dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
_dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma

View file

@ -0,0 +1,474 @@
"""
The module for psi-statistics for RBF kernel for Spike-and-Slab GPLVM
"""
import numpy as np
from ....util.caching import Cache_this
from . import PSICOMP_RBF
from ....util import gpu_init
try:
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
from ....util.linalg_gpu import sum_axis
except:
pass
gpu_code = """
// define THREADNUM
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NMM(n,m1,m2) ((m2*M+m1)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_NM(n,m) (m*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define IDX_MM(m1,m2) (m2*M+m1)
#define IDX_NQB(n,q,b) ((b*Q+q)*N+n)
#define IDX_QB(q,b) (b*Q+q)
// Divide data evenly
__device__ void divide_data(int total_data, int psize, int pidx, int *start, int *end) {
int residue = (total_data)%psize;
if(pidx<residue) {
int size = total_data/psize+1;
*start = size*pidx;
*end = *start+size;
} else {
int size = total_data/psize;
*start = size*pidx+residue;
*end = *start+size;
}
}
__device__ void reduce_sum(double* array, int array_size) {
int s;
if(array_size >= blockDim.x) {
for(int i=blockDim.x+threadIdx.x; i<array_size; i+= blockDim.x) {
array[threadIdx.x] += array[i];
}
array_size = blockDim.x;
}
__syncthreads();
for(int i=1; i<=array_size;i*=2) {s=i;}
if(threadIdx.x < array_size-s) {array[threadIdx.x] += array[s+threadIdx.x];}
__syncthreads();
for(s=s/2;s>=1;s=s/2) {
if(threadIdx.x < s) {array[threadIdx.x] += array[s+threadIdx.x];}
__syncthreads();
}
}
__global__ void compDenom(double *log_denom1, double *log_denom2, double *log_gamma, double*log_gamma1, double *gamma, double *l, double *S, int N, int Q)
{
int n_start, n_end;
divide_data(N, gridDim.x, blockIdx.x, &n_start, &n_end);
for(int i=n_start*Q+threadIdx.x; i<n_end*Q; i+=blockDim.x) {
int n=i/Q;
int q=i%Q;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
double gnq = gamma[IDX_NQ(n,q)];
log_denom1[IDX_NQ(n,q)] = log(Snq/lq+1.);
log_denom2[IDX_NQ(n,q)] = log(2.*Snq/lq+1.);
log_gamma[IDX_NQ(n,q)] = log(gnq);
log_gamma1[IDX_NQ(n,q)] = log(1.-gnq);
}
}
__global__ void psi1computations(double *psi1, double *log_denom1, double *log_gamma, double*log_gamma1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int m_start, m_end;
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
for(int m=m_start; m<m_end; m++) {
for(int n=threadIdx.x; n<N; n+= blockDim.x) {
double log_psi1 = 0;
for(int q=0;q<Q;q++) {
double Zmq = Z[IDX_MQ(m,q)];
double muZ = mu[IDX_NQ(n,q)]-Zmq;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
double exp1 = log_gamma[IDX_NQ(n,q)]-(muZ*muZ/(Snq+lq)+log_denom1[IDX_NQ(n,q)])/(2.);
double exp2 = log_gamma1[IDX_NQ(n,q)]-Zmq*Zmq/(2.*lq);
log_psi1 += (exp1>exp2)?exp1+log1p(exp(exp2-exp1)):exp2+log1p(exp(exp1-exp2));
}
psi1[IDX_NM(n,m)] = var*exp(log_psi1);
}
}
}
__global__ void psi2computations(double *psi2, double *psi2n, double *log_denom2, double *log_gamma, double*log_gamma1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int psi2_idx_start, psi2_idx_end;
__shared__ double psi2_local[THREADNUM];
divide_data((M+1)*M/2, gridDim.x, blockIdx.x, &psi2_idx_start, &psi2_idx_end);
for(int psi2_idx=psi2_idx_start; psi2_idx<psi2_idx_end; psi2_idx++) {
int m1 = int((sqrt(8.*psi2_idx+1.)-1.)/2.);
int m2 = psi2_idx - (m1+1)*m1/2;
psi2_local[threadIdx.x] = 0;
for(int n=threadIdx.x;n<N;n+=blockDim.x) {
double log_psi2_n = 0;
for(int q=0;q<Q;q++) {
double Zm1q = Z[IDX_MQ(m1,q)];
double Zm2q = Z[IDX_MQ(m2,q)];
double dZ = Zm1q - Zm2q;
double muZhat = mu[IDX_NQ(n,q)]- (Zm1q+Zm2q)/2.;
double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
double exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2[IDX_NQ(n,q)]/2. + log_gamma[IDX_NQ(n,q)];
double exp2 = log_gamma1[IDX_NQ(n,q)] - Z2/(2.*lq);
log_psi2_n += (exp1>exp2)?exp1+log1p(exp(exp2-exp1)):exp2+log1p(exp(exp1-exp2));
}
double exp_psi2_n = exp(log_psi2_n);
psi2n[IDX_NMM(n,m1,m2)] = var*var*exp_psi2_n;
if(m1!=m2) { psi2n[IDX_NMM(n,m2,m1)] = var*var*exp_psi2_n;}
psi2_local[threadIdx.x] += exp_psi2_n;
}
__syncthreads();
reduce_sum(psi2_local, THREADNUM);
if(threadIdx.x==0) {
psi2[IDX_MM(m1,m2)] = var*var*psi2_local[0];
if(m1!=m2) { psi2[IDX_MM(m2,m1)] = var*var*psi2_local[0]; }
}
__syncthreads();
}
}
__global__ void psi1compDer(double *dvar, double *dl, double *dZ, double *dmu, double *dS, double *dgamma, double *dL_dpsi1, double *psi1, double *log_denom1, double *log_gamma, double*log_gamma1, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q)
{
int m_start, m_end;
__shared__ double g_local[THREADNUM];
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
int P = int(ceil(double(N)/THREADNUM));
double dvar_local = 0;
for(int q=0;q<Q;q++) {
double lq_sqrt = l[q];
double lq = lq_sqrt*lq_sqrt;
double dl_local = 0;
for(int p=0;p<P;p++) {
int n = p*THREADNUM + threadIdx.x;
double dmu_local = 0;
double dS_local = 0;
double dgamma_local = 0;
double Snq,mu_nq,gnq,log_gnq,log_gnq1,log_de;
if(n<N) {Snq = S[IDX_NQ(n,q)]; mu_nq=mu[IDX_NQ(n,q)]; gnq = gamma[IDX_NQ(n,q)];
log_gnq = log_gamma[IDX_NQ(n,q)]; log_gnq1 = log_gamma1[IDX_NQ(n,q)];
log_de = log_denom1[IDX_NQ(n,q)];}
for(int m=m_start; m<m_end; m++) {
if(n<N) {
double lpsi1 = psi1[IDX_NM(n,m)]*dL_dpsi1[IDX_NM(n,m)];
if(q==0) {dvar_local += lpsi1;}
double Zmq = Z[IDX_MQ(m,q)];
double Zmu = Zmq - mu_nq;
double denom = Snq+lq;
double Zmu2_denom = Zmu*Zmu/denom;
double exp1 = log_gnq-(Zmu*Zmu/(Snq+lq)+log_de)/(2.);
double exp2 = log_gnq1-Zmq*Zmq/(2.*lq);
double d_exp1,d_exp2;
if(exp1>exp2) {
d_exp1 = 1.;
d_exp2 = exp(exp2-exp1);
} else {
d_exp1 = exp(exp1-exp2);
d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;
dmu_local += lpsi1*Zmu*d_exp1/(denom*exp_sum);
dS_local += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum);
dgamma_local += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl_local += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zmq*Zmq/(lq*lq)*d_exp2)/(2.*exp_sum);
g_local[threadIdx.x] = lpsi1*(-Zmu/denom*d_exp1-Zmq/lq*d_exp2)/exp_sum;
}
__syncthreads();
reduce_sum(g_local, p<P-1?THREADNUM:N-(P-1)*THREADNUM);
if(threadIdx.x==0) {dZ[IDX_MQ(m,q)] += g_local[0];}
}
if(n<N) {
dmu[IDX_NQB(n,q,blockIdx.x)] += dmu_local;
dS[IDX_NQB(n,q,blockIdx.x)] += dS_local/2.;
dgamma[IDX_NQB(n,q,blockIdx.x)] += dgamma_local;
}
__threadfence_block();
}
g_local[threadIdx.x] = dl_local*2.*lq_sqrt;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dl[IDX_QB(q,blockIdx.x)] += g_local[0];}
}
g_local[threadIdx.x] = dvar_local;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dvar[blockIdx.x] += g_local[0]/var;}
}
__global__ void psi2compDer(double *dvar, double *dl, double *dZ, double *dmu, double *dS, double *dgamma, double *dL_dpsi2, double *psi2n, double *log_denom2, double *log_gamma, double*log_gamma1, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q)
{
int m_start, m_end;
__shared__ double g_local[THREADNUM];
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
int P = int(ceil(double(N)/THREADNUM));
double dvar_local = 0;
for(int q=0;q<Q;q++) {
double lq_sqrt = l[q];
double lq = lq_sqrt*lq_sqrt;
double dl_local = 0;
for(int p=0;p<P;p++) {
int n = p*THREADNUM + threadIdx.x;
double dmu_local = 0;
double dS_local = 0;
double dgamma_local = 0;
double Snq,mu_nq,gnq,log_gnq,log_gnq1,log_de;
if(n<N) {Snq = S[IDX_NQ(n,q)]; mu_nq=mu[IDX_NQ(n,q)]; gnq = gamma[IDX_NQ(n,q)];
log_gnq = log_gamma[IDX_NQ(n,q)]; log_gnq1 = log_gamma1[IDX_NQ(n,q)];
log_de = log_denom2[IDX_NQ(n,q)];}
for(int m1=m_start; m1<m_end; m1++) {
g_local[threadIdx.x] = 0;
for(int m2=0;m2<M;m2++) {
if(n<N) {
double lpsi2 = psi2n[IDX_NMM(n,m1,m2)]*dL_dpsi2[IDX_MM(m1,m2)];
if(q==0) {dvar_local += lpsi2;}
double Zm1q = Z[IDX_MQ(m1,q)];
double Zm2q = Z[IDX_MQ(m2,q)];
double dZ = Zm1q - Zm2q;
double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
double muZhat = mu_nq - (Zm1q + Zm2q)/2.;
double denom = 2.*Snq+lq;
double muZhat2_denom = muZhat*muZhat/denom;
double exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_de/2. + log_gnq;
double exp2 = log_gnq1 - Z2/(2.*lq);
double d_exp1,d_exp2;
if(exp1>exp2) {
d_exp1 = 1.;
d_exp2 = exp(exp2-exp1);
} else {
d_exp1 = exp(exp1-exp2);
d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;
dmu_local += lpsi2*muZhat/denom*d_exp1/exp_sum;
dS_local += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
dgamma_local += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl_local += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZ*dZ/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
g_local[threadIdx.x] += 2.*lpsi2*((muZhat/denom-dZ/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}
}
__syncthreads();
reduce_sum(g_local, p<P-1?THREADNUM:N-(P-1)*THREADNUM);
if(threadIdx.x==0) {dZ[IDX_MQ(m1,q)] += g_local[0];}
}
if(n<N) {
dmu[IDX_NQB(n,q,blockIdx.x)] += -2.*dmu_local;
dS[IDX_NQB(n,q,blockIdx.x)] += dS_local;
dgamma[IDX_NQB(n,q,blockIdx.x)] += dgamma_local;
}
__threadfence_block();
}
g_local[threadIdx.x] = dl_local*2.*lq_sqrt;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dl[IDX_QB(q,blockIdx.x)] += g_local[0];}
}
g_local[threadIdx.x] = dvar_local;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dvar[blockIdx.x] += g_local[0]*2/var;}
}
"""
class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
self.GPU_direct = GPU_direct
self.gpuCache = None
self.threadnum = threadnum
self.blocknum = blocknum
module = SourceModule("#define THREADNUM "+str(self.threadnum)+"\n"+gpu_code)
self.g_psi1computations = module.get_function('psi1computations')
self.g_psi1computations.prepare('PPPPdPPPPiii')
self.g_psi2computations = module.get_function('psi2computations')
self.g_psi2computations.prepare('PPPPPdPPPPiii')
self.g_psi1compDer = module.get_function('psi1compDer')
self.g_psi1compDer.prepare('PPPPPPPPPPPdPPPPPiii')
self.g_psi2compDer = module.get_function('psi2compDer')
self.g_psi2compDer.prepare('PPPPPPPPPPPdPPPPPiii')
self.g_compDenom = module.get_function('compDenom')
self.g_compDenom.prepare('PPPPPPPii')
def __deepcopy__(self, memo):
s = PSICOMP_SSRBF_GPU(threadnum=self.threadnum, blocknum=self.blocknum, GPU_direct=self.GPU_direct)
memo[id(self)] = s
return s
def _initGPUCache(self, N, M, Q):
if self.gpuCache == None:
self.gpuCache = {
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'psi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
'psi2n_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'),
'dL_dpsi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'dL_dpsi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
'log_denom1_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'log_denom2_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'log_gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'log_gamma1_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
# derivatives
'dvar_gpu' :gpuarray.empty((self.blocknum,),np.float64, order='F'),
'dl_gpu' :gpuarray.empty((Q,self.blocknum),np.float64, order='F'),
'dZ_gpu' :gpuarray.empty((M,Q),np.float64, order='F'),
'dmu_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
'dS_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
'dgamma_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
# grad
'grad_l_gpu' :gpuarray.empty((Q,),np.float64, order='F'),
'grad_mu_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
'grad_S_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
'grad_gamma_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
}
else:
assert N==self.gpuCache['mu_gpu'].shape[0]
assert M==self.gpuCache['Z_gpu'].shape[0]
assert Q==self.gpuCache['l_gpu'].shape[0]
def sync_params(self, lengthscale, Z, mu, S, gamma):
if len(lengthscale)==1:
self.gpuCache['l_gpu'].fill(lengthscale)
else:
self.gpuCache['l_gpu'].set(np.asfortranarray(lengthscale))
self.gpuCache['Z_gpu'].set(np.asfortranarray(Z))
self.gpuCache['mu_gpu'].set(np.asfortranarray(mu))
self.gpuCache['S_gpu'].set(np.asfortranarray(S))
self.gpuCache['gamma_gpu'].set(np.asfortranarray(gamma))
N,Q = self.gpuCache['S_gpu'].shape
self.g_compDenom.prepared_call((self.blocknum,1),(self.threadnum,1,1), self.gpuCache['log_denom1_gpu'].gpudata,self.gpuCache['log_denom2_gpu'].gpudata,self.gpuCache['log_gamma_gpu'].gpudata,self.gpuCache['log_gamma1_gpu'].gpudata,self.gpuCache['gamma_gpu'].gpudata,self.gpuCache['l_gpu'].gpudata,self.gpuCache['S_gpu'].gpudata, np.int32(N), np.int32(Q))
def reset_derivative(self):
self.gpuCache['dvar_gpu'].fill(0.)
self.gpuCache['dl_gpu'].fill(0.)
self.gpuCache['dZ_gpu'].fill(0.)
self.gpuCache['dmu_gpu'].fill(0.)
self.gpuCache['dS_gpu'].fill(0.)
self.gpuCache['dgamma_gpu'].fill(0.)
self.gpuCache['grad_l_gpu'].fill(0.)
self.gpuCache['grad_mu_gpu'].fill(0.)
self.gpuCache['grad_S_gpu'].fill(0.)
self.gpuCache['grad_gamma_gpu'].fill(0.)
def get_dimensions(self, Z, variational_posterior):
return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
@Cache_this(limit=1, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
"""
N,M,Q = self.get_dimensions(Z, variational_posterior)
self._initGPUCache(N,M,Q)
self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
psi1_gpu = self.gpuCache['psi1_gpu']
psi2_gpu = self.gpuCache['psi2_gpu']
psi2n_gpu = self.gpuCache['psi2n_gpu']
l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
log_denom1_gpu = self.gpuCache['log_denom1_gpu']
log_denom2_gpu = self.gpuCache['log_denom2_gpu']
log_gamma_gpu = self.gpuCache['log_gamma_gpu']
log_gamma1_gpu = self.gpuCache['log_gamma1_gpu']
psi0 = np.empty((N,))
psi0[:] = variance
self.g_psi1computations.prepared_call((self.blocknum,1),(self.threadnum,1,1),psi1_gpu.gpudata, log_denom1_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
self.g_psi2computations.prepared_call((self.blocknum,1),(self.threadnum,1,1),psi2_gpu.gpudata, psi2n_gpu.gpudata, log_denom2_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
if self.GPU_direct:
return psi0, psi1_gpu, psi2_gpu
else:
return psi0, psi1_gpu.get(), psi2_gpu.get()
@Cache_this(limit=1, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
N,M,Q = self.get_dimensions(Z, variational_posterior)
psi1_gpu = self.gpuCache['psi1_gpu']
psi2n_gpu = self.gpuCache['psi2n_gpu']
l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
gamma_gpu = self.gpuCache['gamma_gpu']
dvar_gpu = self.gpuCache['dvar_gpu']
dl_gpu = self.gpuCache['dl_gpu']
dZ_gpu = self.gpuCache['dZ_gpu']
dmu_gpu = self.gpuCache['dmu_gpu']
dS_gpu = self.gpuCache['dS_gpu']
dgamma_gpu = self.gpuCache['dgamma_gpu']
grad_l_gpu = self.gpuCache['grad_l_gpu']
grad_mu_gpu = self.gpuCache['grad_mu_gpu']
grad_S_gpu = self.gpuCache['grad_S_gpu']
grad_gamma_gpu = self.gpuCache['grad_gamma_gpu']
log_denom1_gpu = self.gpuCache['log_denom1_gpu']
log_denom2_gpu = self.gpuCache['log_denom2_gpu']
log_gamma_gpu = self.gpuCache['log_gamma_gpu']
log_gamma1_gpu = self.gpuCache['log_gamma1_gpu']
if self.GPU_direct:
dL_dpsi1_gpu = dL_dpsi1
dL_dpsi2_gpu = dL_dpsi2
dL_dpsi0_sum = gpuarray.sum(dL_dpsi0).get()
else:
dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
dL_dpsi1_gpu.set(np.asfortranarray(dL_dpsi1))
dL_dpsi2_gpu.set(np.asfortranarray(dL_dpsi2))
dL_dpsi0_sum = dL_dpsi0.sum()
self.reset_derivative()
# t=self.g_psi1compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi1_gpu,psi1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi1compDer '+str(t)
# t=self.g_psi2compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi2_gpu,psi2n_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi2compDer '+str(t)
self.g_psi1compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dgamma_gpu.gpudata,dL_dpsi1_gpu.gpudata,psi1_gpu.gpudata, log_denom1_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata,gamma_gpu.gpudata,np.int32(N), np.int32(M), np.int32(Q))
self.g_psi2compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dgamma_gpu.gpudata,dL_dpsi2_gpu.gpudata,psi2n_gpu.gpudata, log_denom2_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata,gamma_gpu.gpudata,np.int32(N), np.int32(M), np.int32(Q))
dL_dvar = dL_dpsi0_sum + gpuarray.sum(dvar_gpu).get()
sum_axis(grad_mu_gpu,dmu_gpu,N*Q,self.blocknum)
dL_dmu = grad_mu_gpu.get()
sum_axis(grad_S_gpu,dS_gpu,N*Q,self.blocknum)
dL_dS = grad_S_gpu.get()
sum_axis(grad_gamma_gpu,dgamma_gpu,N*Q,self.blocknum)
dL_dgamma = grad_gamma_gpu.get()
dL_dZ = dZ_gpu.get()
if ARD:
sum_axis(grad_l_gpu,dl_gpu,Q,self.blocknum)
dL_dlengscale = grad_l_gpu.get()
else:
dL_dlengscale = gpuarray.sum(dl_gpu).get()
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma

71
GPy/kern/_src/rbf.py Normal file
View file

@ -0,0 +1,71 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from stationary import Stationary
from psi_comp import PSICOMP_RBF
from psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU
from ...util.config import *
class RBF(Stationary):
"""
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
"""
_support_GPU = True
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
self.psicomp = PSICOMP_RBF()
if self.useGPU:
self.psicomp = PSICOMP_RBF_GPU()
else:
self.psicomp = PSICOMP_RBF()
def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r**2)
def dK_dr(self, r):
return -r*self.K_of_r(r)
def __getstate__(self):
dc = super(RBF, self).__getstate__()
if self.useGPU:
dc['psicomp'] = PSICOMP_RBF()
return dc
def __setstate__(self, state):
return super(RBF, self).__setstate__(state)
def spectrum(self, omega):
assert self.input_dim == 1 #TODO: higher dim spectra?
return self.variance*np.sqrt(2*np.pi)*self.lengthscale*np.exp(-self.lengthscale*2*omega**2/2)
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0]
def psi1(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]
def psi2(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2]
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2]
self.variance.gradient = dL_dvar
self.lengthscale.gradient = dL_dlengscale
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2]
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:]

204
GPy/kern/_src/splitKern.py Normal file
View file

@ -0,0 +1,204 @@
"""
A new kernel
"""
import numpy as np
from kern import Kern,CombinationKernel
from .independent_outputs import index_to_slices
import itertools
class DiffGenomeKern(Kern):
def __init__(self, kernel, idx_p, Xp, index_dim=-1, name='DiffGenomeKern'):
self.idx_p = idx_p
self.index_dim=index_dim
self.kern = SplitKern(kernel,Xp, index_dim=index_dim)
super(DiffGenomeKern, self).__init__(input_dim=kernel.input_dim+1, active_dims=None, name=name)
self.add_parameter(self.kern)
def K(self, X, X2=None):
assert X2==None
K = self.kern.K(X,X2)
if self.idx_p<=0 or self.idx_p>X.shape[0]/2:
return K
slices = index_to_slices(X[:,self.index_dim])
idx_start = slices[1][0].start
idx_end = idx_start+self.idx_p
K_c = K[idx_start:idx_end,idx_start:idx_end].copy()
K[idx_start:idx_end,:] = K[:self.idx_p,:]
K[:,idx_start:idx_end] = K[:,:self.idx_p]
K[idx_start:idx_end,idx_start:idx_end] = K_c
return K
def Kdiag(self,X):
Kdiag = self.kern.Kdiag(X)
if self.idx_p<=0 or self.idx_p>X.shape[0]/2:
return Kdiag
slices = index_to_slices(X[:,self.index_dim])
idx_start = slices[1][0].start
idx_end = idx_start+self.idx_p
Kdiag[idx_start:idx_end] = Kdiag[:self.idx_p]
return Kdiag
def update_gradients_full(self,dL_dK,X,X2=None):
assert X2==None
if self.idx_p<=0 or self.idx_p>X.shape[0]/2:
self.kern.update_gradients_full(dL_dK, X)
return
slices = index_to_slices(X[:,self.index_dim])
idx_start = slices[1][0].start
idx_end = idx_start+self.idx_p
self.kern.update_gradients_full(dL_dK[idx_start:idx_end,:], X[:self.idx_p],X)
grad_p1 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK[:,idx_start:idx_end], X, X[:self.idx_p])
grad_p2 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK[idx_start:idx_end,idx_start:idx_end], X[:self.idx_p],X[idx_start:idx_end])
grad_p3 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK[idx_start:idx_end,idx_start:idx_end], X[idx_start:idx_end], X[:self.idx_p])
grad_p4 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK[idx_start:idx_end,:], X[idx_start:idx_end],X)
grad_n1 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK[:,idx_start:idx_end], X, X[idx_start:idx_end])
grad_n2 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK[idx_start:idx_end,idx_start:idx_end], X[idx_start:idx_end], X[idx_start:idx_end])
grad_n3 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK, X)
self.kern.gradient += grad_p1+grad_p2-grad_p3-grad_p4-grad_n1-grad_n2+2*grad_n3
def update_gradients_diag(self, dL_dKdiag, X):
pass
class SplitKern(CombinationKernel):
def __init__(self, kernel, Xp, index_dim=-1, name='SplitKern'):
assert isinstance(index_dim, int), "The index dimension must be an integer!"
self.kern = kernel
self.kern_cross = SplitKern_cross(kernel,Xp)
super(SplitKern, self).__init__(kernels=[self.kern, self.kern_cross], extra_dims=[index_dim], name=name)
self.index_dim = index_dim
def K(self,X ,X2=None):
slices = index_to_slices(X[:,self.index_dim])
assert len(slices)<=2, 'The Split kernel only support two different indices'
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
# diagonal blocks
[[target.__setitem__((s,ss), self.kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices]
if len(slices)>1:
# cross blocks
[target.__setitem__((s,ss), self.kern_cross.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices[0], slices[1])]
# cross blocks
[target.__setitem__((s,ss), self.kern_cross.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices[1], slices[0])]
else:
slices2 = index_to_slices(X2[:,self.index_dim])
assert len(slices2)<=2, 'The Split kernel only support two different indices'
target = np.zeros((X.shape[0], X2.shape[0]))
# diagonal blocks
[[target.__setitem__((s,s2), self.kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[i], slices2[i])] for i in xrange(min(len(slices),len(slices2)))]
if len(slices)>1:
[target.__setitem__((s,s2), self.kern_cross.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[1], slices2[0])]
if len(slices2)>1:
[target.__setitem__((s,s2), self.kern_cross.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[0], slices2[1])]
return target
def Kdiag(self,X):
return self.kern.Kdiag(X)
def update_gradients_full(self,dL_dK,X,X2=None):
slices = index_to_slices(X[:,self.index_dim])
target = np.zeros(self.kern.size)
def collate_grads(dL, X, X2, cross=False):
if cross:
self.kern_cross.update_gradients_full(dL,X,X2)
target[:] += self.kern_cross.kern.gradient
else:
self.kern.update_gradients_full(dL,X,X2)
target[:] += self.kern.gradient
if X2 is None:
assert dL_dK.shape==(X.shape[0],X.shape[0])
[[collate_grads(dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices]
if len(slices)>1:
[collate_grads(dL_dK[s,ss], X[s], X[ss], True) for s,ss in itertools.product(slices[0], slices[1])]
[collate_grads(dL_dK[s,ss], X[s], X[ss], True) for s,ss in itertools.product(slices[1], slices[0])]
else:
assert dL_dK.shape==(X.shape[0],X2.shape[0])
slices2 = index_to_slices(X2[:,self.index_dim])
[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s,s2 in itertools.product(slices[i], slices2[i])] for i in xrange(min(len(slices),len(slices2)))]
if len(slices)>1:
[collate_grads(dL_dK[s,s2], X[s], X2[s2], True) for s,s2 in itertools.product(slices[1], slices2[0])]
if len(slices2)>1:
[collate_grads(dL_dK[s,s2], X[s], X2[s2], True) for s,s2 in itertools.product(slices[0], slices2[1])]
self.kern.gradient = target
def update_gradients_diag(self, dL_dKdiag, X):
self.kern.update_gradients_diag(self, dL_dKdiag, X)
class SplitKern_cross(Kern):
def __init__(self, kernel, Xp, name='SplitKern_cross'):
assert isinstance(kernel, Kern)
self.kern = kernel
if not isinstance(Xp,np.ndarray):
Xp = np.array([[Xp]])
self.Xp = Xp
super(SplitKern_cross, self).__init__(input_dim=kernel.input_dim, active_dims=None, name=name)
def K(self, X, X2=None):
if X2 is None:
return np.dot(self.kern.K(X,self.Xp),self.kern.K(self.Xp,X))/self.kern.K(self.Xp,self.Xp)
else:
return np.dot(self.kern.K(X,self.Xp),self.kern.K(self.Xp,X2))/self.kern.K(self.Xp,self.Xp)
def Kdiag(self, X):
return np.inner(self.kern.K(X,self.Xp),self.kern.K(self.Xp,X).T)/self.kern.K(self.Xp,self.Xp)
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
k1 = self.kern.K(X,self.Xp)
k2 = self.kern.K(self.Xp,X2)
k3 = self.kern.K(self.Xp,self.Xp)
dL_dk1 = np.einsum('ij,j->i',dL_dK,k2[0])/k3[0,0]
dL_dk2 = np.einsum('ij,i->j',dL_dK,k1[:,0])/k3[0,0]
dL_dk3 = np.einsum('ij,ij->',dL_dK,-np.dot(k1,k2)/(k3[0,0]*k3[0,0]))
self.kern.update_gradients_full(dL_dk1[:,None],X,self.Xp)
grad = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dk2[None,:],self.Xp,X2)
grad += self.kern.gradient.copy()
self.kern.update_gradients_full(np.array([[dL_dk3]]),self.Xp,self.Xp)
grad += self.kern.gradient.copy()
self.kern.gradient = grad
def update_gradients_diag(self, dL_dKdiag, X):
k1 = self.kern.K(X,self.Xp)
k2 = self.kern.K(self.Xp,X)
k3 = self.kern.K(self.Xp,self.Xp)
dL_dk1 = dL_dKdiag*k2[0]/k3
dL_dk2 = dL_dKdiag*k1[:,0]/k3
dL_dk3 = -dL_dKdiag*(k1[:,0]*k2[0]).sum()/(k3*k3)
self.kern.update_gradients_full(dL_dk1[:,None],X,self.Xp)
grad1 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dk2[None,:],self.Xp,X)
grad2 = self.kern.gradient.copy()
self.kern.update_gradients_full(np.array([[dL_dk3]]),self.Xp,self.Xp)
grad3 = self.kern.gradient.copy()
self.kern.gradient = grad1+grad2+grad3

122
GPy/kern/_src/static.py Normal file
View file

@ -0,0 +1,122 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
import numpy as np
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Static(Kern):
def __init__(self, input_dim, variance, active_dims, name):
super(Static, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp())
self.link_parameters(self.variance)
def Kdiag(self, X):
ret = np.empty((X.shape[0],), dtype=np.float64)
ret[:] = self.variance
return ret
def gradients_X(self, dL_dK, X, X2=None):
return np.zeros(X.shape)
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(Z.shape)
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
def psi0(self, Z, variational_posterior):
return self.Kdiag(variational_posterior.mean)
def psi1(self, Z, variational_posterior):
return self.K(variational_posterior.mean, Z)
def psi2(self, Z, variational_posterior):
K = self.K(variational_posterior.mean, Z)
return np.einsum('ij,ik->jk',K,K) #K[:,:,None]*K[:,None,:] # NB. more efficient implementations on inherriting classes
def input_sensitivity(self, summarize=True):
if summarize:
return super(Static, self).input_sensitivity(summarize=summarize)
else:
return np.ones(self.input_dim) * self.variance
class White(Static):
def __init__(self, input_dim, variance=1., active_dims=None, name='white'):
super(White, self).__init__(input_dim, variance, active_dims, name)
def K(self, X, X2=None):
if X2 is None:
return np.eye(X.shape[0])*self.variance
else:
return np.zeros((X.shape[0], X2.shape[0]))
def psi2(self, Z, variational_posterior):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.trace(dL_dK)
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dKdiag.sum()
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum()
class Bias(Static):
def __init__(self, input_dim, variance=1., active_dims=None, name='bias'):
super(Bias, self).__init__(input_dim, variance, active_dims, name)
def K(self, X, X2=None):
shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
ret = np.empty(shape, dtype=np.float64)
ret[:] = self.variance
return ret
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = dL_dK.sum()
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dKdiag.sum()
def psi2(self, Z, variational_posterior):
ret = np.empty((Z.shape[0], Z.shape[0]), dtype=np.float64)
ret[:] = self.variance*self.variance*variational_posterior.shape[0]
return ret
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()*variational_posterior.shape[0]
class Fixed(Static):
def __init__(self, input_dim, covariance_matrix, variance=1., active_dims=None, name='fixed'):
"""
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
super(Fixed, self).__init__(input_dim, variance, active_dims, name)
self.fixed_K = covariance_matrix
def K(self, X, X2):
return self.variance * self.fixed_K
def Kdiag(self, X):
return self.variance * self.fixed_K.diag()
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.fixed_K)
def psi2(self, Z, variational_posterior):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum()

484
GPy/kern/_src/stationary.py Normal file
View file

@ -0,0 +1,484 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.linalg import tdot
from ... import util
import numpy as np
from scipy import integrate, weave
from ...util.config import config # for assesing whether to use weave
from ...util.caching import Cache_this
class Stationary(Kern):
"""
Stationary kernels (covariance functions).
Stationary covariance fucntion depend only on r, where r is defined as
r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 }
The covariance function k(x, x' can then be written k(r).
In this implementation, r is scaled by the lengthscales parameter(s):
r = \sqrt{ \sum_{q=1}^Q \frac{(x_q - x'_q)^2}{\ell_q^2} }.
By default, there's only one lengthscale: seaprate lengthscales for each
dimension can be enables by setting ARD=True.
To implement a stationary covariance function using this class, one need
only define the covariance function k(r), and it derivative.
...
def K_of_r(self, r):
return foo
def dK_dr(self, r):
return bar
The lengthscale(s) and variance parameters are added to the structure automatically.
"""
def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=False):
super(Stationary, self).__init__(input_dim, active_dims, name,useGPU=useGPU)
self.ARD = ARD
if not ARD:
if lengthscale is None:
lengthscale = np.ones(1)
else:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only 1 lengthscale needed for non-ARD kernel"
else:
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size in [1, input_dim], "Bad number of lengthscales"
if lengthscale.size != input_dim:
lengthscale = np.ones(input_dim)*lengthscale
else:
lengthscale = np.ones(self.input_dim)
self.lengthscale = Param('lengthscale', lengthscale, Logexp())
self.variance = Param('variance', variance, Logexp())
assert self.variance.size==1
self.link_parameters(self.variance, self.lengthscale)
def K_of_r(self, r):
raise NotImplementedError, "implement the covariance function as a fn of r to use this class"
def dK_dr(self, r):
raise NotImplementedError, "implement derivative of the covariance function wrt r to use this class"
@Cache_this(limit=5, ignore_args=())
def K(self, X, X2=None):
"""
Kernel function applied on inputs X and X2.
In the stationary case there is an inner function depending on the
distances from X to X2, called r.
K(X, X2) = K_of_r((X-X2)**2)
"""
r = self._scaled_dist(X, X2)
return self.K_of_r(r)
@Cache_this(limit=3, ignore_args=())
def dK_dr_via_X(self, X, X2):
#a convenience function, so we can cache dK_dr
return self.dK_dr(self._scaled_dist(X, X2))
def _unscaled_dist(self, X, X2=None):
"""
Compute the Euclidean distance between each row of X and X2, or between
each pair of rows of X if X2 is None.
"""
#X, = self._slice_X(X)
if X2 is None:
Xsq = np.sum(np.square(X),1)
r2 = -2.*tdot(X) + (Xsq[:,None] + Xsq[None,:])
util.diag.view(r2)[:,]= 0. # force diagnoal to be zero: sometime numerically a little negative
r2 = np.clip(r2, 0, np.inf)
return np.sqrt(r2)
else:
#X2, = self._slice_X(X2)
X1sq = np.sum(np.square(X),1)
X2sq = np.sum(np.square(X2),1)
r2 = -2.*np.dot(X, X2.T) + X1sq[:,None] + X2sq[None,:]
r2 = np.clip(r2, 0, np.inf)
return np.sqrt(r2)
@Cache_this(limit=5, ignore_args=())
def _scaled_dist(self, X, X2=None):
"""
Efficiently compute the scaled distance, r.
r = \sqrt( \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
Note that if thre is only one lengthscale, l comes outside the sum. In
this case we compute the unscaled distance first (in a separate
function for caching) and divide by lengthscale afterwards
"""
if self.ARD:
if X2 is not None:
X2 = X2 / self.lengthscale
return self._unscaled_dist(X/self.lengthscale, X2)
else:
return self._unscaled_dist(X, X2)/self.lengthscale
def Kdiag(self, X):
ret = np.empty(X.shape[0])
ret[:] = self.variance
return ret
def update_gradients_diag(self, dL_dKdiag, X):
"""
Given the derivative of the objective with respect to the diagonal of
the covariance matrix, compute the derivative wrt the parameters of
this kernel and stor in the <parameter>.gradient field.
See also update_gradients_full
"""
self.variance.gradient = np.sum(dL_dKdiag)
self.lengthscale.gradient = 0.
def update_gradients_full(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective wrt the covariance matrix
(dL_dK), compute the gradient wrt the parameters of this kernel,
and store in the parameters object as e.g. self.variance.gradient
"""
self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
#now the lengthscale gradient(s)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
if self.ARD:
#rinv = self._inv_dis# this is rather high memory? Should we loop instead?t(X, X2)
#d = X[:, None, :] - X2[None, :, :]
#x_xl3 = np.square(d)
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
tmp = dL_dr*self._inv_dist(X, X2)
if X2 is None: X2 = X
if config.getboolean('weave', 'working'):
try:
self.lengthscale.gradient = self.weave_lengthscale_grads(tmp, X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
else:
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
else:
r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
def _inv_dist(self, X, X2=None):
"""
Compute the elementwise inverse of the distance matrix, expecpt on the
diagonal, where we return zero (the distance on the diagonal is zero).
This term appears in derviatives.
"""
dist = self._scaled_dist(X, X2).copy()
return 1./np.where(dist != 0., dist, np.inf)
def weave_lengthscale_grads(self, tmp, X, X2):
"""Use scipy.weave to compute derivatives wrt the lengthscales"""
N,M = tmp.shape
Q = X.shape[1]
if hasattr(X, 'values'):X = X.values
if hasattr(X2, 'values'):X2 = X2.values
grads = np.zeros(self.input_dim)
code = """
double gradq;
for(int q=0; q<Q; q++){
gradq = 0;
for(int n=0; n<N; n++){
for(int m=0; m<M; m++){
gradq += tmp(n,m)*(X(n,q)-X2(m,q))*(X(n,q)-X2(m,q));
}
}
grads(q) = gradq;
}
"""
weave.inline(code, ['tmp', 'X', 'X2', 'grads', 'N', 'M', 'Q'], type_converters=weave.converters.blitz, support_code="#include <math.h>")
return -grads/self.lengthscale**3
def gradients_X(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
"""
if config.getboolean('weave', 'working'):
try:
return self.gradients_X_weave(dL_dK, X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
return self.gradients_X_(dL_dK, X, X2)
else:
return self.gradients_X_(dL_dK, X, X2)
def gradients_X_(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr
if X2 is None:
tmp = tmp + tmp.T
X2 = X
#The high-memory numpy way:
#d = X[:, None, :] - X2[None, :, :]
#ret = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2
#the lower memory way with a loop
ret = np.empty(X.shape, dtype=np.float64)
for q in xrange(self.input_dim):
np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=ret[:,q])
ret /= self.lengthscale**2
return ret
def gradients_X_weave(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr
if X2 is None:
tmp = tmp + tmp.T
X2 = X
code = """
int n,m,d;
double retnd;
#pragma omp parallel for private(n,d, retnd, m)
for(d=0;d<D;d++){
for(n=0;n<N;n++){
retnd = 0.0;
for(m=0;m<M;m++){
retnd += tmp(n,m)*(X(n,d)-X2(m,d));
}
ret(n,d) = retnd;
}
}
"""
if hasattr(X, 'values'):X = X.values #remove the GPy wrapping to make passing into weave safe
if hasattr(X2, 'values'):X2 = X2.values
ret = np.zeros(X.shape)
N,D = X.shape
N,M = tmp.shape
from scipy import weave
support_code = """
#include <omp.h>
#include <stdio.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
'extra_link_args' : ['-lgomp']}
weave.inline(code, ['ret', 'N', 'D', 'M', 'tmp', 'X', 'X2'], type_converters=weave.converters.blitz, support_code=support_code, **weave_options)
return ret/self.lengthscale**2
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def input_sensitivity(self, summarize=True):
return np.ones(self.input_dim)/self.lengthscale**2
class Exponential(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'):
super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r)
def dK_dr(self, r):
return -0.5*self.K_of_r(r)
class OU(Stationary):
"""
OU kernel:
.. math::
k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='OU'):
super(OU, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance * np.exp(-r)
def dK_dr(self,r):
return -1.*self.variance*np.exp(-r)
class Matern32(Stationary):
"""
Matern 3/2 kernel:
.. math::
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat32'):
super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)
def dK_dr(self,r):
return -3.*self.variance*r*np.exp(-np.sqrt(3.)*r)
def Gram_matrix(self, F, F1, F2, lower, upper):
"""
Return the Gram matrix of the vector of functions F with respect to the
RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x, i):
return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x))
n = F.shape[0]
G = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0]
Flower = np.array([f(lower) for f in F])[:, None]
F1lower = np.array([f(lower) for f in F1])[:, None]
return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))
class Matern52(Stationary):
"""
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r)
def dK_dr(self, r):
return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r)
def Gram_matrix(self, F, F1, F2, F3, lower, upper):
"""
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param F3: vector of third derivatives of F
:type F3: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x,i):
return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x))
n = F.shape[0]
G = np.zeros((n,n))
for i in range(n):
for j in range(i,n):
G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5))
Flower = np.array([f(lower) for f in F])[:,None]
F1lower = np.array([f(lower) for f in F1])[:,None]
F2lower = np.array([f(lower) for f in F2])[:,None]
orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T)
orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
return(1./self.variance* (G_coef*G + orig + orig2))
class ExpQuad(Stationary):
"""
The Exponentiated quadratic covariance function.
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
notes::
- Yes, this is exactly the same as the RBF covariance function, but the
RBF implementation also has some features for doing variational kernels
(the psi-statistics).
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='ExpQuad'):
super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r**2)
def dK_dr(self, r):
return -r*self.K_of_r(r)
class Cosine(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Cosine'):
super(Cosine, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance * np.cos(r)
def dK_dr(self, r):
return -self.variance * np.sin(r)
class RatQuad(Stationary):
"""
Rational Quadratic Kernel
.. math::
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \\alpha}
"""
def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, active_dims=None, name='RatQuad'):
super(RatQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
self.power = Param('power', power, Logexp())
self.link_parameters(self.power)
def K_of_r(self, r):
r2 = np.power(r, 2.)
return self.variance*np.power(1. + r2/2., -self.power)
def dK_dr(self, r):
r2 = np.power(r, 2.)
return -self.variance*self.power*r*np.power(1. + r2/2., - self.power - 1.)
def update_gradients_full(self, dL_dK, X, X2=None):
super(RatQuad, self).update_gradients_full(dL_dK, X, X2)
r = self._scaled_dist(X, X2)
r2 = np.power(r, 2.)
dK_dpow = -self.variance * np.power(2., self.power) * np.power(r2 + 2., -self.power) * np.log(0.5*(r2+2.))
grad = np.sum(dL_dK*dK_dpow)
self.power.gradient = grad
def update_gradients_diag(self, dL_dKdiag, X):
super(RatQuad, self).update_gradients_diag(dL_dKdiag, X)
self.power.gradient = 0.

75
GPy/kern/_src/symbolic.py Normal file
View file

@ -0,0 +1,75 @@
# Check Matthew Rocklin's blog post.
import sympy as sym
import numpy as np
from kern import Kern
from ...core.symbolic import Symbolic_core
class Symbolic(Kern, Symbolic_core):
"""
"""
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', parameters=None, active_dims=None, operators=None, func_modules=[]):
if k is None:
raise ValueError, "You must provide an argument for the covariance function."
Kern.__init__(self, input_dim, active_dims, name=name)
kdiag = k
self.cacheable = ['X', 'Z']
Symbolic_core.__init__(self, {'k':k,'kdiag':kdiag}, cacheable=self.cacheable, derivatives = ['X', 'theta'], parameters=parameters, func_modules=func_modules)
self.output_dim = output_dim
def __add__(self,other):
return spkern(self._sym_k+other._sym_k)
def _set_expressions(self, expressions):
"""This method is overwritten because we need to modify kdiag by substituting z for x. We do this by calling the parent expression method to extract variables from expressions, then subsitute the z variables that are present with x."""
Symbolic_core._set_expressions(self, expressions)
Symbolic_core._set_variables(self, self.cacheable)
# Substitute z with x to obtain kdiag.
for x, z in zip(self.variables['X'], self.variables['Z']):
expressions['kdiag'] = expressions['kdiag'].subs(z, x)
Symbolic_core._set_expressions(self, expressions)
def K(self,X,X2=None):
if X2 is None:
return self.eval_function('k', X=X, Z=X)
else:
return self.eval_function('k', X=X, Z=X2)
def Kdiag(self,X):
d = self.eval_function('kdiag', X=X)
if not d.shape[0] == X.shape[0]:
d = np.tile(d, (X.shape[0], 1))
return d
def gradients_X(self, dL_dK, X, X2=None):
#if self._X is None or X.base is not self._X.base or X2 is not None:
g = self.eval_gradients_X('k', dL_dK, X=X, Z=X2)
if X2 is None:
g *= 2
return g
def gradients_X_diag(self, dL_dK, X):
return self.eval_gradients_X('kdiag', dL_dK, X=X)
def update_gradients_full(self, dL_dK, X, X2=None):
# Need to extract parameters to local variables first
if X2 is None:
# need to double this inside ...
gradients = self.eval_update_gradients('k', dL_dK, X=X)
else:
gradients = self.eval_update_gradients('k', dL_dK, X=X, Z=X2)
for name, val in gradients:
setattr(getattr(self, name), 'gradient', val)
def update_gradients_diag(self, dL_dKdiag, X):
gradients = self.eval_update_gradients('kdiag', dL_dKdiag, X)
for name, val in gradients:
setattr(getattr(self, name), 'gradient', val)

View file

@ -0,0 +1,196 @@
#include "Python.h"
#include <math.h>
#include <float.h>
#include <stdlib.h>
#include <iostream>
#include <stdexcept>
double DiracDelta(double x){
// TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil
if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
return 1.0;
else
return 0.0;
};
double DiracDelta(double x,int foo){
return 0.0;
};
double sinc(double x){
// compute the sinc function
if (x==0)
return 1.0;
else
return sin(x)/x;
}
double sinc_grad(double x){
// compute the gradient of the sinc function.
if (x==0)
return 0.0;
else
return (x*cos(x) - sin(x))/(x*x);
}
double erfcx(double x){
// Based on code by Soren Hauberg 2010 for Octave.
// compute the scaled complex error function.
//return erfc(x)*exp(x*x);
double xneg=-sqrt(log(DBL_MAX/2));
double xmax = 1/(sqrt(M_PI)*DBL_MIN);
xmax = DBL_MAX<xmax ? DBL_MAX : xmax;
// Find values where erfcx can be evaluated
double t = 3.97886080735226 / (fabs(x) + 3.97886080735226);
double u = t-0.5;
double y = (((((((((u * 0.00127109764952614092 + 1.19314022838340944e-4) * u
- 0.003963850973605135) * u - 8.70779635317295828e-4) * u
+ 0.00773672528313526668) * u + 0.00383335126264887303) * u
- 0.0127223813782122755) * u - 0.0133823644533460069) * u
+ 0.0161315329733252248) * u + 0.0390976845588484035) * u + 0.00249367200053503304;
y = ((((((((((((y * u - 0.0838864557023001992) * u -
0.119463959964325415) * u + 0.0166207924969367356) * u +
0.357524274449531043) * u + 0.805276408752910567) * u +
1.18902982909273333) * u + 1.37040217682338167) * u +
1.31314653831023098) * u + 1.07925515155856677) * u +
0.774368199119538609) * u + 0.490165080585318424) * u +
0.275374741597376782) * t;
if (x<xneg)
return -INFINITY;
else if (x<0)
return 2.0*exp(x*x)-y;
else if (x>xmax)
return 0.0;
else
return y;
}
double ln_diff_erf(double x0, double x1){
// stably compute the log of difference between two erfs.
if (x1>x0){
PyErr_SetString(PyExc_RuntimeError,"second argument must be smaller than or equal to first in ln_diff_erf");
throw 1;
}
if (x0==x1){
PyErr_WarnEx(PyExc_RuntimeWarning,"divide by zero encountered in log", 1);
return -INFINITY;
}
else if(x0<0 && x1>0 || x0>0 && x1<0) //x0 and x1 have opposite signs
return log(erf(x0)-erf(x1));
else if(x0>0) //x0 positive, x1 non-negative
return log(erfcx(x1)-erfcx(x0)*exp(x1*x1- x0*x0))-x1*x1;
else //x0 and x1 non-positive
return log(erfcx(-x0)-erfcx(-x1)*exp(x0*x0 - x1*x1))-x0*x0;
}
// TODO: For all these computations of h things are very efficient at the moment. Need to recode sympykern to allow the precomputations to take place and all the gradients to be computed in one function. Not sure of best way forward for that yet. Neil
double h(double t, double tprime, double d_i, double d_j, double l){
// Compute the h function for the sim covariance.
double half_l_di = 0.5*l*d_i;
double arg_1 = half_l_di + tprime/l;
double arg_2 = half_l_di - (t-tprime)/l;
double ln_part_1 = ln_diff_erf(arg_1, arg_2);
arg_2 = half_l_di - t/l;
double sign_val = 1.0;
if(t/l==0)
sign_val = 0.0;
else if (t/l < 0)
sign_val = -1.0;
arg_2 = half_l_di - t/l;
double ln_part_2 = ln_diff_erf(half_l_di, arg_2);
// if either ln_part_1 or ln_part_2 are -inf, don't bother computing rest of that term.
double part_1 = 0.0;
if(isfinite(ln_part_1))
part_1 = sign_val*exp(half_l_di*half_l_di - d_i*(t-tprime) + ln_part_1 - log(d_i + d_j));
double part_2 = 0.0;
if(isfinite(ln_part_2))
part_2 = sign_val*exp(half_l_di*half_l_di - d_i*t - d_j*tprime + ln_part_2 - log(d_i + d_j));
return part_1 - part_2;
}
double dh_dd_i(double t, double tprime, double d_i, double d_j, double l){
double diff_t = (t-tprime);
double l2 = l*l;
double hv = h(t, tprime, d_i, d_j, l);
double half_l_di = 0.5*l*d_i;
double arg_1 = half_l_di + tprime/l;
double arg_2 = half_l_di - (t-tprime)/l;
double ln_part_1 = ln_diff_erf(arg_1, arg_2);
arg_1 = half_l_di;
arg_2 = half_l_di - t/l;
double sign_val = 1.0;
if(t/l==0)
sign_val = 0.0;
else if (t/l < 0)
sign_val = -1.0;
double ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l);
double base = (0.5*d_i*l2*(d_i+d_j)-1)*hv;
if(isfinite(ln_part_1))
base -= diff_t*sign_val*exp(half_l_di*half_l_di
-d_i*diff_t
+ln_part_1);
if(isfinite(ln_part_2))
base += t*sign_val*exp(half_l_di*half_l_di
-d_i*t-d_j*tprime
+ln_part_2);
base += l/sqrt(M_PI)*(-exp(-diff_t*diff_t/l2)
+exp(-tprime*tprime/l2-d_i*t)
+exp(-t*t/l2-d_j*tprime)
-exp(-(d_i*t + d_j*tprime)));
return base/(d_i+d_j);
}
double dh_dd_j(double t, double tprime, double d_i, double d_j, double l){
double half_l_di = 0.5*l*d_i;
double hv = h(t, tprime, d_i, d_j, l);
double sign_val = 1.0;
if(t/l==0)
sign_val = 0.0;
else if (t/l < 0)
sign_val = -1.0;
double ln_part_2 = ln_diff_erf(half_l_di, half_l_di - t/l);
double base = -hv;
if(isfinite(ln_part_2))
base += tprime*sign_val*exp(half_l_di*half_l_di-(d_i*t+d_j*tprime)+ln_part_2);
return base/(d_i+d_j);
}
double dh_dl(double t, double tprime, double d_i, double d_j, double l){
// compute gradient of h function with respect to lengthscale for sim covariance
// TODO a lot of energy wasted recomputing things here, need to do this in a shared way somehow ... perhaps needs rewrite of sympykern.
double half_l_di = 0.5*l*d_i;
double arg_1 = half_l_di + tprime/l;
double arg_2 = half_l_di - (t-tprime)/l;
double ln_part_1 = ln_diff_erf(arg_1, arg_2);
arg_2 = half_l_di - t/l;
double ln_part_2 = ln_diff_erf(half_l_di, arg_2);
double diff_t = t - tprime;
double l2 = l*l;
double hv = h(t, tprime, d_i, d_j, l);
return 0.5*d_i*d_i*l*hv + 2/(sqrt(M_PI)*(d_i+d_j))*((-diff_t/l2-d_i/2)*exp(-diff_t*diff_t/l2)+(-tprime/l2+d_i/2)*exp(-tprime*tprime/l2-d_i*t)-(-t/l2-d_i/2)*exp(-t*t/l2-d_j*tprime)-d_i/2*exp(-(d_i*t+d_j*tprime)));
}
double dh_dt(double t, double tprime, double d_i, double d_j, double l){
// compute gradient of h function with respect to t.
double diff_t = t - tprime;
double half_l_di = 0.5*l*d_i;
double arg_1 = half_l_di + tprime/l;
double arg_2 = half_l_di - diff_t/l;
double ln_part_1 = ln_diff_erf(arg_1, arg_2);
arg_2 = half_l_di - t/l;
double ln_part_2 = ln_diff_erf(half_l_di, arg_2);
return (d_i*exp(ln_part_2-d_i*t - d_j*tprime) - d_i*exp(ln_part_1-d_i*diff_t) + 2*exp(-d_i*diff_t - pow(half_l_di - diff_t/l, 2))/(sqrt(M_PI)*l) - 2*exp(-d_i*t - d_j*tprime - pow(half_l_di - t/l,2))/(sqrt(M_PI)*l))*exp(half_l_di*half_l_di)/(d_i + d_j);
}
double dh_dtprime(double t, double tprime, double d_i, double d_j, double l){
// compute gradient of h function with respect to tprime.
double diff_t = t - tprime;
double half_l_di = 0.5*l*d_i;
double arg_1 = half_l_di + tprime/l;
double arg_2 = half_l_di - diff_t/l;
double ln_part_1 = ln_diff_erf(arg_1, arg_2);
arg_2 = half_l_di - t/l;
double ln_part_2 = ln_diff_erf(half_l_di, arg_2);
return (d_i*exp(ln_part_1-d_i*diff_t) + d_j*exp(ln_part_2-d_i*t - d_j*tprime) + (-2*exp(-pow(half_l_di - diff_t/l,2)) + 2*exp(-pow(half_l_di + tprime/l,2)))*exp(-d_i*diff_t)/(sqrt(M_PI)*l))*exp(half_l_di*half_l_di)/(d_i + d_j);
}

View file

@ -0,0 +1,16 @@
#include <math.h>
double DiracDelta(double x);
double DiracDelta(double x, int foo);
double sinc(double x);
double sinc_grad(double x);
double erfcx(double x);
double ln_diff_erf(double x0, double x1);
double h(double t, double tprime, double d_i, double d_j, double l);
double dh_dl(double t, double tprime, double d_i, double d_j, double l);
double dh_dd_i(double t, double tprime, double d_i, double d_j, double l);
double dh_dd_j(double t, double tprime, double d_i, double d_j, double l);
double dh_dt(double t, double tprime, double d_i, double d_j, double l);
double dh_dtprime(double t, double tprime, double d_i, double d_j, double l);

165
GPy/kern/_src/todo/ODE_1.py Normal file
View file

@ -0,0 +1,165 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
class ODE_1(Kernpart):
"""
kernel resultiong from a first order ODE with OU driving GP
:param input_dim: the number of input dimension, has to be equal to one
:type input_dim: int
:param varianceU: variance of the driving GP
:type varianceU: float
:param lengthscaleU: lengthscale of the driving GP (sqrt(3)/lengthscaleU)
:type lengthscaleU: float
:param varianceY: 'variance' of the transfer function
:type varianceY: float
:param lengthscaleY: 'lengthscale' of the transfer function (1/lengthscaleY)
:type lengthscaleY: float
:rtype: kernel object
"""
def __init__(self, input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
assert input_dim==1, "Only defined for input_dim = 1"
self.input_dim = input_dim
self.num_params = 4
self.name = 'ODE_1'
if lengthscaleU is not None:
lengthscaleU = np.asarray(lengthscaleU)
assert lengthscaleU.size == 1, "lengthscaleU should be one dimensional"
else:
lengthscaleU = np.ones(1)
if lengthscaleY is not None:
lengthscaleY = np.asarray(lengthscaleY)
assert lengthscaleY.size == 1, "lengthscaleY should be one dimensional"
else:
lengthscaleY = np.ones(1)
#lengthscaleY = 0.5
self._set_params(np.hstack((varianceU, varianceY, lengthscaleU,lengthscaleY)))
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.varianceU,self.varianceY, self.lengthscaleU,self.lengthscaleY))
def _set_params(self, x):
"""set the value of the parameters."""
assert x.size == self.num_params
self.varianceU = x[0]
self.varianceY = x[1]
self.lengthscaleU = x[2]
self.lengthscaleY = x[3]
def _get_param_names(self):
"""return parameter names."""
return ['varianceU','varianceY', 'lengthscaleU', 'lengthscaleY']
def K(self, X, X2, target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
# i1 = X[:,1]
# i2 = X2[:,1]
# X = X[:,0].reshape(-1,1)
# X2 = X2[:,0].reshape(-1,1)
dist = np.abs(X - X2.T)
ly=1/self.lengthscaleY
lu=np.sqrt(3)/self.lengthscaleU
#ly=self.lengthscaleY
#lu=self.lengthscaleU
k1 = np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix associated to X."""
ly=1/self.lengthscaleY
lu=np.sqrt(3)/self.lengthscaleU
#ly=self.lengthscaleY
#lu=self.lengthscaleU
k1 = (2*lu+ly)/(lu+ly)**2
k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2
k3 = 1/(lu+ly) + (lu)/(lu+ly)**2
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.abs(X - X2.T)
ly=1/self.lengthscaleY
lu=np.sqrt(3)/self.lengthscaleU
#ly=self.lengthscaleY
#lu=self.lengthscaleU
dk1theta1 = np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
#c=np.sqrt(3)
#t1=c/lu
#t2=1/ly
#dk1theta1=np.exp(-dist*ly)*t2*( (2*c*t2+2*t1)/(c*t2+t1)**2 -2*(2*c*t2*t1+t1**2)/(c*t2+t1)**3 )
dk2theta1 = 1*(
np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
+np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
+np.exp(-dist*ly)*2*(ly-lu)**(-2)
+np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
)
dk3theta1 = np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
dktheta1 = self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1)
dk1theta2 = np.exp(-ly*dist) * ((lu+ly)**(-2)) * ( (-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly) )
dk2theta2 = 1*(
np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
+np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
)
dk3theta2 = np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
dktheta2 = self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2)
k1 = np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
dkdvar = k1+k2+k3
#target[0] dk dvarU
#target[1] dk dvarY
#target[2] dk d theta1
#target[3] dk d theta2
target[0] += np.sum(self.varianceY*dkdvar * dL_dK)
target[1] += np.sum(self.varianceU*dkdvar * dL_dK)
target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)
target[3] += np.sum(dktheta2*(-self.lengthscaleY**(-2)) * dL_dK)
# def dKdiag_dtheta(self, dL_dKdiag, X, target):
# """derivative of the diagonal of the covariance matrix with respect to the parameters."""
# # NB: derivative of diagonal elements wrt lengthscale is 0
# target[0] += np.sum(dL_dKdiag)
# def dK_dX(self, dL_dK, X, X2, target):
# """derivative of the covariance matrix with respect to X."""
# if X2 is None: X2 = X
# dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
# ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
# dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
# target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
# def dKdiag_dX(self, dL_dKdiag, X, target):
# pass

View file

@ -0,0 +1,556 @@
# Copyright (c) 2013, GPy Authors, see AUTHORS.txt
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
from GPy.util.ln_diff_erfs import ln_diff_erfs
import pdb
from scipy import weave
class Eq_ode1(Kernpart):
"""
Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
This outputs of this kernel have the form
.. math::
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.
:param output_dim: number of outputs driven by latent function.
:type output_dim: int
:param W: sensitivities of each output to the latent driving function.
:type W: ndarray (output_dim x rank).
:param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
:type rank: int
:param decay: decay rates for the first order system.
:type decay: array of length output_dim.
:param delay: delay between latent force and output response.
:type delay: array of length output_dim.
:param kappa: diagonal term that allows each latent output to have an independent component to the response.
:type kappa: array of length output_dim.
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
"""
def __init__(self,output_dim, W=None, rank=1, kappa=None, lengthscale=1.0, decay=None, delay=None):
self.rank = rank
self.input_dim = 1
self.name = 'eq_ode1'
self.output_dim = output_dim
self.lengthscale = lengthscale
self.num_params = self.output_dim*self.rank + 1 + (self.output_dim - 1)
if kappa is not None:
self.num_params+=self.output_dim
if delay is not None:
assert delay.shape==(self.output_dim-1,)
self.num_params+=self.output_dim-1
self.rank = rank
if W is None:
self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
else:
assert W.shape==(self.output_dim,self.rank)
self.W = W
if decay is None:
self.decay = np.ones(self.output_dim-1)
if kappa is not None:
assert kappa.shape==(self.output_dim,)
self.kappa = kappa
self.delay = delay
self.is_normalized = True
self.is_stationary = False
self.gaussian_initial = False
self._set_params(self._get_params())
def _get_params(self):
param_list = [self.W.flatten()]
if self.kappa is not None:
param_list.append(self.kappa)
param_list.append(self.decay)
if self.delay is not None:
param_list.append(self.delay)
param_list.append(self.lengthscale)
return np.hstack(param_list)
def _set_params(self,x):
assert x.size == self.num_params
end = self.output_dim*self.rank
self.W = x[:end].reshape(self.output_dim,self.rank)
start = end
self.B = np.dot(self.W,self.W.T)
if self.kappa is not None:
end+=self.output_dim
self.kappa = x[start:end]
self.B += np.diag(self.kappa)
start=end
end+=self.output_dim-1
self.decay = x[start:end]
start=end
if self.delay is not None:
end+=self.output_dim-1
self.delay = x[start:end]
start=end
end+=1
self.lengthscale = x[start]
self.sigma = np.sqrt(2)*self.lengthscale
def _get_param_names(self):
param_names = sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[])
if self.kappa is not None:
param_names += ['kappa_%i'%i for i in range(self.output_dim)]
param_names += ['decay_%i'%i for i in range(1,self.output_dim)]
if self.delay is not None:
param_names += ['delay_%i'%i for i in 1+range(1,self.output_dim)]
param_names+= ['lengthscale']
return param_names
def K(self,X,X2,target):
if X.shape[1] > 2:
raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
self._K_computations(X, X2)
target += self._scale*self._K_dvar
if self.gaussian_initial:
# Add covariance associated with initial condition.
t1_mat = self._t[self._rorder, None]
t2_mat = self._t2[None, self._rorder2]
target+=self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat))
def Kdiag(self,index,target):
#target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
pass
def _param_grad_helper(self,dL_dK,X,X2,target):
# First extract times and indices.
self._extract_t_indices(X, X2, dL_dK=dL_dK)
self._dK_ode_dtheta(target)
def _dK_ode_dtheta(self, target):
"""Do all the computations for the ode parts of the covariance function."""
t_ode = self._t[self._index>0]
dL_dK_ode = self._dL_dK[self._index>0, :]
index_ode = self._index[self._index>0]-1
if self._t2 is None:
if t_ode.size==0:
return
t2_ode = t_ode
dL_dK_ode = dL_dK_ode[:, self._index>0]
index2_ode = index_ode
else:
t2_ode = self._t2[self._index2>0]
dL_dK_ode = dL_dK_ode[:, self._index2>0]
if t_ode.size==0 or t2_ode.size==0:
return
index2_ode = self._index2[self._index2>0]-1
h1 = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary, update_derivatives=True)
#self._dK_ddelay = self._dh_ddelay
self._dK_dsigma = self._dh_dsigma
if self._t2 is None:
h2 = h1
else:
h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary, update_derivatives=True)
#self._dK_ddelay += self._dh_ddelay.T
self._dK_dsigma += self._dh_dsigma.T
# C1 = self.sensitivity
# C2 = self.sensitivity
# K = 0.5 * (h1 + h2.T)
# var2 = C1*C2
# if self.is_normalized:
# dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + sum(sum(dL_dK.*dh2_dD1.T)))*0.5*var2
# dk_dD2 = (sum(sum(dL_dK.*dh1_dD2)) + sum(sum(dL_dK.*dh2_dD2.T)))*0.5*var2
# dk_dsigma = 0.5 * var2 * sum(sum(dL_dK.*dK_dsigma))
# dk_dC1 = C2 * sum(sum(dL_dK.*K))
# dk_dC2 = C1 * sum(sum(dL_dK.*K))
# else:
# K = np.sqrt(np.pi) * K
# dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + * sum(sum(dL_dK.*K))
# dk_dC2 = self.sigma * C1 * sum(sum(dL_dK.*K))
# dk_dSim1Variance = dk_dC1
# Last element is the length scale.
(dL_dK_ode[:, :, None]*self._dh_ddelay[:, None, :]).sum(2)
target[-1] += (dL_dK_ode*self._dK_dsigma/np.sqrt(2)).sum()
# # only pass the gradient with respect to the inverse width to one
# # of the gradient vectors ... otherwise it is counted twice.
# g1 = real([dk_dD1 dk_dinvWidth dk_dSim1Variance])
# g2 = real([dk_dD2 0 dk_dSim2Variance])
# return g1, g2"""
def dKdiag_dtheta(self,dL_dKdiag,index,target):
pass
def gradients_X(self,dL_dK,X,X2,target):
pass
def _extract_t_indices(self, X, X2=None, dL_dK=None):
"""Extract times and output indices from the input matrix X. Times are ordered according to their index for convenience of computation, this ordering is stored in self._order and self.order2. These orderings are then mapped back to the original ordering (in X) using self._rorder and self._rorder2. """
# TODO: some fast checking here to see if this needs recomputing?
self._t = X[:, 0]
if not X.shape[1] == 2:
raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
self._index = np.asarray(X[:, 1],dtype=np.int)
# Sort indices so that outputs are in blocks for computational
# convenience.
self._order = self._index.argsort()
self._index = self._index[self._order]
self._t = self._t[self._order]
self._rorder = self._order.argsort() # rorder is for reversing the order
if X2 is None:
self._t2 = None
self._index2 = None
self._order2 = self._order
self._rorder2 = self._rorder
else:
if not X2.shape[1] == 2:
raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
self._t2 = X2[:, 0]
self._index2 = np.asarray(X2[:, 1],dtype=np.int)
self._order2 = self._index2.argsort()
self._index2 = self._index2[self._order2]
self._t2 = self._t2[self._order2]
self._rorder2 = self._order2.argsort() # rorder2 is for reversing order
if dL_dK is not None:
self._dL_dK = dL_dK[self._order, :]
self._dL_dK = self._dL_dK[:, self._order2]
def _K_computations(self, X, X2):
"""Perform main body of computations for the ode1 covariance function."""
# First extract times and indices.
self._extract_t_indices(X, X2)
self._K_compute_eq()
self._K_compute_ode_eq()
if X2 is None:
self._K_eq_ode = self._K_ode_eq.T
else:
self._K_compute_ode_eq(transpose=True)
self._K_compute_ode()
if X2 is None:
self._K_dvar = np.zeros((self._t.shape[0], self._t.shape[0]))
else:
self._K_dvar = np.zeros((self._t.shape[0], self._t2.shape[0]))
# Reorder values of blocks for placing back into _K_dvar.
self._K_dvar = np.vstack((np.hstack((self._K_eq, self._K_eq_ode)),
np.hstack((self._K_ode_eq, self._K_ode))))
self._K_dvar = self._K_dvar[self._rorder, :]
self._K_dvar = self._K_dvar[:, self._rorder2]
if X2 is None:
# Matrix giving scales of each output
self._scale = np.zeros((self._t.size, self._t.size))
code="""
for(int i=0;i<N; i++){
scale_mat[i+i*N] = B[index[i]+output_dim*(index[i])];
for(int j=0; j<i; j++){
scale_mat[j+i*N] = B[index[i]+output_dim*index[j]];
scale_mat[i+j*N] = scale_mat[j+i*N];
}
}
"""
scale_mat, B, index = self._scale, self.B, self._index
N, output_dim = self._t.size, self.output_dim
weave.inline(code,['index',
'scale_mat', 'B',
'N', 'output_dim'])
else:
self._scale = np.zeros((self._t.size, self._t2.size))
code = """
for(int i=0; i<N; i++){
for(int j=0; j<N2; j++){
scale_mat[i+j*N] = B[index[i]+output_dim*index2[j]];
}
}
"""
scale_mat, B, index, index2 = self._scale, self.B, self._index, self._index2
N, N2, output_dim = self._t.size, self._t2.size, self.output_dim
weave.inline(code, ['index', 'index2',
'scale_mat', 'B',
'N', 'N2', 'output_dim'])
def _K_compute_eq(self):
"""Compute covariance for latent covariance."""
t_eq = self._t[self._index==0]
if self._t2 is None:
if t_eq.size==0:
self._K_eq = np.zeros((0, 0))
return
self._dist2 = np.square(t_eq[:, None] - t_eq[None, :])
else:
t2_eq = self._t2[self._index2==0]
if t_eq.size==0 or t2_eq.size==0:
self._K_eq = np.zeros((t_eq.size, t2_eq.size))
return
self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
self._K_eq = np.exp(-self._dist2/(2*self.lengthscale*self.lengthscale))
if self.is_normalized:
self._K_eq/=(np.sqrt(2*np.pi)*self.lengthscale)
def _K_compute_ode_eq(self, transpose=False):
"""Compute the cross covariances between latent exponentiated quadratic and observed ordinary differential equations.
:param transpose: if set to false the exponentiated quadratic is on the rows of the matrix and is computed according to self._t, if set to true it is on the columns and is computed according to self._t2 (default=False).
:type transpose: bool"""
if self._t2 is not None:
if transpose:
t_eq = self._t[self._index==0]
t_ode = self._t2[self._index2>0]
index_ode = self._index2[self._index2>0]-1
else:
t_eq = self._t2[self._index2==0]
t_ode = self._t[self._index>0]
index_ode = self._index[self._index>0]-1
else:
t_eq = self._t[self._index==0]
t_ode = self._t[self._index>0]
index_ode = self._index[self._index>0]-1
if t_ode.size==0 or t_eq.size==0:
if transpose:
self._K_eq_ode = np.zeros((t_eq.shape[0], t_ode.shape[0]))
else:
self._K_ode_eq = np.zeros((t_ode.shape[0], t_eq.shape[0]))
return
t_ode_mat = t_ode[:, None]
t_eq_mat = t_eq[None, :]
if self.delay is not None:
t_ode_mat -= self.delay[index_ode, None]
diff_t = (t_ode_mat - t_eq_mat)
inv_sigma_diff_t = 1./self.sigma*diff_t
decay_vals = self.decay[index_ode][:, None]
half_sigma_d_i = 0.5*self.sigma*decay_vals
if self.is_stationary:
ln_part, signs = ln_diff_erfs(inf, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
else:
ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/self.sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
sK = signs*np.exp(half_sigma_d_i*half_sigma_d_i - decay_vals*diff_t + ln_part)
sK *= 0.5
if not self.is_normalized:
sK *= np.sqrt(np.pi)*self.sigma
if transpose:
self._K_eq_ode = sK.T
else:
self._K_ode_eq = sK
def _K_compute_ode(self):
# Compute covariances between outputs of the ODE models.
t_ode = self._t[self._index>0]
index_ode = self._index[self._index>0]-1
if self._t2 is None:
if t_ode.size==0:
self._K_ode = np.zeros((0, 0))
return
t2_ode = t_ode
index2_ode = index_ode
else:
t2_ode = self._t2[self._index2>0]
if t_ode.size==0 or t2_ode.size==0:
self._K_ode = np.zeros((t_ode.size, t2_ode.size))
return
index2_ode = self._index2[self._index2>0]-1
# When index is identical
h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary)
if self._t2 is None:
self._K_ode = 0.5 * (h + h.T)
else:
h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)
self._K_ode = 0.5 * (h + h2.T)
if not self.is_normalized:
self._K_ode *= np.sqrt(np.pi)*self.sigma
def _compute_diag_H(self, t, index, update_derivatives=False, stationary=False):
"""Helper function for computing H for the diagonal only.
:param t: time input.
:type t: array
:param index: first output indices
:type index: array of int.
:param index: second output indices
:type index: array of int.
:param update_derivatives: whether or not to update the derivative portions (default False).
:type update_derivatives: bool
:param stationary: whether to compute the stationary version of the covariance (default False).
:type stationary: bool"""
"""if delta_i~=delta_j:
[h, dh_dD_i, dh_dD_j, dh_dsigma] = np.diag(simComputeH(t, index, t, index, update_derivatives=True, stationary=self.is_stationary))
else:
Decay = self.decay[index]
if self.delay is not None:
t = t - self.delay[index]
t_squared = t*t
half_sigma_decay = 0.5*self.sigma*Decay
[ln_part_1, sign1] = ln_diff_erfs(half_sigma_decay + t/self.sigma,
half_sigma_decay)
[ln_part_2, sign2] = ln_diff_erfs(half_sigma_decay,
half_sigma_decay - t/self.sigma)
h = (sign1*np.exp(half_sigma_decay*half_sigma_decay
+ ln_part_1
- log(Decay + D_j))
- sign2*np.exp(half_sigma_decay*half_sigma_decay
- (Decay + D_j)*t
+ ln_part_2
- log(Decay + D_j)))
sigma2 = self.sigma*self.sigma
if update_derivatives:
dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h
+ t*sign2*np.exp(
half_sigma_decay*half_sigma_decay-(Decay+D_j)*t + ln_part_2
)
+ self.sigma/np.sqrt(np.pi)*
(-1 + np.exp(-t_squared/sigma2-Decay*t)
+ np.exp(-t_squared/sigma2-D_j*t)
- np.exp(-(Decay + D_j)*t)))
dh_dD_i = (dh_dD_i/(Decay+D_j)).real
dh_dD_j = (t*sign2*np.exp(
half_sigma_decay*half_sigma_decay-(Decay + D_j)*t+ln_part_2
)
-h)
dh_dD_j = (dh_dD_j/(Decay + D_j)).real
dh_dsigma = 0.5*Decay*Decay*self.sigma*h \
+ 2/(np.sqrt(np.pi)*(Decay+D_j))\
*((-Decay/2) \
+ (-t/sigma2+Decay/2)*np.exp(-t_squared/sigma2 - Decay*t) \
- (-t/sigma2-Decay/2)*np.exp(-t_squared/sigma2 - D_j*t) \
- Decay/2*np.exp(-(Decay+D_j)*t))"""
pass
def _compute_H(self, t, index, t2, index2, update_derivatives=False, stationary=False):
"""Helper function for computing part of the ode1 covariance function.
:param t: first time input.
:type t: array
:param index: Indices of first output.
:type index: array of int
:param t2: second time input.
:type t2: array
:param index2: Indices of second output.
:type index2: array of int
:param update_derivatives: whether to update derivatives (default is False)
:return h : result of this subcomponent of the kernel for the given values.
:rtype: ndarray
"""
if stationary:
raise NotImplementedError, "Error, stationary version of this covariance not yet implemented."
# Vector of decays and delays associated with each output.
Decay = self.decay[index]
Decay2 = self.decay[index2]
t_mat = t[:, None]
t2_mat = t2[None, :]
if self.delay is not None:
Delay = self.delay[index]
Delay2 = self.delay[index2]
t_mat-=Delay[:, None]
t2_mat-=Delay2[None, :]
diff_t = (t_mat - t2_mat)
inv_sigma_diff_t = 1./self.sigma*diff_t
half_sigma_decay_i = 0.5*self.sigma*Decay[:, None]
ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
half_sigma_decay_i - inv_sigma_diff_t,
return_sign=True)
ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
half_sigma_decay_i - t_mat/self.sigma,
return_sign=True)
h = sign1*np.exp(half_sigma_decay_i
*half_sigma_decay_i
-Decay[:, None]*diff_t+ln_part_1
-np.log(Decay[:, None] + Decay2[None, :]))
h -= sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i
-Decay[:, None]*t_mat-Decay2[None, :]*t2_mat+ln_part_2
-np.log(Decay[:, None] + Decay2[None, :]))
if update_derivatives:
sigma2 = self.sigma*self.sigma
# Update ith decay gradient
dh_ddecay = ((0.5*Decay[:, None]*sigma2*(Decay[:, None] + Decay2[None, :])-1)*h
+ (-diff_t*sign1*np.exp(
half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*diff_t+ln_part_1
)
+t_mat*sign2*np.exp(
half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*t_mat
- Decay2*t2_mat+ln_part_2))
+self.sigma/np.sqrt(np.pi)*(
-np.exp(
-diff_t*diff_t/sigma2
)+np.exp(
-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat
)+np.exp(
-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat
)-np.exp(
-(Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
)
))
self._dh_ddecay = (dh_ddecay/(Decay[:, None]+Decay2[None, :])).real
# Update jth decay gradient
dh_ddecay2 = (t2_mat*sign2
*np.exp(
half_sigma_decay_i*half_sigma_decay_i
-(Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
+ln_part_2
)
-h)
self._dh_ddecay2 = (dh_ddecay/(Decay[:, None] + Decay2[None, :])).real
# Update sigma gradient
self._dh_dsigma = (half_sigma_decay_i*Decay[:, None]*h
+ 2/(np.sqrt(np.pi)
*(Decay[:, None]+Decay2[None, :]))
*((-diff_t/sigma2-Decay[:, None]/2)
*np.exp(-diff_t*diff_t/sigma2)
+ (-t2_mat/sigma2+Decay[:, None]/2)
*np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat)
- (-t_mat/sigma2-Decay[:, None]/2)
*np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat)
- Decay[:, None]/2
*np.exp(-(Decay[:, None]*t_mat+Decay2[None, :]*t2_mat))))
return h

View file

@ -0,0 +1,74 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from ...util.linalg import pdinv,mdot
class FiniteDimensional(Kernpart):
def __init__(self, input_dim, F, G, variance=1., weights=None):
"""
Argumnents
----------
input_dim: int - the number of input dimensions
F: np.array of functions with shape (n,) - the n basis functions
G: np.array with shape (n,n) - the Gram matrix associated to F
weights : np.ndarray with shape (n,)
"""
self.input_dim = input_dim
self.F = F
self.G = G
self.G_1 ,L,Li,logdet = pdinv(G)
self.n = F.shape[0]
if weights is not None:
assert weights.shape==(self.n,)
else:
weights = np.ones(self.n)
self.num_params = self.n + 1
self.name = 'finite_dim'
self._set_params(np.hstack((variance,weights)))
def _get_params(self):
return np.hstack((self.variance,self.weights))
def _set_params(self,x):
assert x.size == (self.num_params)
self.variance = x[0]
self.weights = x[1:]
def _get_param_names(self):
if self.n==1:
return ['variance','weight']
else:
return ['variance']+['weight_%i'%i for i in range(self.weights.size)]
def K(self,X,X2,target):
if X2 is None: X2 = X
FX = np.column_stack([f(X) for f in self.F])
FX2 = np.column_stack([f(X2) for f in self.F])
product = self.variance * mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
np.add(product,target,target)
def Kdiag(self,X,target):
product = np.diag(self.K(X, X))
np.add(target,product,target)
def _param_grad_helper(self,X,X2,target):
"""Return shape is NxMx(Ntheta)"""
if X2 is None: X2 = X
FX = np.column_stack([f(X) for f in self.F])
FX2 = np.column_stack([f(X2) for f in self.F])
DER = np.zeros((self.n,self.n,self.n))
for i in range(self.n):
DER[i,i,i] = np.sqrt(self.weights[i])
dw = self.variance * mdot(FX,DER,self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
dv = mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
np.add(target[:,:,0],np.transpose(dv,(0,2,1)), target[:,:,0])
np.add(target[:,:,1:],np.transpose(dw,(0,2,1)), target[:,:,1:])
def dKdiag_dtheta(self,X,target):
np.add(target[:,0],1.,target[:,0])

View file

@ -0,0 +1,41 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
class Fixed(Kernpart):
def __init__(self, input_dim, K, variance=1.):
"""
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
self.input_dim = input_dim
self.fixed_K = K
self.num_params = 1
self.name = 'fixed'
self._set_params(np.array([variance]).flatten())
def _get_params(self):
return self.variance
def _set_params(self, x):
assert x.shape == (1,)
self.variance = x
def _get_param_names(self):
return ['variance']
def K(self, X, X2, target):
target += self.variance * self.fixed_K
def _param_grad_helper(self, partial, X, X2, target):
target += (partial * self.fixed_K).sum()
def gradients_X(self, partial, X, X2, target):
pass
def dKdiag_dX(self, partial, X, target):
pass

154
GPy/kern/_src/todo/gibbs.py Normal file
View file

@ -0,0 +1,154 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from ...util.linalg import tdot
from ...core.mapping import Mapping
import GPy
class Gibbs(Kernpart):
"""
Gibbs non-stationary covariance function.
.. math::
r = sqrt((x_i - x_j)'*(x_i - x_j))
k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
Z = (2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')^{q/2}
where :math:`l(x)` is a function giving the length scale as a function of space and :math:`q` is the dimensionality of the input space.
This is the non stationary kernel proposed by Mark Gibbs in his 1997
thesis. It is similar to an RBF but has a length scale that varies
with input location. This leads to an additional term in front of
the kernel.
The parameters are :math:`\sigma^2`, the process variance, and
the parameters of l(x) which is a function that can be
specified by the user, by default an multi-layer peceptron is
used.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
:type mapping: GPy.core.Mapping
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
See Mark Gibbs's thesis for more details: Gibbs,
M. N. (1997). Bayesian Gaussian Processes for Regression and
Classification. PhD thesis, Department of Physics, University of
Cambridge. Or also see Page 93 of Gaussian Processes for Machine
Learning by Rasmussen and Williams. Although note that we do not
constrain the lengthscale to be positive by default. This allows
anticorrelation to occur. The positive constraint can be included
by the user manually.
"""
def __init__(self, input_dim, variance=1., mapping=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not mapping:
mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
if not ARD:
self.num_params=1+mapping.num_params
else:
raise NotImplementedError
self.mapping = mapping
self.name='gibbs'
self._set_params(np.hstack((variance, self.mapping._get_params())))
def _get_params(self):
return np.hstack((self.variance, self.mapping._get_params()))
def _set_params(self, x):
assert x.size == (self.num_params)
self.variance = x[0]
self.mapping._set_params(x[1:])
def _get_param_names(self):
return ['variance'] + self.mapping._get_param_names()
def K(self, X, X2, target):
"""Return covariance between X and X2."""
self._K_computations(X, X2)
target += self.variance*self._K_dvar
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix for X."""
np.add(target, self.variance, target)
def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
self._dK_computations(dL_dK)
if X2==None:
gmapping = self.mapping.df_dtheta(2*self._dL_dl[:, None], X)
else:
gmapping = self.mapping.df_dtheta(self._dL_dl[:, None], X)
gmapping += self.mapping.df_dtheta(self._dL_dl_two[:, None], X2)
target+= np.hstack([(dL_dK*self._K_dvar).sum(), gmapping])
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X."""
# First account for gradients arising from presence of X in exponent.
self._K_computations(X, X2)
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co
gradients_X = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
target += np.sum(gradients_X*dL_dK.T[:, :, None], 0)
# Now account for gradients arising from presence of X in lengthscale.
self._dK_computations(dL_dK)
if X2 is None:
target += 2.*self.mapping.df_dX(self._dL_dl[:, None], X)
else:
target += self.mapping.df_dX(self._dL_dl[:, None], X)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X."""
pass
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to parameters."""
target[0] += np.sum(dL_dKdiag)
def _K_computations(self, X, X2=None):
"""Pre-computations for the covariance function (used both when computing the covariance and its gradients). Here self._dK_dvar and self._K_dist2 are updated."""
self._lengthscales=self.mapping.f(X)
self._lengthscales2=np.square(self._lengthscales)
if X2==None:
self._lengthscales_two = self._lengthscales
self._lengthscales_two2 = self._lengthscales2
Xsquare = np.square(X).sum(1)
self._K_dist2 = -2.*tdot(X) + Xsquare[:, None] + Xsquare[None, :]
else:
self._lengthscales_two = self.mapping.f(X2)
self._lengthscales_two2 = np.square(self._lengthscales_two)
self._K_dist2 = -2.*np.dot(X, X2.T) + np.square(X).sum(1)[:, None] + np.square(X2).sum(1)[None, :]
self._w2 = self._lengthscales2 + self._lengthscales_two2.T
prod_length = self._lengthscales*self._lengthscales_two.T
self._K_exponential = np.exp(-self._K_dist2/self._w2)
self._K_dvar = np.sign(prod_length)*(2*np.abs(prod_length)/self._w2)**(self.input_dim/2.)*np.exp(-self._K_dist2/self._w2)
def _dK_computations(self, dL_dK):
"""Pre-computations for the gradients of the covaraince function. Here the gradient of the covariance with respect to all the individual lengthscales is computed.
:param dL_dK: the gradient of the objective with respect to the covariance function.
:type dL_dK: ndarray"""
self._dL_dl = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales_two.T**4 - self._lengthscales**4) + 2*self._lengthscales2*self._K_dist2)/(self._w2*self._w2*self._lengthscales)).sum(1)
if self._lengthscales_two is self._lengthscales:
self._dL_dl_two = None
else:
self._dL_dl_two = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales**4 - self._lengthscales_two.T**4 ) + 2*self._lengthscales_two2.T*self._K_dist2)/(self._w2*self._w2*self._lengthscales_two.T)).sum(0)

View file

@ -0,0 +1,103 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from ...util.linalg import tdot
from ...core.mapping import Mapping
import GPy
class Hetero(Kernpart):
"""
TODO: Need to constrain the function outputs
positive (still thinking of best way of doing this!!! Yes, intend to use
transformations, but what's the *best* way). Currently just squaring output.
Heteroschedastic noise which depends on input location. See, for example,
this paper by Goldberg et al.
.. math::
k(x_i, x_j) = \delta_{i,j} \sigma^2(x_i)
where :math:`\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\delta_{i,j}` is the Kronecker delta function.
The parameters are the parameters of \sigma^2(x) which is a
function that can be specified by the user, by default an
multi-layer peceptron is used.
:param input_dim: the number of input dimensions
:type input_dim: int
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
:type mapping: GPy.core.Mapping
:rtype: Kernpart object
See this paper:
Goldberg, P. W. Williams, C. K. I. and Bishop,
C. M. (1998) Regression with Input-dependent Noise: a Gaussian
Process Treatment In Advances in Neural Information Processing
Systems, Volume 10, pp. 493-499. MIT Press
for a Gaussian process treatment of this problem.
"""
def __init__(self, input_dim, mapping=None, transform=None):
self.input_dim = input_dim
if not mapping:
mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
if not transform:
transform = GPy.core.transformations.logexp()
self.transform = transform
self.mapping = mapping
self.name='hetero'
self.num_params=self.mapping.num_params
self._set_params(self.mapping._get_params())
def _get_params(self):
return self.mapping._get_params()
def _set_params(self, x):
assert x.size == (self.num_params)
self.mapping._set_params(x)
def _get_param_names(self):
return self.mapping._get_param_names()
def K(self, X, X2, target):
"""Return covariance between X and X2."""
if (X2 is None) or (X2 is X):
target[np.diag_indices_from(target)] += self._Kdiag(X)
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix for X."""
target+=self._Kdiag(X)
def _Kdiag(self, X):
"""Helper function for computing the diagonal elements of the covariance."""
return self.mapping.f(X).flatten()**2
def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
if (X2 is None) or (X2 is X):
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
self.dKdiag_dtheta(dL_dKdiag, X, target)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to parameters."""
target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X."""
if X2==None or X2 is X:
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
self.dKdiag_dX(dL_dKdiag, X, target)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X."""
target += 2.*self.mapping.df_dX(dL_dKdiag[:, None], X)*self.mapping.f(X)

View file

@ -0,0 +1,38 @@
#include <math.h>
double k_uu(t1,t2,theta1,theta2,sig1,sig2)
{
double kern=0;
double dist=0;
dist = sqrt(t2*t2-t1*t1)
kern = sig1*(1+theta1*dist)*exp(-theta1*dist)
return kern;
}
double k_yy(t1, t2, theta1,theta2,sig1,sig2)
{
double kern=0;
double dist=0;
dist = sqrt(t2*t2-t1*t1)
kern = sig1*sig2 * ( exp(-theta1*dist)*(theta2-2*theta1+theta1*theta2*dist-theta1*theta1*dist) +
exp(-dist) ) / ((theta2-theta1)*(theta2-theta1))
return kern;
}

138
GPy/kern/_src/todo/poly.py Normal file
View file

@ -0,0 +1,138 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
four_over_tau = 2./np.pi
class POLY(Kernpart):
"""
Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:
.. math::
k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^b)^d
The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
(weight_variance), :math:`\sigma^2_b` (bias_variance) and d
(degree). Only gradients of the first three are provided for
kernel optimisation, it is assumed that polynomial degree would
be set by hand.
The kernel is not recommended as it is badly behaved when the
:math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
there is an automatic relevance determination version of this
kernel provided (NOTE YET IMPLEMENTED!).
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
:type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
:param degree: the degree of the polynomial.
:type degree: int
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
"""
def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=1., degree=2, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not ARD:
self.num_params=3
if weight_variance is not None:
weight_variance = np.asarray(weight_variance)
assert weight_variance.size == 1, "Only one weight variance needed for non-ARD kernel"
else:
weight_variance = 1.*np.ones(1)
else:
self.num_params = self.input_dim + 2
if weight_variance is not None:
weight_variance = np.asarray(weight_variance)
assert weight_variance.size == self.input_dim, "bad number of weight variances"
else:
weight_variance = np.ones(self.input_dim)
raise NotImplementedError
self.degree=degree
self.name='poly_deg' + str(self.degree)
self._set_params(np.hstack((variance, weight_variance.flatten(), bias_variance)))
def _get_params(self):
return np.hstack((self.variance, self.weight_variance.flatten(), self.bias_variance))
def _set_params(self, x):
assert x.size == (self.num_params)
self.variance = x[0]
self.weight_variance = x[1:-1]
self.weight_std = np.sqrt(self.weight_variance)
self.bias_variance = x[-1]
def _get_param_names(self):
if self.num_params == 3:
return ['variance', 'weight_variance', 'bias_variance']
else:
return ['variance'] + ['weight_variance_%i' % i for i in range(self.lengthscale.size)] + ['bias_variance']
def K(self, X, X2, target):
"""Return covariance between X and X2."""
self._K_computations(X, X2)
target += self.variance*self._K_dvar
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix for X."""
self._K_diag_computations(X)
target+= self.variance*self._K_diag_dvar
def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
base_cov_grad = base*dL_dK
target[0] += np.sum(self._K_dvar*dL_dK)
target[1] += (self._K_inner_prod*base_cov_grad).sum()
target[2] += base_cov_grad.sum()
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_poly_arg
if X2 is None:
target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
else:
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X"""
self._K_diag_computations(X)
arg = self._K_diag_poly_arg
target += 2.*self.weight_variance*self.degree*self.variance*X*dL_dKdiag[:, None]*(arg**(self.degree-1))[:, None]
def _K_computations(self, X, X2):
if self.ARD:
pass
else:
if X2 is None:
self._K_inner_prod = np.dot(X,X.T)
else:
self._K_inner_prod = np.dot(X,X2.T)
self._K_poly_arg = self._K_inner_prod*self.weight_variance + self.bias_variance
self._K_dvar = self._K_poly_arg**self.degree
def _K_diag_computations(self, X):
if self.ARD:
pass
else:
self._K_diag_poly_arg = (X*X).sum(1)*self.weight_variance + self.bias_variance
self._K_diag_dvar = self._K_diag_poly_arg**self.degree

View file

@ -0,0 +1,336 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from rbf import RBF
import numpy as np
from scipy import weave
from ...util.linalg import tdot
from ...core.parameterization import Param
class RBFInv(RBF):
"""
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel. It only
differs from RBF in that here the parametrization is wrt the inverse lengthscale:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the vector of lengthscale of the kernel
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
.. Note: this object implements both the ARD and 'spherical' version of the function
"""
def __init__(self, input_dim, variance=1., inv_lengthscale=None, ARD=False, name='inverse rbf'):
#self.input_dim = input_dim
#self.name = 'rbf_inv'
if inv_lengthscale is not None: lengthscale = 1./np.array(inv_lengthscale)
else: lengthscale = None
super(RBFInv, self).__init__(input_dim, variance=variance, lengthscale=lengthscale, ARD=ARD, name=name)
self.ARD = ARD
if not ARD:
self.num_params = 2
if inv_lengthscale is not None:
inv_lengthscale = np.asarray(inv_lengthscale)
assert inv_lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
inv_lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
if inv_lengthscale is not None:
inv_lengthscale = np.asarray(inv_lengthscale)
assert inv_lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
inv_lengthscale = np.ones(self.input_dim)
self.variance = Param('variance', variance)
self.inv_lengthscale = Param('sensitivity', inv_lengthscale)
self.inv_lengthscale.add_observer(self, self.update_inv_lengthscale)
self.remove_parameter(self.lengthscale)
self.add_parameters(self.variance, self.inv_lengthscale)
#self._set_params(np.hstack((variance, inv_lengthscale.flatten())))
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3, 1))
# a set of optional args to pass to weave
self.weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
'extra_link_args' : ['-lgomp']}
# def _get_params(self):
# return np.hstack((self.variance, self.inv_lengthscale))
def update_inv_lengthscale(self, il):
self.inv_lengthscale2 = np.square(self.inv_lengthscale)
# TODO: We can rewrite everything with inv_lengthscale and never need to do the below
self.lengthscale = 1. / self.inv_lengthscale
self.lengthscale2 = np.square(self.lengthscale)
#def _set_params(self, x):
def parameters_changed(self):
#assert x.size == (self.num_params)
#self.variance = x[0]
#self.inv_lengthscale = x[1:]
# reset cached results
self._X, self._X2, self._params = np.empty(shape=(3, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
# def _get_param_names(self):
# if self.num_params == 2:
# return ['variance', 'inv_lengthscale']
# else:
# return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
# TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
def _param_grad_helper(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
target[0] += np.sum(self._K_dvar * dL_dK)
if self.ARD:
dvardLdK = self._K_dvar * dL_dK
var_len3 = self.variance / np.power(self.lengthscale, 3)
len2 = self.lengthscale2
if X2 is None:
# save computation for the symmetrical case
dvardLdK = dvardLdK + dvardLdK.T
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<i; j++){
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
}
}
target(q+1) += var_len3(q)*tmp*(-len2(q));
}
"""
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
else:
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<num_inducing; j++){
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
}
}
target(q+1) += var_len3(q)*tmp*(-len2(q));
}
"""
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
else:
target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) * (-self.lengthscale2)
def gradients_X(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
gradients_X = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
# def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
# self._psi_computations(Z, mu, S)
# denom_deriv = S[:, None, :] / (self.lengthscale ** 3 + self.lengthscale * S[:, None, :])
# d_length = self._psi1[:, :, None] * (self.lengthscale * np.square(self._psi1_dist / (self.lengthscale2 + S[:, None, :])) + denom_deriv)
# target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
# dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
# if not self.ARD:
# target[1] += dpsi1_dlength.sum()*(-self.lengthscale2)
# else:
# target[1:] += dpsi1_dlength.sum(0).sum(0)*(-self.lengthscale2)
# #target[1:] = target[1:]*(-self.lengthscale2)
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = 1 + S[:, None, :] * self.inv_lengthscale2
# d_inv_length_old = -self._psi1[:, :, None] * ((self._psi1_dist_sq - 1.) / (self.lengthscale * self._psi1_denom) + self.inv_lengthscale) / self.inv_lengthscale2
d_length = -(self._psi1[:, :, None] * ((np.square(self._psi1_dist) * self.inv_lengthscale) / (tmp ** 2) + (S[:, None, :] * self.inv_lengthscale) / (tmp)))
# d_inv_length = -self._psi1[:, :, None] * ((self._psi1_dist_sq - 1.) / self._psi1_denom + self.lengthscale)
target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD:
target[1] += dpsi1_dlength.sum() # *(-self.lengthscale2)
else:
target[1:] += dpsi1_dlength.sum(0).sum(0) # *(-self.lengthscale2)
# target[1:] = target[1:]*(-self.lengthscale2)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
dpsi1_dZ = -self._psi1[:, :, None] * ((self.inv_lengthscale2 * self._psi1_dist) / self._psi1_denom)
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
self._psi_computations(Z, mu, S)
tmp = (self._psi1[:, :, None] * self.inv_lengthscale2) / self._psi1_denom
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
"""Shape N,num_inducing,num_inducing,Ntheta"""
self._psi_computations(Z, mu, S)
d_var = 2.*self._psi2 / self.variance
# d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
d_length = -2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] * self.inv_lengthscale2) / (self.inv_lengthscale * self._psi2_denom)
target[0] += np.sum(dL_dpsi2 * d_var)
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
if not self.ARD:
target[1] += dpsi2_dlength.sum() # *(-self.lengthscale2)
else:
target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0) # *(-self.lengthscale2)
# target[1:] = target[1:]*(-self.lengthscale2)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
term1 = self._psi2_Zdist * self.inv_lengthscale2 # num_inducing, num_inducing, input_dim
term2 = (self._psi2_mudist * self.inv_lengthscale2) / self._psi2_denom # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S)
tmp = (self.inv_lengthscale2 * self._psi2[:, :, :, None]) / self._psi2_denom
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, X2):
if not (np.array_equal(X, self._X) and np.array_equal(X2, self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._params = self._get_params().copy()
if X2 is None:
self._X2 = None
X = X * self.inv_lengthscale
Xsquare = np.sum(np.square(X), 1)
self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
else:
self._X2 = X2.copy()
X = X * self.inv_lengthscale
X2 = X2 * self.inv_lengthscale
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
self._K_dvar = np.exp(-0.5 * self._K_dist2)
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2
if not np.array_equal(Z, self._Z):
# Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist * self.inv_lengthscale) # M,M,Q
if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
# something's changed. recompute EVERYTHING
# psi1
self._psi1_denom = S[:, None, :] * self.inv_lengthscale2 + 1.
self._psi1_dist = Z[None, :, :] - mu[:, None, :]
self._psi1_dist_sq = (np.square(self._psi1_dist) * self.inv_lengthscale2) / self._psi1_denom
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
self._psi1 = self.variance * np.exp(self._psi1_exponent)
# psi2
self._psi2_denom = 2.*S[:, None, None, :] * self.inv_lengthscale2 + 1. # N,M,M,Q
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
# self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
# store matrices for caching
self._Z, self._mu, self._S = Z, mu, S
def weave_psi2(self, mu, Zhat):
N, input_dim = mu.shape
num_inducing = Zhat.shape[0]
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
psi2_exponent = np.zeros((N, num_inducing, num_inducing))
psi2 = np.empty((N, num_inducing, num_inducing))
psi2_Zdist_sq = self._psi2_Zdist_sq
_psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
variance_sq = float(np.square(self.variance))
if self.ARD:
inv_lengthscale2 = self.inv_lengthscale2
else:
inv_lengthscale2 = np.ones(input_dim) * self.inv_lengthscale2
code = """
double tmp;
#pragma omp parallel for private(tmp)
for (int n=0; n<N; n++){
for (int m=0; m<num_inducing; m++){
for (int mm=0; mm<(m+1); mm++){
for (int q=0; q<input_dim; q++){
//compute mudist
tmp = mu(n,q) - Zhat(m,mm,q);
mudist(n,m,mm,q) = tmp;
mudist(n,mm,m,q) = tmp;
//now mudist_sq
tmp = tmp*tmp*inv_lengthscale2(q)/_psi2_denom(n,q);
mudist_sq(n,m,mm,q) = tmp;
mudist_sq(n,mm,m,q) = tmp;
//now psi2_exponent
tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
psi2_exponent(n,mm,m) += tmp;
if (m !=mm){
psi2_exponent(n,m,mm) += tmp;
}
//psi2 would be computed like this, but np is faster
//tmp = variance_sq*exp(psi2_exponent(n,m,mm));
//psi2(n,m,mm) = tmp;
//psi2(n,mm,m) = tmp;
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'inv_lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)
return mudist, mudist_sq, psi2_exponent, psi2

View file

@ -0,0 +1,61 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from ...core.parameterization import Param
def theta(x):
"""Heaviside step function"""
return np.where(x>=0.,1.,0.)
class Spline(Kernpart):
"""
Spline kernel
:param input_dim: the number of input dimensions (fixed to 1 right now TODO)
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
def __init__(self,input_dim,variance=1.,lengthscale=1.):
self.input_dim = input_dim
assert self.input_dim==1
self.num_params = 1
self.name = 'spline'
self.variance = Param('variance', variance)
self.lengthscale = Param('lengthscale', lengthscale)
self.add_parameters(self.variance, self.lengthscale)
# def _get_params(self):
# return self.variance
#
# def _set_params(self,x):
# self.variance = x
#
# def _get_param_names(self):
# return ['variance']
def K(self,X,X2,target):
assert np.all(X>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
assert np.all(X2>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
t = X
s = X2.T
s_t = s-t # broadcasted subtraction
target += self.variance*(0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.)
def Kdiag(self,X,target):
target += self.variance*X.flatten()**3/3.
def _param_grad_helper(self,X,X2,target):
target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.
def dKdiag_dtheta(self,X,target):
target += X.flatten()**3/3.
def dKdiag_dX(self,X,target):
target += self.variance*X**2

View file

@ -0,0 +1,81 @@
# Copyright (c) 2012 James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
class Symmetric(Kernpart):
"""
Symmetrical kernels
:param k: the kernel to symmetrify
:type k: Kernpart
:param transform: the transform to use in symmetrification (allows symmetry on specified axes)
:type transform: A numpy array (input_dim x input_dim) specifiying the transform
:rtype: Kernpart
"""
def __init__(self,k,transform=None):
if transform is None:
transform = np.eye(k.input_dim)*-1.
assert transform.shape == (k.input_dim, k.input_dim)
self.transform = transform
self.input_dim = k.input_dim
self.num_params = k.num_params
self.name = k.name + '_symm'
self.k = k
self.add_parameter(k)
#self._set_params(k._get_params())
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
AX = np.dot(X,self.transform)
if X2 is None:
X2 = X
AX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k.K(X,X2,target)
self.k.K(AX,X2,target)
self.k.K(X,AX2,target)
self.k.K(AX,AX2,target)
def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
AX = np.dot(X,self.transform)
if X2 is None:
X2 = X
AX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k._param_grad_helper(dL_dK,X,X2,target)
self.k._param_grad_helper(dL_dK,AX,X2,target)
self.k._param_grad_helper(dL_dK,X,AX2,target)
self.k._param_grad_helper(dL_dK,AX,AX2,target)
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
AX = np.dot(X,self.transform)
if X2 is None:
X2 = X
ZX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k.gradients_X(dL_dK, X, X2, target)
self.k.gradients_X(dL_dK, AX, X2, target)
self.k.gradients_X(dL_dK, X, AX2, target)
self.k.gradients_X(dL_dK, AX ,AX2, target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
foo = np.zeros((X.shape[0],X.shape[0]))
self.K(X,X,foo)
target += np.diag(foo)
def dKdiag_dX(self,dL_dKdiag,X,target):
raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
raise NotImplementedError

View file

@ -0,0 +1,204 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.caching import Cache_this
from ...util.config import *
class TruncLinear(Kern):
"""
Truncated Linear kernel
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \simga_q)
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there
is only one variance parameter)
:param ARD: Auto Relevance Determination. If False, the kernel has only one
variance parameter \sigma^2, otherwise there is one variance
parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, variances=None, delta=None, ARD=False, active_dims=None, name='linear'):
super(TruncLinear, self).__init__(input_dim, active_dims, name)
self.ARD = ARD
if not ARD:
if variances is not None:
variances = np.asarray(variances)
delta = np.asarray(delta)
assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else:
variances = np.ones(1)
delta = np.zeros(1)
else:
if variances is not None:
variances = np.asarray(variances)
delta = np.asarray(delta)
assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
else:
variances = np.ones(self.input_dim)
delta = np.zeros(self.input_dim)
self.variances = Param('variances', variances, Logexp())
self.delta = Param('delta', delta)
self.add_parameter(self.variances)
self.add_parameter(self.delta)
@Cache_this(limit=2)
def K(self, X, X2=None):
XX = self.variances*self._product(X, X2)
return XX.sum(axis=-1)
@Cache_this(limit=2)
def _product(self, X, X2=None):
if X2 is None:
X2 = X
XX = np.einsum('nq,mq->nmq',X-self.delta,X2-self.delta)
XX[XX<0] = 0
return XX
def Kdiag(self, X):
return (self.variances*np.square(X-self.delta)).sum(axis=-1)
def update_gradients_full(self, dL_dK, X, X2=None):
dK_dvar = self._product(X, X2)
if X2 is None:
X2=X
dK_ddelta = self.variances*(2*self.delta-X[:,None,:]-X2[None,:,:])*(dK_dvar>0)
if self.ARD:
self.variances.gradient[:] = np.einsum('nmq,nm->q',dK_dvar,dL_dK)
self.delta.gradient[:] = np.einsum('nmq,nm->q',dK_ddelta,dL_dK)
else:
self.variances.gradient[:] = np.einsum('nmq,nm->',dK_dvar,dL_dK)
self.delta.gradient[:] = np.einsum('nmq,nm->',dK_ddelta,dL_dK)
def update_gradients_diag(self, dL_dKdiag, X):
if self.ARD:
self.variances.gradient[:] = np.einsum('nq,n->q',np.square(X-self.delta),dL_dKdiag)
self.delta.gradient[:] = np.einsum('nq,n->q',2*self.variances*(self.delta-X),dL_dKdiag)
else:
self.variances.gradient[:] = np.einsum('nq,n->',np.square(X-self.delta),dL_dKdiag)
self.delta.gradient[:] = np.einsum('nq,n->',2*self.variances*(self.delta-X),dL_dKdiag)
def gradients_X(self, dL_dK, X, X2=None):
XX = self._product(X, X2)
if X2 is None:
Xp = (self.variances*(X-self.delta))*(XX>0)
else:
Xp = (self.variances*(X2-self.delta))*(XX>0)
if X2 is None:
return np.einsum('nmq,nm->nq',Xp,dL_dK)+np.einsum('mnq,nm->mq',Xp,dL_dK)
else:
return np.einsum('nmq,nm->nq',Xp,dL_dK)
def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*(X-self.delta)
def input_sensitivity(self):
return np.ones(self.input_dim) * self.variances
class TruncLinear_inf(Kern):
"""
Truncated Linear kernel
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \simga_q)
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there
is only one variance parameter)
:param ARD: Auto Relevance Determination. If False, the kernel has only one
variance parameter \sigma^2, otherwise there is one variance
parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, interval, variances=None, ARD=False, active_dims=None, name='linear'):
super(TruncLinear_inf, self).__init__(input_dim, active_dims, name)
self.interval = interval
self.ARD = ARD
if not ARD:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else:
variances = np.ones(1)
else:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
else:
variances = np.ones(self.input_dim)
self.variances = Param('variances', variances, Logexp())
self.add_parameter(self.variances)
# @Cache_this(limit=2)
def K(self, X, X2=None):
tmp = self._product(X, X2)
return (self.variances*tmp).sum(axis=-1)
# @Cache_this(limit=2)
def _product(self, X, X2=None):
if X2 is None:
X2 = X
X_X2 = X[:,None,:]-X2[None,:,:]
tmp = np.abs(X_X2**3)/6+np.einsum('nq,mq->nmq',X,X2)*(self.interval[1]-self.interval[0]) \
-(X[:,None,:]+X2[None,:,:])*(self.interval[1]*self.interval[1]-self.interval[0]*self.interval[0])/2+(self.interval[1]**3-self.interval[0]**3)/3.
return tmp
def Kdiag(self, X):
tmp = np.square(X)*(self.interval[1]-self.interval[0]) \
-X*(self.interval[1]*self.interval[1]-self.interval[0]*self.interval[0])+(self.interval[1]**3-self.interval[0]**3)/3
return (self.variances*tmp).sum(axis=-1)
def update_gradients_full(self, dL_dK, X, X2=None):
dK_dvar = self._product(X, X2)
if self.ARD:
self.variances.gradient[:] = np.einsum('nmq,nm->q',dK_dvar,dL_dK)
else:
self.variances.gradient[:] = np.einsum('nmq,nm->',dK_dvar,dL_dK)
def update_gradients_diag(self, dL_dKdiag, X):
tmp = np.square(X)*(self.interval[1]-self.interval[0]) \
-X*(self.interval[1]*self.interval[1]-self.interval[0]*self.interval[0])+(self.interval[1]**3-self.interval[0]**3)/3
if self.ARD:
self.variances.gradient[:] = np.einsum('nq,n->q',tmp,dL_dKdiag)
else:
self.variances.gradient[:] = np.einsum('nq,n->',tmp,dL_dKdiag)
def gradients_X(self, dL_dK, X, X2=None):
XX = self._product(X, X2)
if X2 is None:
Xp = (self.variances*(X-self.delta))*(XX>0)
else:
Xp = (self.variances*(X2-self.delta))*(XX>0)
if X2 is None:
return np.einsum('nmq,nm->nq',Xp,dL_dK)+np.einsum('mnq,nm->mq',Xp,dL_dK)
else:
return np.einsum('nmq,nm->nq',Xp,dL_dK)
def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*(X-self.delta)
def input_sensitivity(self):
return np.ones(self.input_dim) * self.variances