Merge pull request #398 from SheffieldML/integral

Integral kernels added
This commit is contained in:
lionfish0 2016-06-13 16:30:52 +01:00 committed by GitHub
commit 5b03702075
5 changed files with 341 additions and 1 deletions

View file

@ -24,6 +24,9 @@ from .src.ODE_st import ODE_st
from .src.ODE_t import ODE_t
from .src.poly import Poly
from .src.eq_ode2 import EQ_ODE2
from .src.integral import Integral
from .src.integral_limits import Integral_Limits
from .src.multidimensional_integral_limits import Multidimensional_Integral_Limits
from .src.eq_ode1 import EQ_ODE1
from .src.trunclinear import TruncLinear,TruncLinear_inf
from .src.splitKern import SplitKern,DEtime

82
GPy/kern/src/integral.py Normal file
View file

@ -0,0 +1,82 @@
# Written by Mike Smith michaeltsmith.org.uk
from __future__ import division
import numpy as np
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
import math
class Integral(Kern): #todo do I need to inherit from Stationary
"""
Integral kernel between...
"""
def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
super(Integral, self).__init__(input_dim, active_dims, name)
if lengthscale is None:
lengthscale = np.ones(1)
else:
lengthscale = np.asarray(lengthscale)
self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
self.variances = Param('variances', variances, Logexp()) #and here.
self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
def h(self, z):
return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
def dk_dl(self, t, tprime, l): #derivative of the kernel wrt lengthscale
return l * ( self.h(t/l) - self.h((t - tprime)/l) + self.h(tprime/l) - 1)
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None: #we're finding dK_xx/dTheta
dK_dl = np.zeros([X.shape[0],X.shape[0]])
dK_dv = np.zeros([X.shape[0],X.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X):
dK_dl[i,j] = self.variances[0]*self.dk_dl(x[0],x2[0],self.lengthscale[0]) #TODO Multiple length scales
dK_dv[i,j] = self.k_xx(x[0],x2[0],self.lengthscale[0]) #the gradient wrt the variance is k_xx.
self.lengthscale.gradient = np.sum(dK_dl * dL_dK)
self.variances.gradient = np.sum(dK_dv * dL_dK)
else: #we're finding dK_xf/Dtheta
raise NotImplementedError("Currently this function only handles finding the gradient of a single vector of inputs (X) not a pair of vectors (X and X2)")
#useful little function to help calculate the covariances.
def g(self,z):
return 1.0 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
#covariance between gradients (it's the gradients that we want out... maybe we should have a way of getting K_ff too? Currently you get the diag of K_ff from Kdiag)
def k_xx(self,t,tprime,l):
return 0.5 * (l**2) * ( self.g(t/l) - self.g((t - tprime)/l) + self.g(tprime/l) - 1)
def k_ff(self,t,tprime,l):
return np.exp(-((t-tprime)**2)/(l**2)) #rbf
#covariance between the gradient and the actual value
def k_xf(self,t,tprime,l):
return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf(tprime/l))
def K(self, X, X2=None):
if X2 is None:
K_xx = np.zeros([X.shape[0],X.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X):
K_xx[i,j] = self.k_xx(x[0],x2[0],self.lengthscale[0])
return K_xx * self.variances[0]
else:
K_xf = np.zeros([X.shape[0],X2.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X2):
K_xf[i,j] = self.k_xf(x[0],x2[0],self.lengthscale[0])
return K_xf * self.variances[0]
def Kdiag(self, X):
"""I've used the fact that we call this method for K_ff when finding the covariance as a hack so
I know if I should return K_ff or K_xx. In this case we're returning K_ff!!
$K_{ff}^{post} = K_{ff} - K_{fx} K_{xx}^{-1} K_{xf}$"""
K_ff = np.zeros(X.shape[0])
for i,x in enumerate(X):
K_ff[i] = self.k_ff(x[0],x[0],self.lengthscale[0])
return K_ff * self.variances[0]

View file

@ -0,0 +1,115 @@
# Written by Mike Smith michaeltsmith.org.uk
from __future__ import division
import math
import numpy as np
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
class Integral_Limits(Kern):
"""
Integral kernel. This kernel allows 1d histogram or binned data to be modelled.
The outputs are the counts in each bin. The inputs (on two dimensions) are the start and end points of each bin.
The kernel's predictions are the latent function which might have generated those binned results.
"""
def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
"""
"""
super(Integral_Limits, self).__init__(input_dim, active_dims, name)
if lengthscale is None:
lengthscale = np.ones(1)
else:
lengthscale = np.asarray(lengthscale)
self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
self.variances = Param('variances', variances, Logexp()) #and here.
self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
def h(self, z):
return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
def dk_dl(self, t, tprime, s, sprime, l): #derivative of the kernel wrt lengthscale
return l * ( self.h((t-sprime)/l) - self.h((t - tprime)/l) + self.h((tprime-s)/l) - self.h((s-sprime)/l))
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None: #we're finding dK_xx/dTheta
dK_dl = np.zeros([X.shape[0],X.shape[0]])
dK_dv = np.zeros([X.shape[0],X.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X):
dK_dl[i,j] = self.variances[0]*self.dk_dl(x[0],x2[0],x[1],x2[1],self.lengthscale[0])
dK_dv[i,j] = self.k_xx(x[0],x2[0],x[1],x2[1],self.lengthscale[0]) #the gradient wrt the variance is k_xx.
self.lengthscale.gradient = np.sum(dK_dl * dL_dK)
self.variances.gradient = np.sum(dK_dv * dL_dK)
else: #we're finding dK_xf/Dtheta
raise NotImplementedError("Currently this function only handles finding the gradient of a single vector of inputs (X) not a pair of vectors (X and X2)")
#useful little function to help calculate the covariances.
def g(self,z):
return 1.0 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
def k_xx(self,t,tprime,s,sprime,l):
"""Covariance between observed values.
s and t are one domain of the integral (i.e. the integral between s and t)
sprime and tprime are another domain of the integral (i.e. the integral between sprime and tprime)
We're interested in how correlated these two integrals are.
Note: We've not multiplied by the variance, this is done in K."""
return 0.5 * (l**2) * ( self.g((t-sprime)/l) + self.g((tprime-s)/l) - self.g((t - tprime)/l) - self.g((s-sprime)/l))
def k_ff(self,t,tprime,l):
"""Doesn't need s or sprime as we're looking at the 'derivatives', so no domains over which to integrate are required"""
return np.exp(-((t-tprime)**2)/(l**2)) #rbf
def k_xf(self,t,tprime,s,l):
"""Covariance between the gradient (latent value) and the actual (observed) value.
Note that sprime isn't actually used in this expression, presumably because the 'primes' are the gradient (latent) values which don't
involve an integration, and thus there is no domain over which they're integrated, just a single value that we want."""
return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf((tprime-s)/l))
def K(self, X, X2=None):
"""Note: We have a latent function and an output function. We want to be able to find:
- the covariance between values of the output function
- the covariance between values of the latent function
- the "cross covariance" between values of the output function and the latent function
This method is used by GPy to either get the covariance between the outputs (K_xx) or
is used to get the cross covariance (between the latent function and the outputs (K_xf).
We take advantage of the places where this function is used:
- if X2 is none, then we know that the items being compared (to get the covariance for)
are going to be both from the OUTPUT FUNCTION.
- if X2 is not none, then we know that the items being compared are from two different
sets (the OUTPUT FUNCTION and the LATENT FUNCTION).
If we want the covariance between values of the LATENT FUNCTION, we take advantage of
the fact that we only need that when we do prediction, and this only calls Kdiag (not K).
So the covariance between LATENT FUNCTIONS is available from Kdiag.
"""
if X2 is None:
K_xx = np.zeros([X.shape[0],X.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X):
K_xx[i,j] = self.k_xx(x[0],x2[0],x[1],x2[1],self.lengthscale[0])
return K_xx * self.variances[0]
else:
K_xf = np.zeros([X.shape[0],X2.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X2):
K_xf[i,j] = self.k_xf(x[0],x2[0],x[1],self.lengthscale[0]) #x2[1] unused, see k_xf docstring for explanation.
return K_xf * self.variances[0]
def Kdiag(self, X):
"""I've used the fact that we call this method during prediction (instead of K). When we
do prediction we want to know the covariance between LATENT FUNCTIONS (K_ff) (as that's probably
what the user wants).
$K_{ff}^{post} = K_{ff} - K_{fx} K_{xx}^{-1} K_{xf}$"""
K_ff = np.zeros(X.shape[0])
for i,x in enumerate(X):
K_ff[i] = self.k_ff(x[0],x[0],self.lengthscale[0])
return K_ff * self.variances[0]

View file

@ -0,0 +1,120 @@
# Written by Mike Smith michaeltsmith.org.uk
from __future__ import division
import numpy as np
from .kern import Kern
from ...core.parameterization import Param
from paramz.transformations import Logexp
import math
class Multidimensional_Integral_Limits(Kern): #todo do I need to inherit from Stationary
"""
Integral kernel, can include limits on each integral value. This kernel allows an n-dimensional
histogram or binned data to be modelled. The outputs are the counts in each bin. The inputs
are the start and end points of each bin: Pairs of inputs act as the limits on each bin. So
inputs 4 and 5 provide the start and end values of each bin in the 3rd dimension.
The kernel's predictions are the latent function which might have generated those binned results.
"""
def __init__(self, input_dim, variances=None, lengthscale=None, ARD=False, active_dims=None, name='integral'):
super(Multidimensional_Integral_Limits, self).__init__(input_dim, active_dims, name)
if lengthscale is None:
lengthscale = np.ones(1)
else:
lengthscale = np.asarray(lengthscale)
self.lengthscale = Param('lengthscale', lengthscale, Logexp()) #Logexp - transforms to allow positive only values...
self.variances = Param('variances', variances, Logexp()) #and here.
self.link_parameters(self.variances, self.lengthscale) #this just takes a list of parameters we need to optimise.
def h(self, z):
return 0.5 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
def dk_dl(self, t, tprime, s, sprime, l): #derivative of the kernel wrt lengthscale
return l * ( self.h((t-sprime)/l) - self.h((t - tprime)/l) + self.h((tprime-s)/l) - self.h((s-sprime)/l))
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None: #we're finding dK_xx/dTheta
dK_dl_term = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
k_term = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
dK_dl = np.zeros([X.shape[0],X.shape[0],self.lengthscale.shape[0]])
dK_dv = np.zeros([X.shape[0],X.shape[0]])
for il,l in enumerate(self.lengthscale):
idx = il*2
for i,x in enumerate(X):
for j,x2 in enumerate(X):
dK_dl_term[i,j,il] = self.dk_dl(x[idx],x2[idx],x[idx+1],x2[idx+1],l)
k_term[i,j,il] = self.k_xx(x[idx],x2[idx],x[idx+1],x2[idx+1],l)
for il,l in enumerate(self.lengthscale):
dK_dl = self.variances[0] * dK_dl_term[:,:,il]
for jl, l in enumerate(self.lengthscale):
if jl!=il:
dK_dl *= k_term[:,:,jl]
self.lengthscale.gradient[il] = np.sum(dK_dl * dL_dK)
dK_dv = self.calc_K_xx_wo_variance(X) #the gradient wrt the variance is k_xx.
self.variances.gradient = np.sum(dK_dv * dL_dK)
else: #we're finding dK_xf/Dtheta
raise NotImplementedError("Currently this function only handles finding the gradient of a single vector of inputs (X) not a pair of vectors (X and X2)")
#useful little function to help calculate the covariances.
def g(self,z):
return 1.0 * z * np.sqrt(math.pi) * math.erf(z) + np.exp(-(z**2))
def k_xx(self,t,tprime,s,sprime,l):
"""Covariance between observed values.
s and t are one domain of the integral (i.e. the integral between s and t)
sprime and tprime are another domain of the integral (i.e. the integral between sprime and tprime)
We're interested in how correlated these two integrals are.
Note: We've not multiplied by the variance, this is done in K."""
return 0.5 * (l**2) * ( self.g((t-sprime)/l) + self.g((tprime-s)/l) - self.g((t - tprime)/l) - self.g((s-sprime)/l))
def k_ff(self,t,tprime,l):
"""Doesn't need s or sprime as we're looking at the 'derivatives', so no domains over which to integrate are required"""
return np.exp(-((t-tprime)**2)/(l**2)) #rbf
def k_xf(self,t,tprime,s,l):
"""Covariance between the gradient (latent value) and the actual (observed) value.
Note that sprime isn't actually used in this expression, presumably because the 'primes' are the gradient (latent) values which don't
involve an integration, and thus there is no domain over which they're integrated, just a single value that we want."""
return 0.5 * np.sqrt(math.pi) * l * (math.erf((t-tprime)/l) + math.erf((tprime-s)/l))
def calc_K_xx_wo_variance(self,X):
"""Calculates K_xx without the variance term"""
K_xx = np.ones([X.shape[0],X.shape[0]]) #ones now as a product occurs over each dimension
for i,x in enumerate(X):
for j,x2 in enumerate(X):
for il,l in enumerate(self.lengthscale):
idx = il*2 #each pair of input dimensions describe the limits on one actual dimension in the data
K_xx[i,j] *= self.k_xx(x[idx],x2[idx],x[idx+1],x2[idx+1],l)
return K_xx
def K(self, X, X2=None):
if X2 is None: #X vs X
K_xx = self.calc_K_xx_wo_variance(X)
return K_xx * self.variances[0]
else: #X vs X2
K_xf = np.ones([X.shape[0],X2.shape[0]])
for i,x in enumerate(X):
for j,x2 in enumerate(X2):
for il,l in enumerate(self.lengthscale):
idx = il*2
K_xf[i,j] *= self.k_xf(x[idx],x2[idx],x[idx+1],l)
return K_xf * self.variances[0]
def Kdiag(self, X):
"""I've used the fact that we call this method for K_ff when finding the covariance as a hack so
I know if I should return K_ff or K_xx. In this case we're returning K_ff!!
$K_{ff}^{post} = K_{ff} - K_{fx} K_{xx}^{-1} K_{xf}$"""
K_ff = np.ones(X.shape[0])
for i,x in enumerate(X):
for il,l in enumerate(self.lengthscale):
idx = il*2
K_ff[i] *= self.k_ff(x[idx],x[idx],l)
return K_ff * self.variances[0]

View file

@ -193,7 +193,12 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
if verbose:
print("Checking gradients of K(X, X2) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
try:
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print(("update_gradients_full, with differing X and X2, not implemented for " + kern.name))
if result and verbose:
print("Check passed.")
if not result:
@ -416,6 +421,21 @@ class KernelGradientTestsContinuous(unittest.TestCase):
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_integral(self):
k = GPy.kern.Integral(1)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_multidimensional_integral_limits(self):
k = GPy.kern.Multidimensional_Integral_Limits(2)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_integral_limits(self):
k = GPy.kern.Integral_Limits(2)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Linear(self):
k = GPy.kern.Linear(self.D)
k.randomize()