merged master

This commit is contained in:
Nicolò Fusi 2013-01-22 17:57:09 +00:00
commit 6c4528e4da
53 changed files with 1242 additions and 821 deletions

View file

@ -23,16 +23,16 @@ class Brownian(kernpart):
assert self.D==1, "Brownian motion in 1D only"
self.Nparam = 1.
self.name = 'Brownian'
self.set_param(np.array([variance]).flatten())
self._set_params(np.array([variance]).flatten())
def get_param(self):
def _get_params(self):
return self.variance
def set_param(self,x):
def _set_params(self,x):
assert x.shape==(1,)
self.variance = x
def get_param_names(self):
def _get_param_names(self):
return ['variance']
def K(self,X,X2,target):

View file

@ -20,43 +20,54 @@ class Matern32(kernpart):
:type D: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the lengthscales :math:`\ell_i`
:type lengthscale: np.ndarray of size (D,)
:param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self,D,variance=1.,lengthscales=None):
def __init__(self,D,variance=1.,lengthscale=None,ARD=False):
self.D = D
if lengthscales is not None:
assert lengthscales.shape==(self.D,)
self.ARD = ARD
if ARD == False:
self.Nparam = 2
self.name = 'Mat32'
if lengthscale is not None:
assert lengthscale.shape == (1,)
else:
lengthscale = np.ones(1)
else:
lengthscales = np.ones(self.D)
self.Nparam = self.D + 1
self.name = 'Mat32'
self.set_param(np.hstack((variance,lengthscales)))
self.Nparam = self.D + 1
self.name = 'Mat32_ARD'
if lengthscale is not None:
assert lengthscale.shape == (self.D,)
else:
lengthscale = np.ones(self.D)
self._set_params(np.hstack((variance,lengthscale)))
def get_param(self):
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscales))
return np.hstack((self.variance,self.lengthscale))
def set_param(self,x):
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size==(self.D+1)
assert x.size == self.Nparam
self.variance = x[0]
self.lengthscales = x[1:]
self.lengthscale = x[1:]
def get_param_names(self):
def _get_param_names(self):
"""return parameter names."""
if self.D==1:
if self.Nparam == 2:
return ['variance','lengthscale']
else:
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
np.add(self.variance*(1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist), target,target)
def Kdiag(self,X,target):
@ -66,26 +77,33 @@ class Matern32(kernpart):
def dK_dtheta(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
dvar = (1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist)
invdist = 1./np.where(dist!=0.,dist,np.inf)
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3
dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[0] += np.sum(dvar*partial)
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
if self.ARD == True:
dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
#dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
else:
dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
#dl = self.variance*dvar*dist2M.sum(-1)*invdist
target[1] += np.sum(dl*partial)
def dKdiag_dtheta(self,partial,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(partial)
def dK_dX(self,X,X2,target):
def dK_dX(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
dK_dX += - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0)
def dKdiag_dX(self,X,target):
pass
@ -104,7 +122,7 @@ class Matern32(kernpart):
"""
assert self.D == 1
def L(x,i):
return(3./self.lengthscales**2*F[i](x) + 2*np.sqrt(3)/self.lengthscales*F1[i](x) + F2[i](x))
return(3./self.lengthscale**2*F[i](x) + 2*np.sqrt(3)/self.lengthscale*F1[i](x) + F2[i](x))
n = F.shape[0]
G = np.zeros((n,n))
for i in range(n):
@ -114,5 +132,5 @@ class Matern32(kernpart):
F1lower = np.array([f(lower) for f in F1])[:,None]
#print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n"
#return(G)
return(self.lengthscales**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscales**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
return(self.lengthscale**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))

View file

@ -19,43 +19,53 @@ class Matern52(kernpart):
:type D: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the lengthscales :math:`\ell_i`
:type lengthscale: np.ndarray of size (D,)
:param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self,D,variance=1.,lengthscales=None):
def __init__(self,D,variance=1.,lengthscale=None,ARD=False):
self.D = D
if lengthscales is not None:
assert lengthscales.shape==(self.D,)
self.ARD = ARD
if ARD == False:
self.Nparam = 2
self.name = 'Mat32'
if lengthscale is not None:
assert lengthscale.shape == (1,)
else:
lengthscale = np.ones(1)
else:
lengthscales = np.ones(self.D)
self.Nparam = self.D + 1
self.name = 'Mat52'
self.set_param(np.hstack((variance,lengthscales)))
self.Nparam = self.D + 1
self.name = 'Mat32_ARD'
if lengthscale is not None:
assert lengthscale.shape == (self.D,)
else:
lengthscale = np.ones(self.D)
self._set_params(np.hstack((variance,lengthscale)))
def get_param(self):
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscales))
return np.hstack((self.variance,self.lengthscale))
def set_param(self,x):
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size==(self.D+1)
assert x.size == self.Nparam
self.variance = x[0]
self.lengthscales = x[1:]
self.lengthscale = x[1:]
def get_param_names(self):
def _get_param_names(self):
"""return parameter names."""
if self.D==1:
if self.Nparam == 2:
return ['variance','lengthscale']
else:
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target)
def Kdiag(self,X,target):
@ -65,24 +75,30 @@ class Matern52(kernpart):
def dK_dtheta(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
invdist = 1./np.where(dist!=0.,dist,np.inf)
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist)
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[0] += np.sum(dvar*partial)
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
if self.ARD:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
else:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
target[1] += np.sum(dl*partial)
def dKdiag_dtheta(self,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(partial)
def dK_dX(self,X,X2,target):
def dK_dX(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
dK_dX += - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0)
def dKdiag_dX(self,X,target):
@ -97,26 +113,26 @@ class Matern52(kernpart):
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:type F2: np.array
:param F3: vector of third derivatives of F
:type F3: np.array
:type F3: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
:type lower,upper: floats
"""
assert self.D == 1
def L(x,i):
return(5*np.sqrt(5)/self.lengthscales**3*F[i](x) + 15./self.lengthscales**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscales*F2[i](x) + F3[i](x))
return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x))
n = F.shape[0]
G = np.zeros((n,n))
for i in range(n):
for j in range(i,n):
G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
G_coef = 3.*self.lengthscales**5/(400*np.sqrt(5))
G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5))
Flower = np.array([f(lower) for f in F])[:,None]
F1lower = np.array([f(lower) for f in F1])[:,None]
F2lower = np.array([f(lower) for f in F2])[:,None]
orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscales**4/200*np.dot(F2lower,F2lower.T)
orig2 = 3./5*self.lengthscales**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T)
orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
return(1./self.variance* (G_coef*G + orig + orig2))

View file

@ -2,5 +2,5 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD, rbf_sympy, sympykern
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, linear_ARD, rbf_sympy, sympykern
from kern import kern

View file

@ -17,16 +17,16 @@ class bias(kernpart):
self.D = D
self.Nparam = 1
self.name = 'bias'
self.set_param(np.array([variance]).flatten())
self._set_params(np.array([variance]).flatten())
def get_param(self):
def _get_params(self):
return self.variance
def set_param(self,x):
def _set_params(self,x):
assert x.shape==(1,)
self.variance = x
def get_param_names(self):
def _get_param_names(self):
return ['variance']
def K(self,X,X2,target):

View file

@ -6,7 +6,6 @@ import numpy as np
from kern import kern
from rbf import rbf as rbfpart
from rbf_ARD import rbf_ARD as rbf_ARD_part
from white import white as whitepart
from linear import linear as linearpart
from linear_ARD import linear_ARD as linear_ARD_part
@ -22,7 +21,7 @@ from Brownian import Brownian as Brownianpart
#using meta-classes to make the objects construct properly without them.
def rbf(D,variance=1., lengthscale=1.):
def rbf(D,variance=1., lengthscale=None,ARD=False):
"""
Construct an RBF kernel
@ -32,22 +31,10 @@ def rbf(D,variance=1., lengthscale=1.):
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = rbfpart(D,variance,lengthscale)
return kern(D, [part])
def rbf_ARD(D,variance=1., lengthscales=None):
"""
Construct an RBF kernel with Automatic Relevance Determination (ARD)
:param D: dimensionality of the kernel, obligatory
:type D: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscales: the lengthscales of the kernel
:type lengthscales: None|np.ndarray
"""
part = rbf_ARD_part(D,variance,lengthscales)
part = rbfpart(D,variance,lengthscale,ARD)
return kern(D, [part])
def linear(D,lengthscales=None):
@ -86,43 +73,52 @@ def white(D,variance=1.):
part = whitepart(D,variance)
return kern(D, [part])
def exponential(D,variance=1., lengthscales=None):
def exponential(D,variance=1., lengthscale=None, ARD=False):
"""
Construct a exponential kernel.
Construct an exponential kernel
Arguments
---------
D (int), obligatory
variance (float)
lengthscales (np.ndarray)
:param D: dimensionality of the kernel, obligatory
:type D: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = exponentialpart(D,variance, lengthscales)
part = exponentialpart(D,variance, lengthscale, ARD)
return kern(D, [part])
def Matern32(D,variance=1., lengthscales=None):
def Matern32(D,variance=1., lengthscale=None, ARD=False):
"""
Construct a Matern 3/2 kernel.
Arguments
---------
D (int), obligatory
variance (float)
lengthscales (np.ndarray)
:param D: dimensionality of the kernel, obligatory
:type D: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = Matern32part(D,variance, lengthscales)
part = Matern32part(D,variance, lengthscale, ARD)
return kern(D, [part])
def Matern52(D,variance=1., lengthscales=None):
def Matern52(D,variance=1., lengthscale=None, ARD=False):
"""
Construct a Matern 5/2 kernel.
Arguments
---------
D (int), obligatory
variance (float)
lengthscales (np.ndarray)
:param D: dimensionality of the kernel, obligatory
:type D: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
"""
part = Matern52part(D,variance, lengthscales)
part = Matern52part(D,variance, lengthscale, ARD)
return kern(D, [part])
def bias(D,variance=1.):

View file

@ -19,42 +19,53 @@ class exponential(kernpart):
:type D: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param lengthscale: the lengthscales :math:`\ell_i`
:type lengthscale: np.ndarray of size (D,)
:param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self,D,variance=1.,lengthscales=None):
def __init__(self,D,variance=1.,lengthscale=None,ARD=False):
self.D = D
if lengthscales is not None:
assert lengthscales.shape==(self.D,)
self.ARD = ARD
if ARD == False:
self.Nparam = 2
self.name = 'exp'
if lengthscale is not None:
assert lengthscale.shape == (1,)
else:
lengthscale = np.ones(1)
else:
lengthscales = np.ones(self.D)
self.Nparam = self.D + 1
self.name = 'exp'
self.set_param(np.hstack((variance,lengthscales)))
self.Nparam = self.D + 1
self.name = 'exp_ARD'
if lengthscale is not None:
assert lengthscale.shape == (self.D,)
else:
lengthscale = np.ones(self.D)
self._set_params(np.hstack((variance,lengthscale)))
def get_param(self):
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscales))
return np.hstack((self.variance,self.lengthscale))
def set_param(self,x):
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size==(self.D+1)
assert x.size == self.Nparam
self.variance = x[0]
self.lengthscales = x[1:]
self.lengthscale = x[1:]
def get_param_names(self):
def _get_param_names(self):
"""return parameter names."""
if self.D==1:
if self.Nparam == 2:
return ['variance','lengthscale']
else:
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
np.add(self.variance*np.exp(-dist), target,target)
def Kdiag(self,X,target):
@ -64,24 +75,28 @@ class exponential(kernpart):
def dK_dtheta(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
invdist = 1./np.where(dist!=0.,dist,np.inf)
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
dvar = np.exp(-dist)
dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
target[0] += np.sum(dvar*partial)
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
if self.ARD == True:
dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
else:
dl = self.variance*dvar*dist2M.sum(-1)*invdist
target[1] += np.sum(dl*partial)
def dKdiag_dtheta(self,partial,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
#NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(partial)
def dK_dX(self,X,X2,target):
def dK_dX(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*np.exp(-dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0)
@ -101,14 +116,14 @@ class exponential(kernpart):
"""
assert self.D == 1
def L(x,i):
return(1./self.lengthscales*F[i](x) + F1[i](x))
return(1./self.lengthscale*F[i](x) + F1[i](x))
n = F.shape[0]
G = np.zeros((n,n))
for i in range(n):
for j in range(i,n):
G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
Flower = np.array([f(lower) for f in F])[:,None]
return(self.lengthscales/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T))
return(self.lengthscale/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T))

View file

@ -27,15 +27,15 @@ class finite_dimensional(kernpart):
weights = np.ones(self.n)
self.Nparam = self.n + 1
self.name = 'finite_dim'
self.set_param(np.hstack((variance,weights)))
self._set_params(np.hstack((variance,weights)))
def get_param(self):
def _get_params(self):
return np.hstack((self.variance,self.weights))
def set_param(self,x):
def _set_params(self,x):
assert x.size == (self.Nparam)
self.variance = x[0]
self.weights = x[1:]
def get_param_names(self):
def _get_param_names(self):
if self.n==1:
return ['variance','weight']
else:

View file

@ -133,20 +133,20 @@ class kern(parameterised):
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern
def get_param(self):
return np.hstack([p.get_param() for p in self.parts])
def _get_params(self):
return np.hstack([p._get_params() for p in self.parts])
def set_param(self,x):
[p.set_param(x[s]) for p, s in zip(self.parts, self.param_slices)]
def _set_params(self,x):
[p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)]
def get_param_names(self):
def _get_param_names(self):
#this is a bit nasty: we want to distinguish between parts with the same name by appending a count
part_names = np.array([k.name for k in self.parts],dtype=np.str)
counts = [np.sum(part_names==ni) for i, ni in enumerate(part_names)]
cum_counts = [np.sum(part_names[i:]==ni) for i, ni in enumerate(part_names)]
names = [name+'_'+str(cum_count) if count>1 else name for name,count,cum_count in zip(part_names,counts,cum_counts)]
return sum([[name+'_'+n for n in k.get_param_names()] for name,k in zip(names,self.parts)],[])
return sum([[name+'_'+n for n in k._get_param_names()] for name,k in zip(names,self.parts)],[])
def K(self,X,X2=None,slices1=None,slices2=None):
assert X.shape[1]==self.D

View file

@ -16,11 +16,11 @@ class kernpart(object):
self.Nparam = 1
self.name = 'unnamed'
def get_param(self):
def _get_params(self):
raise NotImplementedError
def set_param(self,x):
def _set_params(self,x):
raise NotImplementedError
def get_param_names(self):
def _get_param_names(self):
raise NotImplementedError
def K(self,X,X2,target):
raise NotImplementedError

View file

@ -20,16 +20,16 @@ class linear(kernpart):
variance = 1.0
self.Nparam = 1
self.name = 'linear'
self.set_param(variance)
self._set_params(variance)
self._Xcache, self._X2cache = np.empty(shape=(2,))
def get_param(self):
def _get_params(self):
return self.variance
def set_param(self,x):
def _set_params(self,x):
self.variance = x
def get_param_names(self):
def _get_param_names(self):
return ['variance']
def K(self,X,X2,target):

View file

@ -23,16 +23,16 @@ class linear_ARD(kernpart):
variances = np.ones(self.D)
self.Nparam = int(self.D)
self.name = 'linear'
self.set_param(variances)
self._set_params(variances)
def get_param(self):
def _get_params(self):
return self.variances
def set_param(self,x):
def _set_params(self,x):
assert x.size==(self.Nparam)
self.variances = x
def get_param_names(self):
def _get_param_names(self):
if self.D==1:
return ['variance']
else:

View file

@ -8,46 +8,67 @@ import hashlib
class rbf(kernpart):
"""
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel.
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
.. math::
k(r) = \sigma^2 \exp(- \frac{r^2}{2\ell}) \qquad \qquad \\text{ where } r = \sqrt{\frac{\sum_{i=1}^d (x_i-x^\prime_i)^2}{\ell^2}}
k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \qquad \qquad \\text{ where } r^2 = \sum_{i=1}^d \frac{ (x_i-x^\prime_i)^2}{\ell_i^2}}
where \ell is the lengthscale, \alpha the smoothness, \sigma^2 the variance and d the dimensionality of the input.
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
:param D: the number of input dimensions
:type D: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param lengthscale: the vector of lengthscale of the kernel
:type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
.. Note: for rbf with different lengthscale on each dimension, see rbf_ARD
"""
def __init__(self,D,variance=1.,lengthscale=1.):
def __init__(self,D,variance=1.,lengthscale=None,ARD=False):
self.D = D
self.Nparam = 2
self.name = 'rbf'
self.set_param(np.hstack((variance,lengthscale)))
self.ARD = ARD
if ARD == False:
self.Nparam = 2
self.name = 'rbf'
if lengthscale is not None:
assert lengthscale.shape == (1,)
else:
lengthscale = np.ones(1)
else:
self.Nparam = self.D + 1
self.name = 'rbf_ARD'
if lengthscale is not None:
assert lengthscale.shape == (self.D,)
else:
lengthscale = np.ones(self.D)
self._set_params(np.hstack((variance,lengthscale)))
#initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3,1))
self._X, self._X2, self._params = np.empty(shape=(3,1))
def get_param(self):
def _get_params(self):
return np.hstack((self.variance,self.lengthscale))
def set_param(self,x):
self.variance, self.lengthscale = x
def _set_params(self,x):
assert x.size==(self.Nparam)
self.variance = x[0]
self.lengthscale = x[1:]
self.lengthscale2 = np.square(self.lengthscale)
#reset cached results
self._X, self._X2, self._params = np.empty(shape=(3,1))
self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
def get_param_names(self):
return ['variance','lengthscale']
def _get_param_names(self):
if self.Nparam == 2:
return ['variance','lengthscale']
else:
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
def K(self,X,X2,target):
if X2 is None:
@ -61,7 +82,12 @@ class rbf(kernpart):
def dK_dtheta(self,partial,X,X2,target):
self._K_computations(X,X2)
target[0] += np.sum(self._K_dvar*partial)
target[1] += np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial)
if self.ARD == True:
dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscale
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
else:
target[1] += np.sum(self._K_dvar*self.variance*(self._K_dist2.sum(-1))/self.lengthscale*partial)
#np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial)
def dKdiag_dtheta(self,partial,X,target):
#NB: derivative of diagonal elements wrt lengthscale is 0
@ -81,15 +107,12 @@ class rbf(kernpart):
self._X = X
self._X2 = X2
if X2 is None: X2 = X
XXT = np.dot(X,X2.T)
if X is X2:
self._K_dist2 = (-2.*XXT + np.diag(XXT)[:,np.newaxis] + np.diag(XXT)[np.newaxis,:])/self.lengthscale2
else:
self._K_dist2 = (-2.*XXT + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])/self.lengthscale2
# TODO Remove comments if this is fine.
# Commented out by Neil as doesn't seem to be used elsewhere.
#self._K_exponent = -0.5*self._K_dist2
self._K_dvar = np.exp(-0.5*self._K_dist2)
self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy
self._params = np.empty(shape=(1,0)) #ensure the next section gets called
if not np.all(self._params == self._get_params()):
self._params == self._get_params()
self._K_dist2 = np.square(self._K_dist/self.lengthscale)
self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1))
def psi0(self,Z,mu,S,target):
target += self.variance
@ -132,7 +155,7 @@ class rbf(kernpart):
d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
d_length = d_length.sum(0)
target[0] += np.sum(partial*d_var)
target[1] += np.sum(d_length*partial)
target[1:] += (d_length*partial[:,:,None]).sum(0).sum(0)
def dpsi2_dZ(self,partial,Z,mu,S,target):
"""Returns shape N,M,M,Q"""
@ -175,4 +198,3 @@ class rbf(kernpart):
self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
self._Z, self._mu, self._S = Z, mu,S

View file

@ -1,251 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart
import numpy as np
import hashlib
class rbf_ARD(kernpart):
def __init__(self,D,variance=1.,lengthscales=None):
"""
Arguments
----------
D: int - the number of input dimensions
variance: float
lengthscales : np.ndarray of shape (D,)
"""
self.D = D
if lengthscales is not None:
assert lengthscales.shape==(self.D,)
else:
lengthscales = np.ones(self.D)
self.Nparam = self.D + 1
self.name = 'rbf_ARD'
self.set_param(np.hstack((variance,lengthscales)))
#initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3,1))
self._X, self._X2, self._params = np.empty(shape=(3,1))
def get_param(self):
return np.hstack((self.variance,self.lengthscales))
def set_param(self,x):
assert x.size==(self.D+1)
self.variance = x[0]
self.lengthscales = x[1:]
self.lengthscales2 = np.square(self.lengthscales)
#reset cached results
self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
def get_param_names(self):
if self.D==1:
return ['variance','lengthscale']
else:
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
def K(self,X,X2,target):
self._K_computations(X,X2)
np.add(self.variance*self._K_dvar, target,target)
def Kdiag(self,X,target):
np.add(target,self.variance,target)
def dK_dtheta(self,partial,X,X2,target):
self._K_computations(X,X2)
dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscales
target[0] += np.sum(self._K_dvar*partial)
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
def dKdiag_dtheta(self,X,target):
target[0] += np.sum(partial)
def dK_dX(self,partial,X,X2,target):
self._K_computations(X,X2)
dZ = self.variance*self._K_dvar[:,:,None]*self._K_dist/self.lengthscales2
dK_dX = -dZ.transpose(1,0,2)
target += np.sum(dK_dX*partial.T[:,:,None],0)
def dKdiag_dX(self,partial,X,target):
pass
def psi0(self,Z,mu,S,target):
target += self.variance
def dpsi0_dtheta(self,partial,Z,mu,S,target):
target[0] += 1.
def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
pass
def psi1(self,Z,mu,S,target):
self._psi_computations(Z,mu,S)
np.add(target, self._psi1,target)
def dpsi1_dtheta(self,partial,Z,mu,S,target):
self._psi_computations(Z,mu,S)
denom_deriv = S[:,None,:]/(self.lengthscales**3+self.lengthscales*S[:,None,:])
d_length = self._psi1[:,:,None]*(self.lengthscales*np.square(self._psi1_dist/(self.lengthscales2+S[:,None,:])) + denom_deriv)
target[0] += np.sum(partial*self._psi1/self.variance)
target[1:] += (d_length*partial[:,:,None]).sum(0).sum(0)
def dpsi1_dZ(self, partial, Z, mu, S, target):
    """Accumulate dL/dZ (shape M x Q) through psi1, chaining partial = dL/dpsi1.

    Bug fix: a leftover line first tried to add the full, unweighted
    N x M x Q gradient straight onto the M x Q target — it ignored
    `partial` and could not broadcast into the output.  Only the correct
    partial-weighted sum over the data axis is kept.
    """
    self._psi_computations(Z, mu, S)
    target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,0)
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
    """Accumulate dL/dmu and dL/dS through psi1; the sums over axis 1 leave shape (N, Q)."""
    self._psi_computations(Z,mu,S)
    tmp = self._psi1[:,:,None]/self.lengthscales2/self._psi1_denom
    # NOTE(review): unlike dpsi1_dZ, partial is used here without a trailing
    # axis, so callers presumably pass it already expanded to broadcast
    # against N,M,Q — confirm against callers
    target_mu += np.sum(partial*tmp*self._psi1_dist,1)
    target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1)
def psi2(self, Z, mu, S, target):
    """Add psi2, summed over the data axis to shape M x M, onto target in place."""
    self._psi_computations(Z, mu, S)
    # TODO: psi2 should be NxMxM (for het. noise)
    target += np.sum(self._psi2, 0)
def dpsi2_dtheta(self, partial, Z, mu, S, target):
    """Accumulate dL/dtheta through psi2, chaining partial = dL/dpsi2.

    Bug fix: `partial` was used in the body but missing from the
    signature (NameError at call time); it is now accepted, matching
    dpsi1_dtheta and the other gradient methods.
    """
    self._psi_computations(Z, mu, S)
    # psi2 is quadratic in the variance, so d(psi2)/d(variance) = 2*psi2/variance
    d_var = np.sum(2.*self._psi2/self.variance,0)
    d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscales2)/(self.lengthscales*self._psi2_denom)
    d_length = d_length.sum(0)
    target[0] += np.sum(partial*d_var)
    target[1:] += (d_length*partial[:,:,None]).sum(0).sum(0)
def dpsi2_dZ(self, partial, Z, mu, S, target):
    """Accumulate dL/dZ (shape M x Q) through psi2, chaining partial = dL/dpsi2.

    Bug fix: `partial` was used in the body but missing from the
    signature (NameError at call time); it is now accepted, matching
    dpsi1_dZ.
    """
    self._psi_computations(Z, mu, S)
    dZ = self._psi2[:,:,:,None]/self.lengthscales2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
    target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
def dpsi2_dmuS(self, partial, Z, mu, S, target_mu, target_S):
    """Accumulate dL/dmu and dL/dS through psi2, chaining partial = dL/dpsi2.

    Bug fix: `partial` was used in the body but missing from the
    signature (NameError at call time); it is now accepted, matching
    dpsi1_dmuS.
    """
    self._psi_computations(Z, mu, S)
    tmp = self._psi2[:,:,:,None]/self.lengthscales2/self._psi2_denom
    target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
    target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
def _K_computations(self,X,X2):
if not (np.all(X==self._X) and np.all(X2==self._X2)):
self._X = X
self._X2 = X2
if X2 is None: X2 = X
self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy
self._params = np.empty(shape=(1,0))#ensure the next section gets called
if not np.all(self._params == self.get_param()):
self._params == self.get_param()
self._K_dist2 = np.square(self._K_dist/self.lengthscales)
self._K_exponent = -0.5*self._K_dist2.sum(-1)
self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1))
def _psi_computations(self,Z,mu,S):
#here are the "statistics" for psi1 and psi2
if not np.all(Z==self._Z):
#Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
self._psi2_Zdist = Z[:,None,:]-Z[None,:,:] # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist)/self.lengthscales2 # M,M,Q
self._Z = Z
if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
#something's changed. recompute EVERYTHING
#psi1
self._psi1_denom = S[:,None,:]/self.lengthscales2 + 1.
self._psi1_dist = Z[None,:,:]-mu[:,None,:]
self._psi1_dist_sq = np.square(self._psi1_dist)/self.lengthscales2/self._psi1_denom
self._psi1_exponent = -0.5*np.sum(self._psi1_dist_sq+np.log(self._psi1_denom),-1)
self._psi1 = self.variance*np.exp(self._psi1_exponent)
#psi2
self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscales2+1. # N,M,M,Q
self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscales2*self._psi2_denom)
self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M
self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
self._Z, self._mu, self._S = Z, mu,S
# Ad-hoc finite-difference gradient checks (Python 2 syntax; relies on an
# `rbf` class and a `checkgrad` helper that are not defined in this block).
if __name__=='__main__':
    #run some simple tests on the kernel (TODO:move these to unititest)
    #TODO: these are broken in this new structure!
    # NOTE(review): these drivers call set_param/K/dK_dtheta/dpsi*_dtheta
    # without the `partial` argument the methods above expect, and use
    # Python 2 print statements, so they will not run as-is — confirm
    # before reviving them.
    N = 10
    M = 5
    Q = 3
    Z = np.random.randn(M,Q)
    mu = np.random.randn(N,Q)
    S = np.random.rand(N,Q)
    var = 2.5
    lengthscales = np.ones(Q)*0.7
    k = rbf(Q,var,lengthscales)
    from checkgrad import checkgrad
    # finite-difference check of dK/dtheta
    def k_theta_test(param,k):
        k.set_param(param)
        K = k.K(Z)
        dK_dtheta = k.dK_dtheta(Z)
        f = np.sum(K)
        df = dK_dtheta.sum(0).sum(0)
        return f,np.array(df)
    print "dk_dtheta_test"
    checkgrad(k_theta_test,np.random.randn(1+Q),args=(k,))
    # finite-difference check of dpsi1/dmu
    def psi1_mu_test(mu,k):
        mu = mu.reshape(N,Q)
        f = np.sum(k.psi1(Z,mu,S))
        df = k.dpsi1_dmuS(Z,mu,S)[0].sum(1)
        return f,df.flatten()
    print "psi1_mu_test"
    checkgrad(psi1_mu_test,np.random.randn(N*Q),args=(k,))
    # finite-difference check of dpsi1/dS
    def psi1_S_test(S,k):
        S = S.reshape(N,Q)
        f = np.sum(k.psi1(Z,mu,S))
        df = k.dpsi1_dmuS(Z,mu,S)[1].sum(1)
        return f,df.flatten()
    print "psi1_S_test"
    checkgrad(psi1_S_test,np.random.rand(N*Q),args=(k,))
    # finite-difference check of dpsi1/dtheta
    def psi1_theta_test(theta,k):
        k.set_param(theta)
        f = np.sum(k.psi1(Z,mu,S))
        df = np.array([np.sum(grad) for grad in k.dpsi1_dtheta(Z,mu,S)])
        return f,df
    print "psi1_theta_test"
    checkgrad(psi1_theta_test,np.random.rand(1+Q),args=(k,))
    # finite-difference check of dpsi2/dmu
    def psi2_mu_test(mu,k):
        mu = mu.reshape(N,Q)
        f = np.sum(k.psi2(Z,mu,S))
        df = k.dpsi2_dmuS(Z,mu,S)[0].sum(1).sum(1)
        return f,df.flatten()
    print "psi2_mu_test"
    checkgrad(psi2_mu_test,np.random.randn(N*Q),args=(k,))
    # finite-difference check of dpsi2/dS
    def psi2_S_test(S,k):
        S = S.reshape(N,Q)
        f = np.sum(k.psi2(Z,mu,S))
        df = k.dpsi2_dmuS(Z,mu,S)[1].sum(1).sum(1)
        return f,df.flatten()
    print "psi2_S_test"
    checkgrad(psi2_S_test,np.random.rand(N*Q),args=(k,))
    # finite-difference check of dpsi2/dtheta
    def psi2_theta_test(theta,k):
        k.set_param(theta)
        f = np.sum(k.psi2(Z,mu,S))
        df = np.array([np.sum(grad) for grad in k.dpsi2_dtheta(Z,mu,S)])
        return f,df
    print "psi2_theta_test"
    checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,))

View file

@ -25,15 +25,15 @@ class spline(kernpart):
assert self.D==1
self.Nparam = 1
self.name = 'spline'
self.set_param(np.squeeze(variance))
self._set_params(np.squeeze(variance))
def get_param(self):
def _get_params(self):
return self.variance
def set_param(self,x):
def _set_params(self,x):
self.variance = x
def get_param_names(self):
def _get_param_names(self):
return ['variance']
def K(self,X,X2,target):

View file

@ -44,7 +44,7 @@ class spkern(kernpart):
if param is None:
param = np.ones(self.Nparam)
assert param.size==self.Nparam
self.set_param(param)
self._set_params(param)
#Differentiate!
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
@ -247,12 +247,12 @@ class spkern(kernpart):
Z = X
weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
def set_param(self,param):
def _set_params(self,param):
#print param.flags['C_CONTIGUOUS']
self._param = param.copy()
def get_param(self):
def _get_params(self):
return self._param
def get_param_names(self):
def _get_param_names(self):
return [x.name for x in self._sp_theta]

View file

@ -17,16 +17,16 @@ class white(kernpart):
self.D = D
self.Nparam = 1
self.name = 'white'
self.set_param(np.array([variance]).flatten())
self._set_params(np.array([variance]).flatten())
def get_param(self):
def _get_params(self):
return self.variance
def set_param(self,x):
def _set_params(self,x):
assert x.shape==(1,)
self.variance = x
def get_param_names(self):
def _get_param_names(self):
return ['variance']
def K(self,X,X2,target):